GNU Linux-libre 5.4.274-gnu1
arch/x86/events/intel/core.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Per core/cpu state
4  *
5  * Used to coordinate shared registers between HT threads or
6  * among events on a single PMU.
7  */
8
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11 #include <linux/stddef.h>
12 #include <linux/types.h>
13 #include <linux/init.h>
14 #include <linux/slab.h>
15 #include <linux/export.h>
16 #include <linux/nmi.h>
17
18 #include <asm/cpufeature.h>
19 #include <asm/hardirq.h>
20 #include <asm/intel-family.h>
21 #include <asm/intel_pt.h>
22 #include <asm/apic.h>
23 #include <asm/cpu_device_id.h>
24
25 #include "../perf_event.h"
26
27 /*
28  * Intel PerfMon, used on Core and later.
29  */
30 static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
31 {
32         [PERF_COUNT_HW_CPU_CYCLES]              = 0x003c,
33         [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
34         [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x4f2e,
35         [PERF_COUNT_HW_CACHE_MISSES]            = 0x412e,
36         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
37         [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
38         [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
39         [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
40 };
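/*
 * Illustrative sketch (not part of the upstream file): each entry above
 * packs the architectural event select in bits 0-7 and the unit mask in
 * bits 8-15, so e.g. 0x412e is event 0x2e with umask 0x41 (the
 * architectural "LLC Misses" event).  The example_* helpers below are
 * hypothetical and exist only to make that split concrete:
 */
static inline u8 example_eventsel(u64 config)
{
	return config & 0xff;		/* bits 0-7: event select */
}

static inline u8 example_umask(u64 config)
{
	return (config >> 8) & 0xff;	/* bits 8-15: unit mask */
}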
41
42 static struct event_constraint intel_core_event_constraints[] __read_mostly =
43 {
44         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
45         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
46         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
47         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
48         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
49         INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
50         EVENT_CONSTRAINT_END
51 };
52
53 static struct event_constraint intel_core2_event_constraints[] __read_mostly =
54 {
55         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
56         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
57         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
58         INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
59         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
60         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
61         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
62         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
63         INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
64         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
65         INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
66         INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
67         INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
68         EVENT_CONSTRAINT_END
69 };
70
71 static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
72 {
73         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
74         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
75         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
76         INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
77         INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
78         INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
79         INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
80         INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
81         INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
82         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
83         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
84         EVENT_CONSTRAINT_END
85 };
86
87 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
88 {
89         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
90         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
91         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
92         EVENT_EXTRA_END
93 };
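/*
 * Background (hedged, based on intel_fixup_er() later in this file): the
 * fixup code indexes this array by EXTRA_REG_RSP_0/RSP_1 and rewrites the
 * event code (0x01b7 vs. 0x01bb) when an event has to migrate to the other
 * OFFCORE_RSP MSR, hence the ordering requirement noted above.
 */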
94
95 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
96 {
97         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
98         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
99         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
100         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
101         INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
102         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
103         INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
104         EVENT_CONSTRAINT_END
105 };
106
107 static struct event_constraint intel_snb_event_constraints[] __read_mostly =
108 {
109         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
110         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
111         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
112         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
113         INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
114         INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
115         INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
116         INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
117         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
118         INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
119         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
120         INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
121
122         /*
123          * When HT is off these events can only run on the bottom 4 counters
124          * When HT is on, they are impacted by the HT bug and require EXCL access
125          */
126         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
127         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
128         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
129         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
130
131         EVENT_CONSTRAINT_END
132 };
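/*
 * Background (hedged): the "HT bug" referred to above is the cross-thread
 * counter corruption erratum on SNB/IVB/HSW, where these memory events
 * running on one hyperthread can corrupt counters of the sibling thread;
 * the EXCL constraints make the scheduler grant such events exclusive use
 * of the affected counters across both siblings.
 */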
133
134 static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
135 {
136         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
137         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
138         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
139         INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
140         INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
141         INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
142         INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
143         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
144         INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
145         INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
146         INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
147         INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
148         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
149
150         /*
151          * When HT is off these events can only run on the bottom 4 counters
152          * When HT is on, they are impacted by the HT bug and require EXCL access
153          */
154         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
155         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
156         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
157         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
158
159         EVENT_CONSTRAINT_END
160 };
161
162 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
163 {
164         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
165         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
166         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
167         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
168         EVENT_EXTRA_END
169 };
170
171 static struct event_constraint intel_v1_event_constraints[] __read_mostly =
172 {
173         EVENT_CONSTRAINT_END
174 };
175
176 static struct event_constraint intel_gen_event_constraints[] __read_mostly =
177 {
178         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
179         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
180         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
181         EVENT_CONSTRAINT_END
182 };
183
184 static struct event_constraint intel_slm_event_constraints[] __read_mostly =
185 {
186         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
187         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
188         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
189         EVENT_CONSTRAINT_END
190 };
191
192 static struct event_constraint intel_skl_event_constraints[] = {
193         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
194         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
195         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
196         INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),    /* INST_RETIRED.PREC_DIST */
197
198         /*
199          * when HT is off, these can only run on the bottom 4 counters
200          */
201         INTEL_EVENT_CONSTRAINT(0xd0, 0xf),      /* MEM_INST_RETIRED.* */
202         INTEL_EVENT_CONSTRAINT(0xd1, 0xf),      /* MEM_LOAD_RETIRED.* */
203         INTEL_EVENT_CONSTRAINT(0xd2, 0xf),      /* MEM_LOAD_L3_HIT_RETIRED.* */
204         INTEL_EVENT_CONSTRAINT(0xcd, 0xf),      /* MEM_TRANS_RETIRED.* */
205         INTEL_EVENT_CONSTRAINT(0xc6, 0xf),      /* FRONTEND_RETIRED.* */
206
207         EVENT_CONSTRAINT_END
208 };
209
210 static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
211         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
212         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
213         EVENT_EXTRA_END
214 };
215
216 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
217         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
218         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
219         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
220         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
221         EVENT_EXTRA_END
222 };
223
224 static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
225         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
226         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
227         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
228         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
229         EVENT_EXTRA_END
230 };
231
232 static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
233         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
234         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
235         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
236         /*
237          * Note: the low 8 bits of the event select code do not form a contiguous
238          * field; some bits #GP when set, so they are masked out here.
239          */
240         INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
241         EVENT_EXTRA_END
242 };
243
244 static struct event_constraint intel_icl_event_constraints[] = {
245         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
246         FIXED_EVENT_CONSTRAINT(0x01c0, 0),      /* INST_RETIRED.PREC_DIST */
247         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
248         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
249         FIXED_EVENT_CONSTRAINT(0x0400, 3),      /* SLOTS */
250         INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
251         INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
252         INTEL_EVENT_CONSTRAINT(0x32, 0xf),      /* SW_PREFETCH_ACCESS.* */
253         INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf),
254         INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
255         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_TOTAL */
256         INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff),  /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
257         INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
258         INTEL_EVENT_CONSTRAINT(0xa3, 0xf),      /* CYCLE_ACTIVITY.* */
259         INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
260         INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
261         INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
262         INTEL_EVENT_CONSTRAINT(0xef, 0xf),
263         INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
264         EVENT_CONSTRAINT_END
265 };
266
267 static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
268         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
269         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
270         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
271         INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
272         EVENT_EXTRA_END
273 };
274
275 EVENT_ATTR_STR(mem-loads,       mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
276 EVENT_ATTR_STR(mem-loads,       mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
277 EVENT_ATTR_STR(mem-stores,      mem_st_snb,     "event=0xcd,umask=0x2");
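/*
 * Note (assumption): "ldlat=3" is the load-latency threshold in core
 * cycles below which loads are not sampled; it is expected to be programmed
 * into MSR_PEBS_LD_LAT_THRESHOLD by the PEBS code when such an event is
 * created.
 */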
278
279 static struct attribute *nhm_mem_events_attrs[] = {
280         EVENT_PTR(mem_ld_nhm),
281         NULL,
282 };
283
284 /*
285  * topdown events for Intel Core CPUs.
286  *
287  * The events are all measured in slots; a slot is an issue opportunity
288  * in a 4-wide pipeline. Some events are already reported in slots; for
289  * cycle events we multiply by the pipeline width (4).
290  *
291  * With Hyper Threading on, topdown metrics are either summed or averaged
292  * between the threads of a core: (count_t0 + count_t1).
293  *
294  * For the average case the metric is always scaled to pipeline width,
295  * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
296  */
297
298 EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
299         "event=0x3c,umask=0x0",                 /* cpu_clk_unhalted.thread */
300         "event=0x3c,umask=0x0,any=1");          /* cpu_clk_unhalted.thread_any */
301 EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
302 EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
303         "event=0xe,umask=0x1");                 /* uops_issued.any */
304 EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
305         "event=0xc2,umask=0x2");                /* uops_retired.retire_slots */
306 EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
307         "event=0x9c,umask=0x1");                /* idq_uops_not_delivered_core */
308 EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
309         "event=0xd,umask=0x3,cmask=1",          /* int_misc.recovery_cycles */
310         "event=0xd,umask=0x3,cmask=1,any=1");   /* int_misc.recovery_cycles_any */
311 EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
312         "4", "2");
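/*
 * Sketch of how these attributes are consumed (hedged): perf's top-down
 * support combines them into the level-1 metrics roughly as
 *
 *	frontend_bound  = fetch_bubbles / total_slots
 *	bad_speculation = (slots_issued - slots_retired + recovery_bubbles)
 *			  / total_slots
 *	retiring        = slots_retired / total_slots
 *	backend_bound   = 1 - frontend_bound - bad_speculation - retiring
 *
 * with total_slots scaled by the .scale attributes above (4, or 2 per
 * thread when HT is on).
 */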
313
314 static struct attribute *snb_events_attrs[] = {
315         EVENT_PTR(td_slots_issued),
316         EVENT_PTR(td_slots_retired),
317         EVENT_PTR(td_fetch_bubbles),
318         EVENT_PTR(td_total_slots),
319         EVENT_PTR(td_total_slots_scale),
320         EVENT_PTR(td_recovery_bubbles),
321         EVENT_PTR(td_recovery_bubbles_scale),
322         NULL,
323 };
324
325 static struct attribute *snb_mem_events_attrs[] = {
326         EVENT_PTR(mem_ld_snb),
327         EVENT_PTR(mem_st_snb),
328         NULL,
329 };
330
331 static struct event_constraint intel_hsw_event_constraints[] = {
332         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
333         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
334         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
335         INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
336         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
337         INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
338         /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
339         INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
340         /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
341         INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
342         /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
343         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
344
345         /*
346          * When HT is off these events can only run on the bottom 4 counters
347          * When HT is on, they are impacted by the HT bug and require EXCL access
348          */
349         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
350         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
351         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
352         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
353
354         EVENT_CONSTRAINT_END
355 };
356
357 static struct event_constraint intel_bdw_event_constraints[] = {
358         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
359         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
360         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
361         INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
362         INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),        /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
363         /*
364          * when HT is off, these can only run on the bottom 4 counters
365          */
366         INTEL_EVENT_CONSTRAINT(0xd0, 0xf),      /* MEM_INST_RETIRED.* */
367         INTEL_EVENT_CONSTRAINT(0xd1, 0xf),      /* MEM_LOAD_RETIRED.* */
368         INTEL_EVENT_CONSTRAINT(0xd2, 0xf),      /* MEM_LOAD_L3_HIT_RETIRED.* */
369         INTEL_EVENT_CONSTRAINT(0xcd, 0xf),      /* MEM_TRANS_RETIRED.* */
370         EVENT_CONSTRAINT_END
371 };
372
373 static u64 intel_pmu_event_map(int hw_event)
374 {
375         return intel_perfmon_event_map[hw_event];
376 }
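/*
 * Note (assumption): this is wired up as x86_pmu.event_map, so a generic
 * PERF_TYPE_HARDWARE event such as PERF_COUNT_HW_CACHE_MISSES gets
 * translated into the raw encoding (0x412e) from the table at the top of
 * this file during event setup.
 */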
377
378 /*
379  * Notes on the events:
380  * - data reads do not include code reads (comparable to earlier tables)
381  * - data counts include speculative execution (except L1 write, dtlb, bpu)
382  * - remote node access includes remote memory, remote cache, remote mmio.
383  * - prefetches are not included in the counts.
384  * - icache miss does not include decoded icache
385  */
386
387 #define SKL_DEMAND_DATA_RD              BIT_ULL(0)
388 #define SKL_DEMAND_RFO                  BIT_ULL(1)
389 #define SKL_ANY_RESPONSE                BIT_ULL(16)
390 #define SKL_SUPPLIER_NONE               BIT_ULL(17)
391 #define SKL_L3_MISS_LOCAL_DRAM          BIT_ULL(26)
392 #define SKL_L3_MISS_REMOTE_HOP0_DRAM    BIT_ULL(27)
393 #define SKL_L3_MISS_REMOTE_HOP1_DRAM    BIT_ULL(28)
394 #define SKL_L3_MISS_REMOTE_HOP2P_DRAM   BIT_ULL(29)
395 #define SKL_L3_MISS                     (SKL_L3_MISS_LOCAL_DRAM| \
396                                          SKL_L3_MISS_REMOTE_HOP0_DRAM| \
397                                          SKL_L3_MISS_REMOTE_HOP1_DRAM| \
398                                          SKL_L3_MISS_REMOTE_HOP2P_DRAM)
399 #define SKL_SPL_HIT                     BIT_ULL(30)
400 #define SKL_SNOOP_NONE                  BIT_ULL(31)
401 #define SKL_SNOOP_NOT_NEEDED            BIT_ULL(32)
402 #define SKL_SNOOP_MISS                  BIT_ULL(33)
403 #define SKL_SNOOP_HIT_NO_FWD            BIT_ULL(34)
404 #define SKL_SNOOP_HIT_WITH_FWD          BIT_ULL(35)
405 #define SKL_SNOOP_HITM                  BIT_ULL(36)
406 #define SKL_SNOOP_NON_DRAM              BIT_ULL(37)
407 #define SKL_ANY_SNOOP                   (SKL_SPL_HIT|SKL_SNOOP_NONE| \
408                                          SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
409                                          SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
410                                          SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
411 #define SKL_DEMAND_READ                 SKL_DEMAND_DATA_RD
412 #define SKL_SNOOP_DRAM                  (SKL_SNOOP_NONE| \
413                                          SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
414                                          SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
415                                          SKL_SNOOP_HITM|SKL_SPL_HIT)
416 #define SKL_DEMAND_WRITE                SKL_DEMAND_RFO
417 #define SKL_LLC_ACCESS                  SKL_ANY_RESPONSE
418 #define SKL_L3_MISS_REMOTE              (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
419                                          SKL_L3_MISS_REMOTE_HOP1_DRAM| \
420                                          SKL_L3_MISS_REMOTE_HOP2P_DRAM)
421
422 static __initconst const u64 skl_hw_cache_event_ids
423                                 [PERF_COUNT_HW_CACHE_MAX]
424                                 [PERF_COUNT_HW_CACHE_OP_MAX]
425                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
426 {
427  [ C(L1D ) ] = {
428         [ C(OP_READ) ] = {
429                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
430                 [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
431         },
432         [ C(OP_WRITE) ] = {
433                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
434                 [ C(RESULT_MISS)   ] = 0x0,
435         },
436         [ C(OP_PREFETCH) ] = {
437                 [ C(RESULT_ACCESS) ] = 0x0,
438                 [ C(RESULT_MISS)   ] = 0x0,
439         },
440  },
441  [ C(L1I ) ] = {
442         [ C(OP_READ) ] = {
443                 [ C(RESULT_ACCESS) ] = 0x0,
444                 [ C(RESULT_MISS)   ] = 0x283,   /* ICACHE_64B.MISS */
445         },
446         [ C(OP_WRITE) ] = {
447                 [ C(RESULT_ACCESS) ] = -1,
448                 [ C(RESULT_MISS)   ] = -1,
449         },
450         [ C(OP_PREFETCH) ] = {
451                 [ C(RESULT_ACCESS) ] = 0x0,
452                 [ C(RESULT_MISS)   ] = 0x0,
453         },
454  },
455  [ C(LL  ) ] = {
456         [ C(OP_READ) ] = {
457                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
458                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
459         },
460         [ C(OP_WRITE) ] = {
461                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
462                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
463         },
464         [ C(OP_PREFETCH) ] = {
465                 [ C(RESULT_ACCESS) ] = 0x0,
466                 [ C(RESULT_MISS)   ] = 0x0,
467         },
468  },
469  [ C(DTLB) ] = {
470         [ C(OP_READ) ] = {
471                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
472                 [ C(RESULT_MISS)   ] = 0xe08,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
473         },
474         [ C(OP_WRITE) ] = {
475                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
476                 [ C(RESULT_MISS)   ] = 0xe49,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
477         },
478         [ C(OP_PREFETCH) ] = {
479                 [ C(RESULT_ACCESS) ] = 0x0,
480                 [ C(RESULT_MISS)   ] = 0x0,
481         },
482  },
483  [ C(ITLB) ] = {
484         [ C(OP_READ) ] = {
485                 [ C(RESULT_ACCESS) ] = 0x2085,  /* ITLB_MISSES.STLB_HIT */
486                 [ C(RESULT_MISS)   ] = 0xe85,   /* ITLB_MISSES.WALK_COMPLETED */
487         },
488         [ C(OP_WRITE) ] = {
489                 [ C(RESULT_ACCESS) ] = -1,
490                 [ C(RESULT_MISS)   ] = -1,
491         },
492         [ C(OP_PREFETCH) ] = {
493                 [ C(RESULT_ACCESS) ] = -1,
494                 [ C(RESULT_MISS)   ] = -1,
495         },
496  },
497  [ C(BPU ) ] = {
498         [ C(OP_READ) ] = {
499                 [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
500                 [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
501         },
502         [ C(OP_WRITE) ] = {
503                 [ C(RESULT_ACCESS) ] = -1,
504                 [ C(RESULT_MISS)   ] = -1,
505         },
506         [ C(OP_PREFETCH) ] = {
507                 [ C(RESULT_ACCESS) ] = -1,
508                 [ C(RESULT_MISS)   ] = -1,
509         },
510  },
511  [ C(NODE) ] = {
512         [ C(OP_READ) ] = {
513                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
514                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
515         },
516         [ C(OP_WRITE) ] = {
517                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
518                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
519         },
520         [ C(OP_PREFETCH) ] = {
521                 [ C(RESULT_ACCESS) ] = 0x0,
522                 [ C(RESULT_MISS)   ] = 0x0,
523         },
524  },
525 };
526
527 static __initconst const u64 skl_hw_cache_extra_regs
528                                 [PERF_COUNT_HW_CACHE_MAX]
529                                 [PERF_COUNT_HW_CACHE_OP_MAX]
530                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
531 {
532  [ C(LL  ) ] = {
533         [ C(OP_READ) ] = {
534                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
535                                        SKL_LLC_ACCESS|SKL_ANY_SNOOP,
536                 [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
537                                        SKL_L3_MISS|SKL_ANY_SNOOP|
538                                        SKL_SUPPLIER_NONE,
539         },
540         [ C(OP_WRITE) ] = {
541                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
542                                        SKL_LLC_ACCESS|SKL_ANY_SNOOP,
543                 [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
544                                        SKL_L3_MISS|SKL_ANY_SNOOP|
545                                        SKL_SUPPLIER_NONE,
546         },
547         [ C(OP_PREFETCH) ] = {
548                 [ C(RESULT_ACCESS) ] = 0x0,
549                 [ C(RESULT_MISS)   ] = 0x0,
550         },
551  },
552  [ C(NODE) ] = {
553         [ C(OP_READ) ] = {
554                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
555                                        SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
556                 [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
557                                        SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
558         },
559         [ C(OP_WRITE) ] = {
560                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
561                                        SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
562                 [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
563                                        SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
564         },
565         [ C(OP_PREFETCH) ] = {
566                 [ C(RESULT_ACCESS) ] = 0x0,
567                 [ C(RESULT_MISS)   ] = 0x0,
568         },
569  },
570 };
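/*
 * Note (assumption): the values above are OFFCORE_RESPONSE bit masks, not
 * event encodings; when the generic cache events above resolve to the
 * 0x01b7/0x01bb OFFCORE_RESPONSE events, the matching mask from this table
 * is what ends up being written to MSR_OFFCORE_RSP_0/1.
 */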
571
572 #define SNB_DMND_DATA_RD        (1ULL << 0)
573 #define SNB_DMND_RFO            (1ULL << 1)
574 #define SNB_DMND_IFETCH         (1ULL << 2)
575 #define SNB_DMND_WB             (1ULL << 3)
576 #define SNB_PF_DATA_RD          (1ULL << 4)
577 #define SNB_PF_RFO              (1ULL << 5)
578 #define SNB_PF_IFETCH           (1ULL << 6)
579 #define SNB_LLC_DATA_RD         (1ULL << 7)
580 #define SNB_LLC_RFO             (1ULL << 8)
581 #define SNB_LLC_IFETCH          (1ULL << 9)
582 #define SNB_BUS_LOCKS           (1ULL << 10)
583 #define SNB_STRM_ST             (1ULL << 11)
584 #define SNB_OTHER               (1ULL << 15)
585 #define SNB_RESP_ANY            (1ULL << 16)
586 #define SNB_NO_SUPP             (1ULL << 17)
587 #define SNB_LLC_HITM            (1ULL << 18)
588 #define SNB_LLC_HITE            (1ULL << 19)
589 #define SNB_LLC_HITS            (1ULL << 20)
590 #define SNB_LLC_HITF            (1ULL << 21)
591 #define SNB_LOCAL               (1ULL << 22)
592 #define SNB_REMOTE              (0xffULL << 23)
593 #define SNB_SNP_NONE            (1ULL << 31)
594 #define SNB_SNP_NOT_NEEDED      (1ULL << 32)
595 #define SNB_SNP_MISS            (1ULL << 33)
596 #define SNB_NO_FWD              (1ULL << 34)
597 #define SNB_SNP_FWD             (1ULL << 35)
598 #define SNB_HITM                (1ULL << 36)
599 #define SNB_NON_DRAM            (1ULL << 37)
600
601 #define SNB_DMND_READ           (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
602 #define SNB_DMND_WRITE          (SNB_DMND_RFO|SNB_LLC_RFO)
603 #define SNB_DMND_PREFETCH       (SNB_PF_DATA_RD|SNB_PF_RFO)
604
605 #define SNB_SNP_ANY             (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
606                                  SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
607                                  SNB_HITM)
608
609 #define SNB_DRAM_ANY            (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
610 #define SNB_DRAM_REMOTE         (SNB_REMOTE|SNB_SNP_ANY)
611
612 #define SNB_L3_ACCESS           SNB_RESP_ANY
613 #define SNB_L3_MISS             (SNB_DRAM_ANY|SNB_NON_DRAM)
614
615 static __initconst const u64 snb_hw_cache_extra_regs
616                                 [PERF_COUNT_HW_CACHE_MAX]
617                                 [PERF_COUNT_HW_CACHE_OP_MAX]
618                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
619 {
620  [ C(LL  ) ] = {
621         [ C(OP_READ) ] = {
622                 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
623                 [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
624         },
625         [ C(OP_WRITE) ] = {
626                 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
627                 [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
628         },
629         [ C(OP_PREFETCH) ] = {
630                 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
631                 [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
632         },
633  },
634  [ C(NODE) ] = {
635         [ C(OP_READ) ] = {
636                 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
637                 [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
638         },
639         [ C(OP_WRITE) ] = {
640                 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
641                 [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
642         },
643         [ C(OP_PREFETCH) ] = {
644                 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
645                 [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
646         },
647  },
648 };
649
650 static __initconst const u64 snb_hw_cache_event_ids
651                                 [PERF_COUNT_HW_CACHE_MAX]
652                                 [PERF_COUNT_HW_CACHE_OP_MAX]
653                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
654 {
655  [ C(L1D) ] = {
656         [ C(OP_READ) ] = {
657                 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
658                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
659         },
660         [ C(OP_WRITE) ] = {
661                 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
662                 [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
663         },
664         [ C(OP_PREFETCH) ] = {
665                 [ C(RESULT_ACCESS) ] = 0x0,
666                 [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
667         },
668  },
669  [ C(L1I ) ] = {
670         [ C(OP_READ) ] = {
671                 [ C(RESULT_ACCESS) ] = 0x0,
672                 [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
673         },
674         [ C(OP_WRITE) ] = {
675                 [ C(RESULT_ACCESS) ] = -1,
676                 [ C(RESULT_MISS)   ] = -1,
677         },
678         [ C(OP_PREFETCH) ] = {
679                 [ C(RESULT_ACCESS) ] = 0x0,
680                 [ C(RESULT_MISS)   ] = 0x0,
681         },
682  },
683  [ C(LL  ) ] = {
684         [ C(OP_READ) ] = {
685                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
686                 [ C(RESULT_ACCESS) ] = 0x01b7,
687                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
688                 [ C(RESULT_MISS)   ] = 0x01b7,
689         },
690         [ C(OP_WRITE) ] = {
691                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
692                 [ C(RESULT_ACCESS) ] = 0x01b7,
693                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
694                 [ C(RESULT_MISS)   ] = 0x01b7,
695         },
696         [ C(OP_PREFETCH) ] = {
697                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
698                 [ C(RESULT_ACCESS) ] = 0x01b7,
699                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
700                 [ C(RESULT_MISS)   ] = 0x01b7,
701         },
702  },
703  [ C(DTLB) ] = {
704         [ C(OP_READ) ] = {
705                 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
706                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
707         },
708         [ C(OP_WRITE) ] = {
709                 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
710                 [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
711         },
712         [ C(OP_PREFETCH) ] = {
713                 [ C(RESULT_ACCESS) ] = 0x0,
714                 [ C(RESULT_MISS)   ] = 0x0,
715         },
716  },
717  [ C(ITLB) ] = {
718         [ C(OP_READ) ] = {
719                 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
720                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
721         },
722         [ C(OP_WRITE) ] = {
723                 [ C(RESULT_ACCESS) ] = -1,
724                 [ C(RESULT_MISS)   ] = -1,
725         },
726         [ C(OP_PREFETCH) ] = {
727                 [ C(RESULT_ACCESS) ] = -1,
728                 [ C(RESULT_MISS)   ] = -1,
729         },
730  },
731  [ C(BPU ) ] = {
732         [ C(OP_READ) ] = {
733                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
734                 [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
735         },
736         [ C(OP_WRITE) ] = {
737                 [ C(RESULT_ACCESS) ] = -1,
738                 [ C(RESULT_MISS)   ] = -1,
739         },
740         [ C(OP_PREFETCH) ] = {
741                 [ C(RESULT_ACCESS) ] = -1,
742                 [ C(RESULT_MISS)   ] = -1,
743         },
744  },
745  [ C(NODE) ] = {
746         [ C(OP_READ) ] = {
747                 [ C(RESULT_ACCESS) ] = 0x01b7,
748                 [ C(RESULT_MISS)   ] = 0x01b7,
749         },
750         [ C(OP_WRITE) ] = {
751                 [ C(RESULT_ACCESS) ] = 0x01b7,
752                 [ C(RESULT_MISS)   ] = 0x01b7,
753         },
754         [ C(OP_PREFETCH) ] = {
755                 [ C(RESULT_ACCESS) ] = 0x01b7,
756                 [ C(RESULT_MISS)   ] = 0x01b7,
757         },
758  },
759
760 };
761
762 /*
763  * Notes on the events:
764  * - data reads do not include code reads (comparable to earlier tables)
765  * - data counts include speculative execution (except L1 write, dtlb, bpu)
766  * - remote node access includes remote memory, remote cache, remote mmio.
767  * - prefetches are not included in the counts because they are not
768  *   reliably counted.
769  */
770
771 #define HSW_DEMAND_DATA_RD              BIT_ULL(0)
772 #define HSW_DEMAND_RFO                  BIT_ULL(1)
773 #define HSW_ANY_RESPONSE                BIT_ULL(16)
774 #define HSW_SUPPLIER_NONE               BIT_ULL(17)
775 #define HSW_L3_MISS_LOCAL_DRAM          BIT_ULL(22)
776 #define HSW_L3_MISS_REMOTE_HOP0         BIT_ULL(27)
777 #define HSW_L3_MISS_REMOTE_HOP1         BIT_ULL(28)
778 #define HSW_L3_MISS_REMOTE_HOP2P        BIT_ULL(29)
779 #define HSW_L3_MISS                     (HSW_L3_MISS_LOCAL_DRAM| \
780                                          HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
781                                          HSW_L3_MISS_REMOTE_HOP2P)
782 #define HSW_SNOOP_NONE                  BIT_ULL(31)
783 #define HSW_SNOOP_NOT_NEEDED            BIT_ULL(32)
784 #define HSW_SNOOP_MISS                  BIT_ULL(33)
785 #define HSW_SNOOP_HIT_NO_FWD            BIT_ULL(34)
786 #define HSW_SNOOP_HIT_WITH_FWD          BIT_ULL(35)
787 #define HSW_SNOOP_HITM                  BIT_ULL(36)
788 #define HSW_SNOOP_NON_DRAM              BIT_ULL(37)
789 #define HSW_ANY_SNOOP                   (HSW_SNOOP_NONE| \
790                                          HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
791                                          HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
792                                          HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
793 #define HSW_SNOOP_DRAM                  (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
794 #define HSW_DEMAND_READ                 HSW_DEMAND_DATA_RD
795 #define HSW_DEMAND_WRITE                HSW_DEMAND_RFO
796 #define HSW_L3_MISS_REMOTE              (HSW_L3_MISS_REMOTE_HOP0|\
797                                          HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
798 #define HSW_LLC_ACCESS                  HSW_ANY_RESPONSE
799
800 #define BDW_L3_MISS_LOCAL               BIT(26)
801 #define BDW_L3_MISS                     (BDW_L3_MISS_LOCAL| \
802                                          HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
803                                          HSW_L3_MISS_REMOTE_HOP2P)
804
805
806 static __initconst const u64 hsw_hw_cache_event_ids
807                                 [PERF_COUNT_HW_CACHE_MAX]
808                                 [PERF_COUNT_HW_CACHE_OP_MAX]
809                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
810 {
811  [ C(L1D ) ] = {
812         [ C(OP_READ) ] = {
813                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
814                 [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
815         },
816         [ C(OP_WRITE) ] = {
817                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
818                 [ C(RESULT_MISS)   ] = 0x0,
819         },
820         [ C(OP_PREFETCH) ] = {
821                 [ C(RESULT_ACCESS) ] = 0x0,
822                 [ C(RESULT_MISS)   ] = 0x0,
823         },
824  },
825  [ C(L1I ) ] = {
826         [ C(OP_READ) ] = {
827                 [ C(RESULT_ACCESS) ] = 0x0,
828                 [ C(RESULT_MISS)   ] = 0x280,   /* ICACHE.MISSES */
829         },
830         [ C(OP_WRITE) ] = {
831                 [ C(RESULT_ACCESS) ] = -1,
832                 [ C(RESULT_MISS)   ] = -1,
833         },
834         [ C(OP_PREFETCH) ] = {
835                 [ C(RESULT_ACCESS) ] = 0x0,
836                 [ C(RESULT_MISS)   ] = 0x0,
837         },
838  },
839  [ C(LL  ) ] = {
840         [ C(OP_READ) ] = {
841                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
842                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
843         },
844         [ C(OP_WRITE) ] = {
845                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
846                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
847         },
848         [ C(OP_PREFETCH) ] = {
849                 [ C(RESULT_ACCESS) ] = 0x0,
850                 [ C(RESULT_MISS)   ] = 0x0,
851         },
852  },
853  [ C(DTLB) ] = {
854         [ C(OP_READ) ] = {
855                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
856                 [ C(RESULT_MISS)   ] = 0x108,   /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
857         },
858         [ C(OP_WRITE) ] = {
859                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
860                 [ C(RESULT_MISS)   ] = 0x149,   /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
861         },
862         [ C(OP_PREFETCH) ] = {
863                 [ C(RESULT_ACCESS) ] = 0x0,
864                 [ C(RESULT_MISS)   ] = 0x0,
865         },
866  },
867  [ C(ITLB) ] = {
868         [ C(OP_READ) ] = {
869                 [ C(RESULT_ACCESS) ] = 0x6085,  /* ITLB_MISSES.STLB_HIT */
870                 [ C(RESULT_MISS)   ] = 0x185,   /* ITLB_MISSES.MISS_CAUSES_A_WALK */
871         },
872         [ C(OP_WRITE) ] = {
873                 [ C(RESULT_ACCESS) ] = -1,
874                 [ C(RESULT_MISS)   ] = -1,
875         },
876         [ C(OP_PREFETCH) ] = {
877                 [ C(RESULT_ACCESS) ] = -1,
878                 [ C(RESULT_MISS)   ] = -1,
879         },
880  },
881  [ C(BPU ) ] = {
882         [ C(OP_READ) ] = {
883                 [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
884                 [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
885         },
886         [ C(OP_WRITE) ] = {
887                 [ C(RESULT_ACCESS) ] = -1,
888                 [ C(RESULT_MISS)   ] = -1,
889         },
890         [ C(OP_PREFETCH) ] = {
891                 [ C(RESULT_ACCESS) ] = -1,
892                 [ C(RESULT_MISS)   ] = -1,
893         },
894  },
895  [ C(NODE) ] = {
896         [ C(OP_READ) ] = {
897                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
898                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
899         },
900         [ C(OP_WRITE) ] = {
901                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
902                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
903         },
904         [ C(OP_PREFETCH) ] = {
905                 [ C(RESULT_ACCESS) ] = 0x0,
906                 [ C(RESULT_MISS)   ] = 0x0,
907         },
908  },
909 };
910
911 static __initconst const u64 hsw_hw_cache_extra_regs
912                                 [PERF_COUNT_HW_CACHE_MAX]
913                                 [PERF_COUNT_HW_CACHE_OP_MAX]
914                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
915 {
916  [ C(LL  ) ] = {
917         [ C(OP_READ) ] = {
918                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
919                                        HSW_LLC_ACCESS,
920                 [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
921                                        HSW_L3_MISS|HSW_ANY_SNOOP,
922         },
923         [ C(OP_WRITE) ] = {
924                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
925                                        HSW_LLC_ACCESS,
926                 [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
927                                        HSW_L3_MISS|HSW_ANY_SNOOP,
928         },
929         [ C(OP_PREFETCH) ] = {
930                 [ C(RESULT_ACCESS) ] = 0x0,
931                 [ C(RESULT_MISS)   ] = 0x0,
932         },
933  },
934  [ C(NODE) ] = {
935         [ C(OP_READ) ] = {
936                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
937                                        HSW_L3_MISS_LOCAL_DRAM|
938                                        HSW_SNOOP_DRAM,
939                 [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
940                                        HSW_L3_MISS_REMOTE|
941                                        HSW_SNOOP_DRAM,
942         },
943         [ C(OP_WRITE) ] = {
944                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
945                                        HSW_L3_MISS_LOCAL_DRAM|
946                                        HSW_SNOOP_DRAM,
947                 [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
948                                        HSW_L3_MISS_REMOTE|
949                                        HSW_SNOOP_DRAM,
950         },
951         [ C(OP_PREFETCH) ] = {
952                 [ C(RESULT_ACCESS) ] = 0x0,
953                 [ C(RESULT_MISS)   ] = 0x0,
954         },
955  },
956 };
957
958 static __initconst const u64 westmere_hw_cache_event_ids
959                                 [PERF_COUNT_HW_CACHE_MAX]
960                                 [PERF_COUNT_HW_CACHE_OP_MAX]
961                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
962 {
963  [ C(L1D) ] = {
964         [ C(OP_READ) ] = {
965                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
966                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
967         },
968         [ C(OP_WRITE) ] = {
969                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
970                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
971         },
972         [ C(OP_PREFETCH) ] = {
973                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
974                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
975         },
976  },
977  [ C(L1I ) ] = {
978         [ C(OP_READ) ] = {
979                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
980                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
981         },
982         [ C(OP_WRITE) ] = {
983                 [ C(RESULT_ACCESS) ] = -1,
984                 [ C(RESULT_MISS)   ] = -1,
985         },
986         [ C(OP_PREFETCH) ] = {
987                 [ C(RESULT_ACCESS) ] = 0x0,
988                 [ C(RESULT_MISS)   ] = 0x0,
989         },
990  },
991  [ C(LL  ) ] = {
992         [ C(OP_READ) ] = {
993                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
994                 [ C(RESULT_ACCESS) ] = 0x01b7,
995                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
996                 [ C(RESULT_MISS)   ] = 0x01b7,
997         },
998         /*
999          * Use RFO, not WRITEBACK, because a write miss would typically occur
1000          * on RFO.
1001          */
1002         [ C(OP_WRITE) ] = {
1003                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
1004                 [ C(RESULT_ACCESS) ] = 0x01b7,
1005                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
1006                 [ C(RESULT_MISS)   ] = 0x01b7,
1007         },
1008         [ C(OP_PREFETCH) ] = {
1009                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
1010                 [ C(RESULT_ACCESS) ] = 0x01b7,
1011                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
1012                 [ C(RESULT_MISS)   ] = 0x01b7,
1013         },
1014  },
1015  [ C(DTLB) ] = {
1016         [ C(OP_READ) ] = {
1017                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
1018                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
1019         },
1020         [ C(OP_WRITE) ] = {
1021                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
1022                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
1023         },
1024         [ C(OP_PREFETCH) ] = {
1025                 [ C(RESULT_ACCESS) ] = 0x0,
1026                 [ C(RESULT_MISS)   ] = 0x0,
1027         },
1028  },
1029  [ C(ITLB) ] = {
1030         [ C(OP_READ) ] = {
1031                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
1032                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
1033         },
1034         [ C(OP_WRITE) ] = {
1035                 [ C(RESULT_ACCESS) ] = -1,
1036                 [ C(RESULT_MISS)   ] = -1,
1037         },
1038         [ C(OP_PREFETCH) ] = {
1039                 [ C(RESULT_ACCESS) ] = -1,
1040                 [ C(RESULT_MISS)   ] = -1,
1041         },
1042  },
1043  [ C(BPU ) ] = {
1044         [ C(OP_READ) ] = {
1045                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
1046                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
1047         },
1048         [ C(OP_WRITE) ] = {
1049                 [ C(RESULT_ACCESS) ] = -1,
1050                 [ C(RESULT_MISS)   ] = -1,
1051         },
1052         [ C(OP_PREFETCH) ] = {
1053                 [ C(RESULT_ACCESS) ] = -1,
1054                 [ C(RESULT_MISS)   ] = -1,
1055         },
1056  },
1057  [ C(NODE) ] = {
1058         [ C(OP_READ) ] = {
1059                 [ C(RESULT_ACCESS) ] = 0x01b7,
1060                 [ C(RESULT_MISS)   ] = 0x01b7,
1061         },
1062         [ C(OP_WRITE) ] = {
1063                 [ C(RESULT_ACCESS) ] = 0x01b7,
1064                 [ C(RESULT_MISS)   ] = 0x01b7,
1065         },
1066         [ C(OP_PREFETCH) ] = {
1067                 [ C(RESULT_ACCESS) ] = 0x01b7,
1068                 [ C(RESULT_MISS)   ] = 0x01b7,
1069         },
1070  },
1071 };
1072
1073 /*
1074  * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
1075  * See IA32 SDM Vol 3B 30.6.1.3
1076  */
1077
1078 #define NHM_DMND_DATA_RD        (1 << 0)
1079 #define NHM_DMND_RFO            (1 << 1)
1080 #define NHM_DMND_IFETCH         (1 << 2)
1081 #define NHM_DMND_WB             (1 << 3)
1082 #define NHM_PF_DATA_RD          (1 << 4)
1083 #define NHM_PF_DATA_RFO         (1 << 5)
1084 #define NHM_PF_IFETCH           (1 << 6)
1085 #define NHM_OFFCORE_OTHER       (1 << 7)
1086 #define NHM_UNCORE_HIT          (1 << 8)
1087 #define NHM_OTHER_CORE_HIT_SNP  (1 << 9)
1088 #define NHM_OTHER_CORE_HITM     (1 << 10)
1089                                 /* reserved */
1090 #define NHM_REMOTE_CACHE_FWD    (1 << 12)
1091 #define NHM_REMOTE_DRAM         (1 << 13)
1092 #define NHM_LOCAL_DRAM          (1 << 14)
1093 #define NHM_NON_DRAM            (1 << 15)
1094
1095 #define NHM_LOCAL               (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
1096 #define NHM_REMOTE              (NHM_REMOTE_DRAM)
1097
1098 #define NHM_DMND_READ           (NHM_DMND_DATA_RD)
1099 #define NHM_DMND_WRITE          (NHM_DMND_RFO|NHM_DMND_WB)
1100 #define NHM_DMND_PREFETCH       (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
1101
1102 #define NHM_L3_HIT      (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
1103 #define NHM_L3_MISS     (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
1104 #define NHM_L3_ACCESS   (NHM_L3_HIT|NHM_L3_MISS)
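/*
 * Worked example (derived from the definitions above): the LL read-miss
 * entry below is NHM_DMND_READ | NHM_L3_MISS
 *	= BIT(0) | (BIT(12) | BIT(13) | BIT(14) | BIT(15))
 *	= 0xf001
 * which is the value that ends up in the OFFCORE_RSP MSR for that event.
 */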
1105
1106 static __initconst const u64 nehalem_hw_cache_extra_regs
1107                                 [PERF_COUNT_HW_CACHE_MAX]
1108                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1109                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1110 {
1111  [ C(LL  ) ] = {
1112         [ C(OP_READ) ] = {
1113                 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
1114                 [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
1115         },
1116         [ C(OP_WRITE) ] = {
1117                 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
1118                 [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
1119         },
1120         [ C(OP_PREFETCH) ] = {
1121                 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
1122                 [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
1123         },
1124  },
1125  [ C(NODE) ] = {
1126         [ C(OP_READ) ] = {
1127                 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
1128                 [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
1129         },
1130         [ C(OP_WRITE) ] = {
1131                 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
1132                 [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
1133         },
1134         [ C(OP_PREFETCH) ] = {
1135                 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
1136                 [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
1137         },
1138  },
1139 };
1140
1141 static __initconst const u64 nehalem_hw_cache_event_ids
1142                                 [PERF_COUNT_HW_CACHE_MAX]
1143                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1144                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1145 {
1146  [ C(L1D) ] = {
1147         [ C(OP_READ) ] = {
1148                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
1149                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
1150         },
1151         [ C(OP_WRITE) ] = {
1152                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
1153                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
1154         },
1155         [ C(OP_PREFETCH) ] = {
1156                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
1157                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
1158         },
1159  },
1160  [ C(L1I ) ] = {
1161         [ C(OP_READ) ] = {
1162                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
1163                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
1164         },
1165         [ C(OP_WRITE) ] = {
1166                 [ C(RESULT_ACCESS) ] = -1,
1167                 [ C(RESULT_MISS)   ] = -1,
1168         },
1169         [ C(OP_PREFETCH) ] = {
1170                 [ C(RESULT_ACCESS) ] = 0x0,
1171                 [ C(RESULT_MISS)   ] = 0x0,
1172         },
1173  },
1174  [ C(LL  ) ] = {
1175         [ C(OP_READ) ] = {
1176                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
1177                 [ C(RESULT_ACCESS) ] = 0x01b7,
1178                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
1179                 [ C(RESULT_MISS)   ] = 0x01b7,
1180         },
1181         /*
1182          * Use RFO, not WRITEBACK, because a write miss would typically occur
1183          * on RFO.
1184          */
1185         [ C(OP_WRITE) ] = {
1186                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
1187                 [ C(RESULT_ACCESS) ] = 0x01b7,
1188                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
1189                 [ C(RESULT_MISS)   ] = 0x01b7,
1190         },
1191         [ C(OP_PREFETCH) ] = {
1192                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
1193                 [ C(RESULT_ACCESS) ] = 0x01b7,
1194                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
1195                 [ C(RESULT_MISS)   ] = 0x01b7,
1196         },
1197  },
1198  [ C(DTLB) ] = {
1199         [ C(OP_READ) ] = {
1200                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
1201                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
1202         },
1203         [ C(OP_WRITE) ] = {
1204                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
1205                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
1206         },
1207         [ C(OP_PREFETCH) ] = {
1208                 [ C(RESULT_ACCESS) ] = 0x0,
1209                 [ C(RESULT_MISS)   ] = 0x0,
1210         },
1211  },
1212  [ C(ITLB) ] = {
1213         [ C(OP_READ) ] = {
1214                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
1215                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
1216         },
1217         [ C(OP_WRITE) ] = {
1218                 [ C(RESULT_ACCESS) ] = -1,
1219                 [ C(RESULT_MISS)   ] = -1,
1220         },
1221         [ C(OP_PREFETCH) ] = {
1222                 [ C(RESULT_ACCESS) ] = -1,
1223                 [ C(RESULT_MISS)   ] = -1,
1224         },
1225  },
1226  [ C(BPU ) ] = {
1227         [ C(OP_READ) ] = {
1228                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
1229                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
1230         },
1231         [ C(OP_WRITE) ] = {
1232                 [ C(RESULT_ACCESS) ] = -1,
1233                 [ C(RESULT_MISS)   ] = -1,
1234         },
1235         [ C(OP_PREFETCH) ] = {
1236                 [ C(RESULT_ACCESS) ] = -1,
1237                 [ C(RESULT_MISS)   ] = -1,
1238         },
1239  },
1240  [ C(NODE) ] = {
1241         [ C(OP_READ) ] = {
1242                 [ C(RESULT_ACCESS) ] = 0x01b7,
1243                 [ C(RESULT_MISS)   ] = 0x01b7,
1244         },
1245         [ C(OP_WRITE) ] = {
1246                 [ C(RESULT_ACCESS) ] = 0x01b7,
1247                 [ C(RESULT_MISS)   ] = 0x01b7,
1248         },
1249         [ C(OP_PREFETCH) ] = {
1250                 [ C(RESULT_ACCESS) ] = 0x01b7,
1251                 [ C(RESULT_MISS)   ] = 0x01b7,
1252         },
1253  },
1254 };
1255
1256 static __initconst const u64 core2_hw_cache_event_ids
1257                                 [PERF_COUNT_HW_CACHE_MAX]
1258                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1259                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1260 {
1261  [ C(L1D) ] = {
1262         [ C(OP_READ) ] = {
1263                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
1264                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
1265         },
1266         [ C(OP_WRITE) ] = {
1267                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
1268                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
1269         },
1270         [ C(OP_PREFETCH) ] = {
1271                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
1272                 [ C(RESULT_MISS)   ] = 0,
1273         },
1274  },
1275  [ C(L1I ) ] = {
1276         [ C(OP_READ) ] = {
1277                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
1278                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
1279         },
1280         [ C(OP_WRITE) ] = {
1281                 [ C(RESULT_ACCESS) ] = -1,
1282                 [ C(RESULT_MISS)   ] = -1,
1283         },
1284         [ C(OP_PREFETCH) ] = {
1285                 [ C(RESULT_ACCESS) ] = 0,
1286                 [ C(RESULT_MISS)   ] = 0,
1287         },
1288  },
1289  [ C(LL  ) ] = {
1290         [ C(OP_READ) ] = {
1291                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
1292                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
1293         },
1294         [ C(OP_WRITE) ] = {
1295                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
1296                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
1297         },
1298         [ C(OP_PREFETCH) ] = {
1299                 [ C(RESULT_ACCESS) ] = 0,
1300                 [ C(RESULT_MISS)   ] = 0,
1301         },
1302  },
1303  [ C(DTLB) ] = {
1304         [ C(OP_READ) ] = {
1305                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
1306                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
1307         },
1308         [ C(OP_WRITE) ] = {
1309                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
1310                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
1311         },
1312         [ C(OP_PREFETCH) ] = {
1313                 [ C(RESULT_ACCESS) ] = 0,
1314                 [ C(RESULT_MISS)   ] = 0,
1315         },
1316  },
1317  [ C(ITLB) ] = {
1318         [ C(OP_READ) ] = {
1319                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
1320                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
1321         },
1322         [ C(OP_WRITE) ] = {
1323                 [ C(RESULT_ACCESS) ] = -1,
1324                 [ C(RESULT_MISS)   ] = -1,
1325         },
1326         [ C(OP_PREFETCH) ] = {
1327                 [ C(RESULT_ACCESS) ] = -1,
1328                 [ C(RESULT_MISS)   ] = -1,
1329         },
1330  },
1331  [ C(BPU ) ] = {
1332         [ C(OP_READ) ] = {
1333                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
1334                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
1335         },
1336         [ C(OP_WRITE) ] = {
1337                 [ C(RESULT_ACCESS) ] = -1,
1338                 [ C(RESULT_MISS)   ] = -1,
1339         },
1340         [ C(OP_PREFETCH) ] = {
1341                 [ C(RESULT_ACCESS) ] = -1,
1342                 [ C(RESULT_MISS)   ] = -1,
1343         },
1344  },
1345 };
1346
1347 static __initconst const u64 atom_hw_cache_event_ids
1348                                 [PERF_COUNT_HW_CACHE_MAX]
1349                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1350                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1351 {
1352  [ C(L1D) ] = {
1353         [ C(OP_READ) ] = {
1354                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
1355                 [ C(RESULT_MISS)   ] = 0,
1356         },
1357         [ C(OP_WRITE) ] = {
1358                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
1359                 [ C(RESULT_MISS)   ] = 0,
1360         },
1361         [ C(OP_PREFETCH) ] = {
1362                 [ C(RESULT_ACCESS) ] = 0x0,
1363                 [ C(RESULT_MISS)   ] = 0,
1364         },
1365  },
1366  [ C(L1I ) ] = {
1367         [ C(OP_READ) ] = {
1368                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
1369                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
1370         },
1371         [ C(OP_WRITE) ] = {
1372                 [ C(RESULT_ACCESS) ] = -1,
1373                 [ C(RESULT_MISS)   ] = -1,
1374         },
1375         [ C(OP_PREFETCH) ] = {
1376                 [ C(RESULT_ACCESS) ] = 0,
1377                 [ C(RESULT_MISS)   ] = 0,
1378         },
1379  },
1380  [ C(LL  ) ] = {
1381         [ C(OP_READ) ] = {
1382                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
1383                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
1384         },
1385         [ C(OP_WRITE) ] = {
1386                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
1387                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
1388         },
1389         [ C(OP_PREFETCH) ] = {
1390                 [ C(RESULT_ACCESS) ] = 0,
1391                 [ C(RESULT_MISS)   ] = 0,
1392         },
1393  },
1394  [ C(DTLB) ] = {
1395         [ C(OP_READ) ] = {
1396                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
1397                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
1398         },
1399         [ C(OP_WRITE) ] = {
1400                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
1401                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
1402         },
1403         [ C(OP_PREFETCH) ] = {
1404                 [ C(RESULT_ACCESS) ] = 0,
1405                 [ C(RESULT_MISS)   ] = 0,
1406         },
1407  },
1408  [ C(ITLB) ] = {
1409         [ C(OP_READ) ] = {
1410                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
1411                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
1412         },
1413         [ C(OP_WRITE) ] = {
1414                 [ C(RESULT_ACCESS) ] = -1,
1415                 [ C(RESULT_MISS)   ] = -1,
1416         },
1417         [ C(OP_PREFETCH) ] = {
1418                 [ C(RESULT_ACCESS) ] = -1,
1419                 [ C(RESULT_MISS)   ] = -1,
1420         },
1421  },
1422  [ C(BPU ) ] = {
1423         [ C(OP_READ) ] = {
1424                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
1425                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
1426         },
1427         [ C(OP_WRITE) ] = {
1428                 [ C(RESULT_ACCESS) ] = -1,
1429                 [ C(RESULT_MISS)   ] = -1,
1430         },
1431         [ C(OP_PREFETCH) ] = {
1432                 [ C(RESULT_ACCESS) ] = -1,
1433                 [ C(RESULT_MISS)   ] = -1,
1434         },
1435  },
1436 };
1437
1438 EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
1439 EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
1440 /* no_alloc_cycles.not_delivered */
1441 EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
1442                "event=0xca,umask=0x50");
1443 EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
1444 /* uops_retired.all */
1445 EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
1446                "event=0xc2,umask=0x10");
1447 /* uops_retired.all */
1448 EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
1449                "event=0xc2,umask=0x10");
1450
1451 static struct attribute *slm_events_attrs[] = {
1452         EVENT_PTR(td_total_slots_slm),
1453         EVENT_PTR(td_total_slots_scale_slm),
1454         EVENT_PTR(td_fetch_bubbles_slm),
1455         EVENT_PTR(td_fetch_bubbles_scale_slm),
1456         EVENT_PTR(td_slots_issued_slm),
1457         EVENT_PTR(td_slots_retired_slm),
1458         NULL
1459 };
1460
1461 static struct extra_reg intel_slm_extra_regs[] __read_mostly =
1462 {
1463         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
1464         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
1465         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
1466         EVENT_EXTRA_END
1467 };
1468
1469 #define SLM_DMND_READ           SNB_DMND_DATA_RD
1470 #define SLM_DMND_WRITE          SNB_DMND_RFO
1471 #define SLM_DMND_PREFETCH       (SNB_PF_DATA_RD|SNB_PF_RFO)
1472
1473 #define SLM_SNP_ANY             (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
1474 #define SLM_LLC_ACCESS          SNB_RESP_ANY
1475 #define SLM_LLC_MISS            (SLM_SNP_ANY|SNB_NON_DRAM)
1476
1477 static __initconst const u64 slm_hw_cache_extra_regs
1478                                 [PERF_COUNT_HW_CACHE_MAX]
1479                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1480                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1481 {
1482  [ C(LL  ) ] = {
1483         [ C(OP_READ) ] = {
1484                 [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
1485                 [ C(RESULT_MISS)   ] = 0,
1486         },
1487         [ C(OP_WRITE) ] = {
1488                 [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
1489                 [ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
1490         },
1491         [ C(OP_PREFETCH) ] = {
1492                 [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
1493                 [ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
1494         },
1495  },
1496 };
1497
1498 static __initconst const u64 slm_hw_cache_event_ids
1499                                 [PERF_COUNT_HW_CACHE_MAX]
1500                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1501                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1502 {
1503  [ C(L1D) ] = {
1504         [ C(OP_READ) ] = {
1505                 [ C(RESULT_ACCESS) ] = 0,
1506                 [ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
1507         },
1508         [ C(OP_WRITE) ] = {
1509                 [ C(RESULT_ACCESS) ] = 0,
1510                 [ C(RESULT_MISS)   ] = 0,
1511         },
1512         [ C(OP_PREFETCH) ] = {
1513                 [ C(RESULT_ACCESS) ] = 0,
1514                 [ C(RESULT_MISS)   ] = 0,
1515         },
1516  },
1517  [ C(L1I ) ] = {
1518         [ C(OP_READ) ] = {
1519                 [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
1520                 [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
1521         },
1522         [ C(OP_WRITE) ] = {
1523                 [ C(RESULT_ACCESS) ] = -1,
1524                 [ C(RESULT_MISS)   ] = -1,
1525         },
1526         [ C(OP_PREFETCH) ] = {
1527                 [ C(RESULT_ACCESS) ] = 0,
1528                 [ C(RESULT_MISS)   ] = 0,
1529         },
1530  },
1531  [ C(LL  ) ] = {
1532         [ C(OP_READ) ] = {
1533                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
1534                 [ C(RESULT_ACCESS) ] = 0x01b7,
1535                 [ C(RESULT_MISS)   ] = 0,
1536         },
1537         [ C(OP_WRITE) ] = {
1538                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
1539                 [ C(RESULT_ACCESS) ] = 0x01b7,
1540                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
1541                 [ C(RESULT_MISS)   ] = 0x01b7,
1542         },
1543         [ C(OP_PREFETCH) ] = {
1544                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
1545                 [ C(RESULT_ACCESS) ] = 0x01b7,
1546                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
1547                 [ C(RESULT_MISS)   ] = 0x01b7,
1548         },
1549  },
1550  [ C(DTLB) ] = {
1551         [ C(OP_READ) ] = {
1552                 [ C(RESULT_ACCESS) ] = 0,
1553                 [ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
1554         },
1555         [ C(OP_WRITE) ] = {
1556                 [ C(RESULT_ACCESS) ] = 0,
1557                 [ C(RESULT_MISS)   ] = 0,
1558         },
1559         [ C(OP_PREFETCH) ] = {
1560                 [ C(RESULT_ACCESS) ] = 0,
1561                 [ C(RESULT_MISS)   ] = 0,
1562         },
1563  },
1564  [ C(ITLB) ] = {
1565         [ C(OP_READ) ] = {
1566                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
1567                 [ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
1568         },
1569         [ C(OP_WRITE) ] = {
1570                 [ C(RESULT_ACCESS) ] = -1,
1571                 [ C(RESULT_MISS)   ] = -1,
1572         },
1573         [ C(OP_PREFETCH) ] = {
1574                 [ C(RESULT_ACCESS) ] = -1,
1575                 [ C(RESULT_MISS)   ] = -1,
1576         },
1577  },
1578  [ C(BPU ) ] = {
1579         [ C(OP_READ) ] = {
1580                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
1581                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
1582         },
1583         [ C(OP_WRITE) ] = {
1584                 [ C(RESULT_ACCESS) ] = -1,
1585                 [ C(RESULT_MISS)   ] = -1,
1586         },
1587         [ C(OP_PREFETCH) ] = {
1588                 [ C(RESULT_ACCESS) ] = -1,
1589                 [ C(RESULT_MISS)   ] = -1,
1590         },
1591  },
1592 };
1593
1594 EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
1595 EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
1596 /* UOPS_NOT_DELIVERED.ANY */
1597 EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
1598 /* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
1599 EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
1600 /* UOPS_RETIRED.ANY */
1601 EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
1602 /* UOPS_ISSUED.ANY */
1603 EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
1604
1605 static struct attribute *glm_events_attrs[] = {
1606         EVENT_PTR(td_total_slots_glm),
1607         EVENT_PTR(td_total_slots_scale_glm),
1608         EVENT_PTR(td_fetch_bubbles_glm),
1609         EVENT_PTR(td_recovery_bubbles_glm),
1610         EVENT_PTR(td_slots_issued_glm),
1611         EVENT_PTR(td_slots_retired_glm),
1612         NULL
1613 };
1614
1615 static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
1616         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
1617         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
1618         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
1619         EVENT_EXTRA_END
1620 };
1621
1622 #define GLM_DEMAND_DATA_RD              BIT_ULL(0)
1623 #define GLM_DEMAND_RFO                  BIT_ULL(1)
1624 #define GLM_ANY_RESPONSE                BIT_ULL(16)
1625 #define GLM_SNP_NONE_OR_MISS            BIT_ULL(33)
1626 #define GLM_DEMAND_READ                 GLM_DEMAND_DATA_RD
1627 #define GLM_DEMAND_WRITE                GLM_DEMAND_RFO
1628 #define GLM_DEMAND_PREFETCH             (SNB_PF_DATA_RD|SNB_PF_RFO)
1629 #define GLM_LLC_ACCESS                  GLM_ANY_RESPONSE
1630 #define GLM_SNP_ANY                     (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
1631 #define GLM_LLC_MISS                    (GLM_SNP_ANY|SNB_NON_DRAM)
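
/*
 * Editor's sketch (illustrative, not part of the original source): a worked
 * composition of the Goldmont OFFCORE_RESPONSE masks defined above. Only the
 * bits declared in this block are used; SNB_NO_FWD, SNB_HITM, SNB_PF_* and
 * SNB_NON_DRAM come from earlier in this file. The helper name below is
 * hypothetical and exists purely to show the arithmetic.
 */
static inline u64 glm_llc_read_access_example(void)
{
        /* GLM_DEMAND_DATA_RD | GLM_ANY_RESPONSE == BIT_ULL(0) | BIT_ULL(16) == 0x10001ULL */
        return GLM_DEMAND_READ | GLM_LLC_ACCESS;
}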
1632
1633 static __initconst const u64 glm_hw_cache_event_ids
1634                                 [PERF_COUNT_HW_CACHE_MAX]
1635                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1636                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1637         [C(L1D)] = {
1638                 [C(OP_READ)] = {
1639                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1640                         [C(RESULT_MISS)]        = 0x0,
1641                 },
1642                 [C(OP_WRITE)] = {
1643                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1644                         [C(RESULT_MISS)]        = 0x0,
1645                 },
1646                 [C(OP_PREFETCH)] = {
1647                         [C(RESULT_ACCESS)]      = 0x0,
1648                         [C(RESULT_MISS)]        = 0x0,
1649                 },
1650         },
1651         [C(L1I)] = {
1652                 [C(OP_READ)] = {
1653                         [C(RESULT_ACCESS)]      = 0x0380,       /* ICACHE.ACCESSES */
1654                         [C(RESULT_MISS)]        = 0x0280,       /* ICACHE.MISSES */
1655                 },
1656                 [C(OP_WRITE)] = {
1657                         [C(RESULT_ACCESS)]      = -1,
1658                         [C(RESULT_MISS)]        = -1,
1659                 },
1660                 [C(OP_PREFETCH)] = {
1661                         [C(RESULT_ACCESS)]      = 0x0,
1662                         [C(RESULT_MISS)]        = 0x0,
1663                 },
1664         },
1665         [C(LL)] = {
1666                 [C(OP_READ)] = {
1667                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1668                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1669                 },
1670                 [C(OP_WRITE)] = {
1671                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1672                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1673                 },
1674                 [C(OP_PREFETCH)] = {
1675                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1676                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1677                 },
1678         },
1679         [C(DTLB)] = {
1680                 [C(OP_READ)] = {
1681                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1682                         [C(RESULT_MISS)]        = 0x0,
1683                 },
1684                 [C(OP_WRITE)] = {
1685                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1686                         [C(RESULT_MISS)]        = 0x0,
1687                 },
1688                 [C(OP_PREFETCH)] = {
1689                         [C(RESULT_ACCESS)]      = 0x0,
1690                         [C(RESULT_MISS)]        = 0x0,
1691                 },
1692         },
1693         [C(ITLB)] = {
1694                 [C(OP_READ)] = {
1695                         [C(RESULT_ACCESS)]      = 0x00c0,       /* INST_RETIRED.ANY_P */
1696                         [C(RESULT_MISS)]        = 0x0481,       /* ITLB.MISS */
1697                 },
1698                 [C(OP_WRITE)] = {
1699                         [C(RESULT_ACCESS)]      = -1,
1700                         [C(RESULT_MISS)]        = -1,
1701                 },
1702                 [C(OP_PREFETCH)] = {
1703                         [C(RESULT_ACCESS)]      = -1,
1704                         [C(RESULT_MISS)]        = -1,
1705                 },
1706         },
1707         [C(BPU)] = {
1708                 [C(OP_READ)] = {
1709                         [C(RESULT_ACCESS)]      = 0x00c4,       /* BR_INST_RETIRED.ALL_BRANCHES */
1710                         [C(RESULT_MISS)]        = 0x00c5,       /* BR_MISP_RETIRED.ALL_BRANCHES */
1711                 },
1712                 [C(OP_WRITE)] = {
1713                         [C(RESULT_ACCESS)]      = -1,
1714                         [C(RESULT_MISS)]        = -1,
1715                 },
1716                 [C(OP_PREFETCH)] = {
1717                         [C(RESULT_ACCESS)]      = -1,
1718                         [C(RESULT_MISS)]        = -1,
1719                 },
1720         },
1721 };
1722
1723 static __initconst const u64 glm_hw_cache_extra_regs
1724                                 [PERF_COUNT_HW_CACHE_MAX]
1725                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1726                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1727         [C(LL)] = {
1728                 [C(OP_READ)] = {
1729                         [C(RESULT_ACCESS)]      = GLM_DEMAND_READ|
1730                                                   GLM_LLC_ACCESS,
1731                         [C(RESULT_MISS)]        = GLM_DEMAND_READ|
1732                                                   GLM_LLC_MISS,
1733                 },
1734                 [C(OP_WRITE)] = {
1735                         [C(RESULT_ACCESS)]      = GLM_DEMAND_WRITE|
1736                                                   GLM_LLC_ACCESS,
1737                         [C(RESULT_MISS)]        = GLM_DEMAND_WRITE|
1738                                                   GLM_LLC_MISS,
1739                 },
1740                 [C(OP_PREFETCH)] = {
1741                         [C(RESULT_ACCESS)]      = GLM_DEMAND_PREFETCH|
1742                                                   GLM_LLC_ACCESS,
1743                         [C(RESULT_MISS)]        = GLM_DEMAND_PREFETCH|
1744                                                   GLM_LLC_MISS,
1745                 },
1746         },
1747 };
1748
1749 static __initconst const u64 glp_hw_cache_event_ids
1750                                 [PERF_COUNT_HW_CACHE_MAX]
1751                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1752                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1753         [C(L1D)] = {
1754                 [C(OP_READ)] = {
1755                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1756                         [C(RESULT_MISS)]        = 0x0,
1757                 },
1758                 [C(OP_WRITE)] = {
1759                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1760                         [C(RESULT_MISS)]        = 0x0,
1761                 },
1762                 [C(OP_PREFETCH)] = {
1763                         [C(RESULT_ACCESS)]      = 0x0,
1764                         [C(RESULT_MISS)]        = 0x0,
1765                 },
1766         },
1767         [C(L1I)] = {
1768                 [C(OP_READ)] = {
1769                         [C(RESULT_ACCESS)]      = 0x0380,       /* ICACHE.ACCESSES */
1770                         [C(RESULT_MISS)]        = 0x0280,       /* ICACHE.MISSES */
1771                 },
1772                 [C(OP_WRITE)] = {
1773                         [C(RESULT_ACCESS)]      = -1,
1774                         [C(RESULT_MISS)]        = -1,
1775                 },
1776                 [C(OP_PREFETCH)] = {
1777                         [C(RESULT_ACCESS)]      = 0x0,
1778                         [C(RESULT_MISS)]        = 0x0,
1779                 },
1780         },
1781         [C(LL)] = {
1782                 [C(OP_READ)] = {
1783                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1784                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1785                 },
1786                 [C(OP_WRITE)] = {
1787                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1788                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1789                 },
1790                 [C(OP_PREFETCH)] = {
1791                         [C(RESULT_ACCESS)]      = 0x0,
1792                         [C(RESULT_MISS)]        = 0x0,
1793                 },
1794         },
1795         [C(DTLB)] = {
1796                 [C(OP_READ)] = {
1797                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1798                         [C(RESULT_MISS)]        = 0xe08,        /* DTLB_LOAD_MISSES.WALK_COMPLETED */
1799                 },
1800                 [C(OP_WRITE)] = {
1801                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1802                         [C(RESULT_MISS)]        = 0xe49,        /* DTLB_STORE_MISSES.WALK_COMPLETED */
1803                 },
1804                 [C(OP_PREFETCH)] = {
1805                         [C(RESULT_ACCESS)]      = 0x0,
1806                         [C(RESULT_MISS)]        = 0x0,
1807                 },
1808         },
1809         [C(ITLB)] = {
1810                 [C(OP_READ)] = {
1811                         [C(RESULT_ACCESS)]      = 0x00c0,       /* INST_RETIRED.ANY_P */
1812                         [C(RESULT_MISS)]        = 0x0481,       /* ITLB.MISS */
1813                 },
1814                 [C(OP_WRITE)] = {
1815                         [C(RESULT_ACCESS)]      = -1,
1816                         [C(RESULT_MISS)]        = -1,
1817                 },
1818                 [C(OP_PREFETCH)] = {
1819                         [C(RESULT_ACCESS)]      = -1,
1820                         [C(RESULT_MISS)]        = -1,
1821                 },
1822         },
1823         [C(BPU)] = {
1824                 [C(OP_READ)] = {
1825                         [C(RESULT_ACCESS)]      = 0x00c4,       /* BR_INST_RETIRED.ALL_BRANCHES */
1826                         [C(RESULT_MISS)]        = 0x00c5,       /* BR_MISP_RETIRED.ALL_BRANCHES */
1827                 },
1828                 [C(OP_WRITE)] = {
1829                         [C(RESULT_ACCESS)]      = -1,
1830                         [C(RESULT_MISS)]        = -1,
1831                 },
1832                 [C(OP_PREFETCH)] = {
1833                         [C(RESULT_ACCESS)]      = -1,
1834                         [C(RESULT_MISS)]        = -1,
1835                 },
1836         },
1837 };
1838
1839 static __initconst const u64 glp_hw_cache_extra_regs
1840                                 [PERF_COUNT_HW_CACHE_MAX]
1841                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1842                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1843         [C(LL)] = {
1844                 [C(OP_READ)] = {
1845                         [C(RESULT_ACCESS)]      = GLM_DEMAND_READ|
1846                                                   GLM_LLC_ACCESS,
1847                         [C(RESULT_MISS)]        = GLM_DEMAND_READ|
1848                                                   GLM_LLC_MISS,
1849                 },
1850                 [C(OP_WRITE)] = {
1851                         [C(RESULT_ACCESS)]      = GLM_DEMAND_WRITE|
1852                                                   GLM_LLC_ACCESS,
1853                         [C(RESULT_MISS)]        = GLM_DEMAND_WRITE|
1854                                                   GLM_LLC_MISS,
1855                 },
1856                 [C(OP_PREFETCH)] = {
1857                         [C(RESULT_ACCESS)]      = 0x0,
1858                         [C(RESULT_MISS)]        = 0x0,
1859                 },
1860         },
1861 };
1862
1863 #define TNT_LOCAL_DRAM                  BIT_ULL(26)
1864 #define TNT_DEMAND_READ                 GLM_DEMAND_DATA_RD
1865 #define TNT_DEMAND_WRITE                GLM_DEMAND_RFO
1866 #define TNT_LLC_ACCESS                  GLM_ANY_RESPONSE
1867 #define TNT_SNP_ANY                     (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
1868                                          SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
1869 #define TNT_LLC_MISS                    (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
1870
1871 static __initconst const u64 tnt_hw_cache_extra_regs
1872                                 [PERF_COUNT_HW_CACHE_MAX]
1873                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1874                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1875         [C(LL)] = {
1876                 [C(OP_READ)] = {
1877                         [C(RESULT_ACCESS)]      = TNT_DEMAND_READ|
1878                                                   TNT_LLC_ACCESS,
1879                         [C(RESULT_MISS)]        = TNT_DEMAND_READ|
1880                                                   TNT_LLC_MISS,
1881                 },
1882                 [C(OP_WRITE)] = {
1883                         [C(RESULT_ACCESS)]      = TNT_DEMAND_WRITE|
1884                                                   TNT_LLC_ACCESS,
1885                         [C(RESULT_MISS)]        = TNT_DEMAND_WRITE|
1886                                                   TNT_LLC_MISS,
1887                 },
1888                 [C(OP_PREFETCH)] = {
1889                         [C(RESULT_ACCESS)]      = 0x0,
1890                         [C(RESULT_MISS)]        = 0x0,
1891                 },
1892         },
1893 };
1894
1895 static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
1896         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
1897         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0),
1898         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1),
1899         EVENT_EXTRA_END
1900 };
1901
1902 #define KNL_OT_L2_HITE          BIT_ULL(19) /* Other Tile L2 Hit */
1903 #define KNL_OT_L2_HITF          BIT_ULL(20) /* Other Tile L2 Hit */
1904 #define KNL_MCDRAM_LOCAL        BIT_ULL(21)
1905 #define KNL_MCDRAM_FAR          BIT_ULL(22)
1906 #define KNL_DDR_LOCAL           BIT_ULL(23)
1907 #define KNL_DDR_FAR             BIT_ULL(24)
1908 #define KNL_DRAM_ANY            (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
1909                                     KNL_DDR_LOCAL | KNL_DDR_FAR)
1910 #define KNL_L2_READ             SLM_DMND_READ
1911 #define KNL_L2_WRITE            SLM_DMND_WRITE
1912 #define KNL_L2_PREFETCH         SLM_DMND_PREFETCH
1913 #define KNL_L2_ACCESS           SLM_LLC_ACCESS
1914 #define KNL_L2_MISS             (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
1915                                    KNL_DRAM_ANY | SNB_SNP_ANY | \
1916                                                   SNB_NON_DRAM)
1917
1918 static __initconst const u64 knl_hw_cache_extra_regs
1919                                 [PERF_COUNT_HW_CACHE_MAX]
1920                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1921                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1922         [C(LL)] = {
1923                 [C(OP_READ)] = {
1924                         [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
1925                         [C(RESULT_MISS)]   = 0,
1926                 },
1927                 [C(OP_WRITE)] = {
1928                         [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
1929                         [C(RESULT_MISS)]   = KNL_L2_WRITE | KNL_L2_MISS,
1930                 },
1931                 [C(OP_PREFETCH)] = {
1932                         [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
1933                         [C(RESULT_MISS)]   = KNL_L2_PREFETCH | KNL_L2_MISS,
1934                 },
1935         },
1936 };
1937
1938 /*
1939  * Used from PMIs where the LBRs are already disabled.
1940  *
1941  * This function may be called consecutively; the PMU is required to remain
1942  * in the disabled state across such consecutive calls.
1943  *
1944  * During consecutive calls, the same disable value will be written to related
1945  * registers, so the PMU state remains unchanged.
1946  *
1947  * intel_bts events don't coexist with intel PMU's BTS events because of
1948  * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
1949  * disabled around intel PMU's event batching etc, only inside the PMI handler.
1950  */
1951 static void __intel_pmu_disable_all(void)
1952 {
1953         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1954
1955         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1956
1957         if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1958                 intel_pmu_disable_bts();
1959
1960         intel_pmu_pebs_disable_all();
1961 }
1962
1963 static void intel_pmu_disable_all(void)
1964 {
1965         __intel_pmu_disable_all();
1966         intel_pmu_lbr_disable_all();
1967 }
1968
1969 static void __intel_pmu_enable_all(int added, bool pmi)
1970 {
1971         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1972
1973         intel_pmu_pebs_enable_all();
1974         intel_pmu_lbr_enable_all(pmi);
1975         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
1976                         x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
1977
1978         if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1979                 struct perf_event *event =
1980                         cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
1981
1982                 if (WARN_ON_ONCE(!event))
1983                         return;
1984
1985                 intel_pmu_enable_bts(event->hw.config);
1986         }
1987 }
1988
1989 static void intel_pmu_enable_all(int added)
1990 {
1991         __intel_pmu_enable_all(added, false);
1992 }
1993
1994 /*
1995  * Workaround for:
1996  *   Intel Errata AAK100 (model 26)
1997  *   Intel Errata AAP53  (model 30)
1998  *   Intel Errata BD53   (model 44)
1999  *
2000  * The official story:
2001  *   These chips need to be 'reset' when adding counters by programming the
2002  *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
2003  *   in sequence on the same PMC or on different PMCs.
2004  *
2005  * In practice it appears some of these events do in fact count, and
2006  * we need to program all 4 events.
2007  */
2008 static void intel_pmu_nhm_workaround(void)
2009 {
2010         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2011         static const unsigned long nhm_magic[4] = {
2012                 0x4300B5,
2013                 0x4300D2,
2014                 0x4300B1,
2015                 0x4300B1
2016         };
2017         struct perf_event *event;
2018         int i;
2019
2020         /*
2021          * The errata requires the following steps:
2022          * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
2023          * 2) Configure 4 PERFEVTSELx with the magic events and clear
2024          *    the corresponding PMCx;
2025          * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
2026          * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
2027          * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
2028          */
2029
2030         /*
2031          * The steps we actually take differ slightly from the above:
2032          * A) To reduce MSR operations, we skip step 1) because those MSRs
2033          *    are already cleared before this function is called;
2034          * B) Call x86_perf_event_update to save PMCx before configuring
2035          *    PERFEVTSELx with the magic numbers;
2036          * C) In step 5), clear PERFEVTSELx only when it is not currently
2037          *    in use;
2038          * D) Call x86_perf_event_set_period to restore PMCx.
2039          */
2040
2041         /* We always operate on 4 pairs of PERF counters */
2042         for (i = 0; i < 4; i++) {
2043                 event = cpuc->events[i];
2044                 if (event)
2045                         x86_perf_event_update(event);
2046         }
2047
2048         for (i = 0; i < 4; i++) {
2049                 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
2050                 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
2051         }
2052
2053         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
2054         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
2055
2056         for (i = 0; i < 4; i++) {
2057                 event = cpuc->events[i];
2058
2059                 if (event) {
2060                         x86_perf_event_set_period(event);
2061                         __x86_pmu_enable_event(&event->hw,
2062                                         ARCH_PERFMON_EVENTSEL_ENABLE);
2063                 } else
2064                         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
2065         }
2066 }
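
/*
 * Editor's note (illustrative, not part of the original source): with no
 * events currently scheduled, the workaround above boils down to the MSR
 * sequence
 *
 *      wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0..3, magic);
 *      wrmsrl(MSR_ARCH_PERFMON_PERFCTR0..3, 0);
 *      wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
 *      wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 *      wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0..3, 0);
 *
 * i.e. the counters are briefly enabled on the magic events and then fully
 * cleared again; live events are instead saved with x86_perf_event_update()
 * and restored with x86_perf_event_set_period().
 */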
2067
2068 static void intel_pmu_nhm_enable_all(int added)
2069 {
2070         if (added)
2071                 intel_pmu_nhm_workaround();
2072         intel_pmu_enable_all(added);
2073 }
2074
2075 static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
2076 {
2077         u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
2078
2079         if (cpuc->tfa_shadow != val) {
2080                 cpuc->tfa_shadow = val;
2081                 wrmsrl(MSR_TSX_FORCE_ABORT, val);
2082         }
2083 }
2084
2085 static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
2086 {
2087         /*
2088          * We're going to use PMC3, make sure TFA is set before we touch it.
2089          */
2090         if (cntr == 3)
2091                 intel_set_tfa(cpuc, true);
2092 }
2093
2094 static void intel_tfa_pmu_enable_all(int added)
2095 {
2096         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2097
2098         /*
2099          * If we find PMC3 is no longer used when we enable the PMU, we can
2100          * clear TFA.
2101          */
2102         if (!test_bit(3, cpuc->active_mask))
2103                 intel_set_tfa(cpuc, false);
2104
2105         intel_pmu_enable_all(added);
2106 }
2107
2108 static void enable_counter_freeze(void)
2109 {
2110         update_debugctlmsr(get_debugctlmsr() |
2111                         DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
2112 }
2113
2114 static void disable_counter_freeze(void)
2115 {
2116         update_debugctlmsr(get_debugctlmsr() &
2117                         ~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
2118 }
2119
2120 static inline u64 intel_pmu_get_status(void)
2121 {
2122         u64 status;
2123
2124         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
2125
2126         return status;
2127 }
2128
2129 static inline void intel_pmu_ack_status(u64 ack)
2130 {
2131         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
2132 }
2133
2134 static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
2135 {
2136         int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
2137         u64 ctrl_val, mask;
2138
2139         mask = 0xfULL << (idx * 4);
2140
2141         rdmsrl(hwc->config_base, ctrl_val);
2142         ctrl_val &= ~mask;
2143         wrmsrl(hwc->config_base, ctrl_val);
2144 }
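
/*
 * Editor's sketch (illustrative, not part of the original source): each fixed
 * counter owns a 4-bit control field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL, so
 * the mask computed above for fixed counter 1 is 0xf << 4 == 0xf0, i.e. bits
 * 7:4 are cleared. The helper name is hypothetical.
 */
static inline u64 fixed_ctrl_field_mask_example(int idx)
{
        /* idx is the fixed-counter index, i.e. hwc->idx - INTEL_PMC_IDX_FIXED */
        return 0xfULL << (idx * 4);
}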
2145
2146 static inline bool event_is_checkpointed(struct perf_event *event)
2147 {
2148         return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
2149 }
2150
2151 static void intel_pmu_disable_event(struct perf_event *event)
2152 {
2153         struct hw_perf_event *hwc = &event->hw;
2154         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2155
2156         if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
2157                 intel_pmu_disable_bts();
2158                 intel_pmu_drain_bts_buffer();
2159                 return;
2160         }
2161
2162         cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
2163         cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
2164         cpuc->intel_cp_status &= ~(1ull << hwc->idx);
2165
2166         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
2167                 intel_pmu_disable_fixed(hwc);
2168         else
2169                 x86_pmu_disable_event(event);
2170
2171         /*
2172          * Needs to be called after x86_pmu_disable_event,
2173          * so we don't trigger the event without PEBS bit set.
2174          */
2175         if (unlikely(event->attr.precise_ip))
2176                 intel_pmu_pebs_disable(event);
2177 }
2178
2179 static void intel_pmu_del_event(struct perf_event *event)
2180 {
2181         if (needs_branch_stack(event))
2182                 intel_pmu_lbr_del(event);
2183         if (event->attr.precise_ip)
2184                 intel_pmu_pebs_del(event);
2185 }
2186
2187 static void intel_pmu_read_event(struct perf_event *event)
2188 {
2189         if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2190                 intel_pmu_auto_reload_read(event);
2191         else
2192                 x86_perf_event_update(event);
2193 }
2194
2195 static void intel_pmu_enable_fixed(struct perf_event *event)
2196 {
2197         struct hw_perf_event *hwc = &event->hw;
2198         int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
2199         u64 ctrl_val, mask, bits = 0;
2200
2201         /*
2202          * Enable IRQ generation (0x8), if not PEBS,
2203          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
2204          * if requested:
2205          */
2206         if (!event->attr.precise_ip)
2207                 bits |= 0x8;
2208         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
2209                 bits |= 0x2;
2210         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
2211                 bits |= 0x1;
2212
2213         /*
2214          * ANY bit is supported in v3 and up
2215          */
2216         if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
2217                 bits |= 0x4;
2218
2219         bits <<= (idx * 4);
2220         mask = 0xfULL << (idx * 4);
2221
2222         if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
2223                 bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
2224                 mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
2225         }
2226
2227         rdmsrl(hwc->config_base, ctrl_val);
2228         ctrl_val &= ~mask;
2229         ctrl_val |= bits;
2230         wrmsrl(hwc->config_base, ctrl_val);
2231 }
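
/*
 * Editor's sketch (illustrative, not part of the original source): a worked
 * example of the bit composition above for a non-PEBS event that counts both
 * ring 0 and ring 3 on fixed counter 0: PMI (0x8) + USR (0x2) + OS (0x1)
 * gives 0xb in the low nibble of MSR_ARCH_PERFMON_FIXED_CTR_CTRL. The helper
 * name is hypothetical.
 */
static inline u64 fixed_ctrl_bits_example(void)
{
        /* 0x8 | 0x2 | 0x1 == 0xb, shifted by (idx * 4) == 0 for counter 0 */
        return 0x8 | 0x2 | 0x1;
}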
2232
2233 static void intel_pmu_enable_event(struct perf_event *event)
2234 {
2235         struct hw_perf_event *hwc = &event->hw;
2236         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2237
2238         if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
2239                 if (!__this_cpu_read(cpu_hw_events.enabled))
2240                         return;
2241
2242                 intel_pmu_enable_bts(hwc->config);
2243                 return;
2244         }
2245
2246         if (event->attr.exclude_host)
2247                 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
2248         if (event->attr.exclude_guest)
2249                 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
2250
2251         if (unlikely(event_is_checkpointed(event)))
2252                 cpuc->intel_cp_status |= (1ull << hwc->idx);
2253
2254         if (unlikely(event->attr.precise_ip))
2255                 intel_pmu_pebs_enable(event);
2256
2257         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
2258                 intel_pmu_enable_fixed(event);
2259                 return;
2260         }
2261
2262         __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
2263 }
2264
2265 static void intel_pmu_add_event(struct perf_event *event)
2266 {
2267         if (event->attr.precise_ip)
2268                 intel_pmu_pebs_add(event);
2269         if (needs_branch_stack(event))
2270                 intel_pmu_lbr_add(event);
2271 }
2272
2273 /*
2274  * Save and restart an expired event. Called by NMI contexts,
2275  * so it has to be careful about preempting normal event ops:
2276  */
2277 int intel_pmu_save_and_restart(struct perf_event *event)
2278 {
2279         x86_perf_event_update(event);
2280         /*
2281          * For a checkpointed counter always reset back to 0.  This
2282          * avoids a situation where the counter overflows, aborts the
2283          * transaction and is then set back to a value shortly before
2284          * the overflow point, only to overflow and abort again.
2285          */
2286         if (unlikely(event_is_checkpointed(event))) {
2287                 /* No race with NMIs because the counter should not be armed */
2288                 wrmsrl(event->hw.event_base, 0);
2289                 local64_set(&event->hw.prev_count, 0);
2290         }
2291         return x86_perf_event_set_period(event);
2292 }
2293
2294 static void intel_pmu_reset(void)
2295 {
2296         struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2297         unsigned long flags;
2298         int idx;
2299
2300         if (!x86_pmu.num_counters)
2301                 return;
2302
2303         local_irq_save(flags);
2304
2305         pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
2306
2307         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
2308                 wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
2309                 wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
2310         }
2311         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
2312                 wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
2313
2314         if (ds)
2315                 ds->bts_index = ds->bts_buffer_base;
2316
2317         /* Ack all overflows and disable fixed counters */
2318         if (x86_pmu.version >= 2) {
2319                 intel_pmu_ack_status(intel_pmu_get_status());
2320                 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
2321         }
2322
2323         /* Reset LBRs and LBR freezing */
2324         if (x86_pmu.lbr_nr) {
2325                 update_debugctlmsr(get_debugctlmsr() &
2326                         ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
2327         }
2328
2329         local_irq_restore(flags);
2330 }
2331
2332 static int handle_pmi_common(struct pt_regs *regs, u64 status)
2333 {
2334         struct perf_sample_data data;
2335         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2336         struct perf_guest_info_callbacks *guest_cbs;
2337         int bit;
2338         int handled = 0;
2339
2340         inc_irq_stat(apic_perf_irqs);
2341
2342         /*
2343          * Ignore a range of extra bits in status that do not indicate
2344          * overflow by themselves.
2345          */
2346         status &= ~(GLOBAL_STATUS_COND_CHG |
2347                     GLOBAL_STATUS_ASIF |
2348                     GLOBAL_STATUS_LBRS_FROZEN);
2349         if (!status)
2350                 return 0;
2351         /*
2352          * In case multiple PEBS events are sampled at the same time,
2353          * it is possible to have GLOBAL_STATUS bit 62 set indicating
2354          * PEBS buffer overflow and also seeing at most 3 PEBS counters
2355          * having their bits set in the status register. This is a sign
2356          * that there was at least one PEBS record pending at the time
2357          * of the PMU interrupt. PEBS counters must only be processed
2358          * via the drain_pebs() calls and not via the regular sample
2359          * processing loop that follows later in this function; otherwise
2360          * phony regular samples may be generated in the sampling buffer
2361          * not marked with the EXACT tag. Another possibility is to have
2362          * one PEBS event and at least one non-PEBS event which overflows
2363          * while PEBS is armed. In this case, bit 62 of GLOBAL_STATUS will
2364          * not be set, yet the overflow status bit for the PEBS counter
2365          * will still be set on Skylake.
2366          *
2367          * To avoid this problem, we systematically ignore the PEBS-enabled
2368          * counters from the GLOBAL_STATUS mask and we always process PEBS
2369          * events via drain_pebs().
2370          */
2371         if (x86_pmu.flags & PMU_FL_PEBS_ALL)
2372                 status &= ~cpuc->pebs_enabled;
2373         else
2374                 status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
2375
2376         /*
2377          * PEBS overflow sets bit 62 in the global status register
2378          */
2379         if (__test_and_clear_bit(62, (unsigned long *)&status)) {
2380                 handled++;
2381                 x86_pmu.drain_pebs(regs);
2382                 status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
2383         }
2384
2385         /*
2386          * Intel PT
2387          */
2388         if (__test_and_clear_bit(55, (unsigned long *)&status)) {
2389                 handled++;
2390
2391                 guest_cbs = perf_get_guest_cbs();
2392                 if (unlikely(guest_cbs && guest_cbs->is_in_guest() &&
2393                              guest_cbs->handle_intel_pt_intr))
2394                         guest_cbs->handle_intel_pt_intr();
2395                 else
2396                         intel_pt_interrupt();
2397         }
2398
2399         /*
2400          * Checkpointed counters can lead to 'spurious' PMIs because the
2401          * rollback caused by the PMI will have cleared the overflow status
2402          * bit. Therefore always force probe these counters.
2403          */
2404         status |= cpuc->intel_cp_status;
2405
2406         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
2407                 struct perf_event *event = cpuc->events[bit];
2408
2409                 handled++;
2410
2411                 if (!test_bit(bit, cpuc->active_mask))
2412                         continue;
2413
2414                 if (!intel_pmu_save_and_restart(event))
2415                         continue;
2416
2417                 perf_sample_data_init(&data, 0, event->hw.last_period);
2418
2419                 if (has_branch_stack(event))
2420                         data.br_stack = &cpuc->lbr_stack;
2421
2422                 if (perf_event_overflow(event, &data, regs))
2423                         x86_pmu_stop(event, 0);
2424         }
2425
2426         return handled;
2427 }
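
/*
 * Editor's note (illustrative, not part of the original source): as a worked
 * example of the masking above, assume PEBS is enabled on counter 0 and the
 * PMI arrives with GLOBAL_STATUS bits 62 and 0 set. Bit 0 is stripped from
 * 'status' via the pebs_enabled mask, bit 62 routes the pending record
 * through x86_pmu.drain_pebs(), and the for_each_set_bit() loop therefore
 * never emits a phony, non-EXACT sample for counter 0.
 */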
2428
2429 static bool disable_counter_freezing = true;
2430 static int __init intel_perf_counter_freezing_setup(char *s)
2431 {
2432         bool res;
2433
2434         if (kstrtobool(s, &res))
2435                 return -EINVAL;
2436
2437         disable_counter_freezing = !res;
2438         return 1;
2439 }
2440 __setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
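
/*
 * Editor's note (illustrative, not part of the original source): as read from
 * the setup handler above, counter freezing is disabled by default in this
 * kernel; booting with "perf_v4_pmi=1" (or "on") clears
 * disable_counter_freezing and allows the freezing-based v4 handler below to
 * be used, while "perf_v4_pmi=0" keeps the default behaviour.
 */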
2441
2442 /*
2443  * Simplified handler for Arch Perfmon v4:
2444  * - We rely on counter freezing/unfreezing to enable/disable the PMU.
2445  * This is done automatically on PMU ack.
2446  * - Ack the PMU only after the APIC.
2447  */
2448
2449 static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
2450 {
2451         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2452         int handled = 0;
2453         bool bts = false;
2454         u64 status;
2455         int pmu_enabled = cpuc->enabled;
2456         int loops = 0;
2457
2458         /* PMU has been disabled because of counter freezing */
2459         cpuc->enabled = 0;
2460         if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
2461                 bts = true;
2462                 intel_bts_disable_local();
2463                 handled = intel_pmu_drain_bts_buffer();
2464                 handled += intel_bts_interrupt();
2465         }
2466         status = intel_pmu_get_status();
2467         if (!status)
2468                 goto done;
2469 again:
2470         intel_pmu_lbr_read();
2471         if (++loops > 100) {
2472                 static bool warned;
2473
2474                 if (!warned) {
2475                         WARN(1, "perfevents: irq loop stuck!\n");
2476                         perf_event_print_debug();
2477                         warned = true;
2478                 }
2479                 intel_pmu_reset();
2480                 goto done;
2481         }
2482
2483
2484         handled += handle_pmi_common(regs, status);
2485 done:
2486         /* Ack the PMI in the APIC */
2487         apic_write(APIC_LVTPC, APIC_DM_NMI);
2488
2489         /*
2490          * The counters start counting immediately while we ack the status.
2491          * Make it as close as possible to IRET. This avoids bogus
2492          * freezing on Skylake CPUs.
2493          */
2494         if (status) {
2495                 intel_pmu_ack_status(status);
2496         } else {
2497                 /*
2498                  * The CPU may issue two PMIs very close to each other.
2499                  * When the PMI handler services the first one, the
2500                  * GLOBAL_STATUS is already updated to reflect both.
2501                  * When it IRETs, the second PMI is handled immediately
2502                  * and sees a clear status. In the meantime, there may
2503                  * be a third PMI, because the freeze bit was already
2504                  * cleared by the ack in the first PMI handler.
2505                  * Double check if there is more work to be done.
2506                  */
2507                 status = intel_pmu_get_status();
2508                 if (status)
2509                         goto again;
2510         }
2511
2512         if (bts)
2513                 intel_bts_enable_local();
2514         cpuc->enabled = pmu_enabled;
2515         return handled;
2516 }
2517
2518 /*
2519  * This handler is triggered by the local APIC, so the APIC IRQ handling
2520  * rules apply:
2521  */
2522 static int intel_pmu_handle_irq(struct pt_regs *regs)
2523 {
2524         struct cpu_hw_events *cpuc;
2525         int loops;
2526         u64 status;
2527         int handled;
2528         int pmu_enabled;
2529
2530         cpuc = this_cpu_ptr(&cpu_hw_events);
2531
2532         /*
2533          * Save the PMU state.
2534          * It needs to be restored when leaving the handler.
2535          */
2536         pmu_enabled = cpuc->enabled;
2537         /*
2538          * There is no known reason not to always do the late ACK,
2539          * but just in case, make it opt-in.
2540          */
2541         if (!x86_pmu.late_ack)
2542                 apic_write(APIC_LVTPC, APIC_DM_NMI);
2543         intel_bts_disable_local();
2544         cpuc->enabled = 0;
2545         __intel_pmu_disable_all();
2546         handled = intel_pmu_drain_bts_buffer();
2547         handled += intel_bts_interrupt();
2548         status = intel_pmu_get_status();
2549         if (!status)
2550                 goto done;
2551
2552         loops = 0;
2553 again:
2554         intel_pmu_lbr_read();
2555         intel_pmu_ack_status(status);
2556         if (++loops > 100) {
2557                 static bool warned;
2558
2559                 if (!warned) {
2560                         WARN(1, "perfevents: irq loop stuck!\n");
2561                         perf_event_print_debug();
2562                         warned = true;
2563                 }
2564                 intel_pmu_reset();
2565                 goto done;
2566         }
2567
2568         handled += handle_pmi_common(regs, status);
2569
2570         /*
2571          * Repeat if there is more work to be done:
2572          */
2573         status = intel_pmu_get_status();
2574         if (status)
2575                 goto again;
2576
2577 done:
2578         /* Only restore PMU state when it's active. See x86_pmu_disable(). */
2579         cpuc->enabled = pmu_enabled;
2580         if (pmu_enabled)
2581                 __intel_pmu_enable_all(0, true);
2582         intel_bts_enable_local();
2583
2584         /*
2585          * Only unmask the NMI after the overflow counters
2586          * have been reset. This avoids spurious NMIs on
2587          * Haswell CPUs.
2588          */
2589         if (x86_pmu.late_ack)
2590                 apic_write(APIC_LVTPC, APIC_DM_NMI);
2591         return handled;
2592 }
2593
2594 static struct event_constraint *
2595 intel_bts_constraints(struct perf_event *event)
2596 {
2597         if (unlikely(intel_pmu_has_bts(event)))
2598                 return &bts_constraint;
2599
2600         return NULL;
2601 }
2602
2603 static int intel_alt_er(int idx, u64 config)
2604 {
2605         int alt_idx = idx;
2606
2607         if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
2608                 return idx;
2609
2610         if (idx == EXTRA_REG_RSP_0)
2611                 alt_idx = EXTRA_REG_RSP_1;
2612
2613         if (idx == EXTRA_REG_RSP_1)
2614                 alt_idx = EXTRA_REG_RSP_0;
2615
2616         if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
2617                 return idx;
2618
2619         return alt_idx;
2620 }
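
/*
 * Editor's note (illustrative, not part of the original source): for example,
 * when called with idx == EXTRA_REG_RSP_0, intel_alt_er() proposes
 * EXTRA_REG_RSP_1 provided the requested config fits entirely within RSP_1's
 * valid_mask (the two masks can differ, e.g. 0x768005ffff vs 0x368005ffff in
 * the Silvermont table earlier in this file); otherwise the original index is
 * returned and the caller keeps contending for the same register.
 */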
2621
2622 static void intel_fixup_er(struct perf_event *event, int idx)
2623 {
2624         event->hw.extra_reg.idx = idx;
2625
2626         if (idx == EXTRA_REG_RSP_0) {
2627                 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
2628                 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
2629                 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
2630         } else if (idx == EXTRA_REG_RSP_1) {
2631                 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
2632                 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
2633                 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
2634         }
2635 }
2636
2637 /*
2638  * manage allocation of shared extra msr for certain events
2639  *
2640  * sharing can be:
2641  * per-cpu: to be shared between the various events on a single PMU
2642  * per-core: per-cpu + shared by HT threads
2643  */
2644 static struct event_constraint *
2645 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
2646                                    struct perf_event *event,
2647                                    struct hw_perf_event_extra *reg)
2648 {
2649         struct event_constraint *c = &emptyconstraint;
2650         struct er_account *era;
2651         unsigned long flags;
2652         int idx = reg->idx;
2653
2654         /*
2655          * reg->alloc can be set due to existing state, so for fake cpuc we
2656          * need to ignore this, otherwise we might fail to allocate proper fake
2657          * state for this extra reg constraint. Also see the comment below.
2658          */
2659         if (reg->alloc && !cpuc->is_fake)
2660                 return NULL; /* call x86_get_event_constraints() */
2661
2662 again:
2663         era = &cpuc->shared_regs->regs[idx];
2664         /*
2665          * we use raw_spin_lock_irqsave() to avoid lockdep issues when
2666          * passing a fake cpuc
2667          */
2668         raw_spin_lock_irqsave(&era->lock, flags);
2669
2670         if (!atomic_read(&era->ref) || era->config == reg->config) {
2671
2672                 /*
2673                  * If it's a fake cpuc -- as per validate_{group,event}() -- we
2674                  * shouldn't touch event state, and we can avoid doing so
2675                  * since both will only call get_event_constraints() once
2676                  * on each event; this avoids the need for reg->alloc.
2677                  *
2678                  * Not doing the ER fixup will only result in era->reg being
2679                  * wrong, but since we won't actually try and program hardware
2680                  * this isn't a problem either.
2681                  */
2682                 if (!cpuc->is_fake) {
2683                         if (idx != reg->idx)
2684                                 intel_fixup_er(event, idx);
2685
2686                         /*
2687                          * x86_schedule_events() can call get_event_constraints()
2688                          * multiple times on events in the case of incremental
2689                          * scheduling. reg->alloc ensures we only do the ER
2690                          * allocation once.
2691                          */
2692                         reg->alloc = 1;
2693                 }
2694
2695                 /* lock in msr value */
2696                 era->config = reg->config;
2697                 era->reg = reg->reg;
2698
2699                 /* one more user */
2700                 atomic_inc(&era->ref);
2701
2702                 /*
2703                  * need to call x86_get_event_constraints()
2704                  * to check if the associated event has constraints
2705                  */
2706                 c = NULL;
2707         } else {
2708                 idx = intel_alt_er(idx, reg->config);
2709                 if (idx != reg->idx) {
2710                         raw_spin_unlock_irqrestore(&era->lock, flags);
2711                         goto again;
2712                 }
2713         }
2714         raw_spin_unlock_irqrestore(&era->lock, flags);
2715
2716         return c;
2717 }
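
/*
 * A short worked example of the allocation above (for parts that set
 * PMU_FL_HAS_RSP_1): two events on the same core both ask for
 * EXTRA_REG_RSP_0 but with different config1 values.  The first one grabs
 * the RSP_0 er_account and bumps its refcount.  The second fails the
 * era->config check, is steered to EXTRA_REG_RSP_1 by intel_alt_er() and
 * retries there.  If RSP_1 is already taken with yet another config,
 * intel_alt_er() wraps back to the original index, the loop stops, and the
 * event gets &emptyconstraint, i.e. it cannot be scheduled this time.
 */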
2718
2719 static void
2720 __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
2721                                    struct hw_perf_event_extra *reg)
2722 {
2723         struct er_account *era;
2724
2725         /*
2726          * Only put constraint if extra reg was actually allocated. Also takes
2727          * care of events which do not use an extra shared reg.
2728          *
2729          * Also, if this is a fake cpuc we shouldn't touch any event state
2730          * (reg->alloc) and we don't care about leaving inconsistent cpuc state
2731          * either since it'll be thrown out.
2732          */
2733         if (!reg->alloc || cpuc->is_fake)
2734                 return;
2735
2736         era = &cpuc->shared_regs->regs[reg->idx];
2737
2738         /* one fewer user */
2739         atomic_dec(&era->ref);
2740
2741         /* allocate again next time */
2742         reg->alloc = 0;
2743 }
2744
2745 static struct event_constraint *
2746 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
2747                               struct perf_event *event)
2748 {
2749         struct event_constraint *c = NULL, *d;
2750         struct hw_perf_event_extra *xreg, *breg;
2751
2752         xreg = &event->hw.extra_reg;
2753         if (xreg->idx != EXTRA_REG_NONE) {
2754                 c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
2755                 if (c == &emptyconstraint)
2756                         return c;
2757         }
2758         breg = &event->hw.branch_reg;
2759         if (breg->idx != EXTRA_REG_NONE) {
2760                 d = __intel_shared_reg_get_constraints(cpuc, event, breg);
2761                 if (d == &emptyconstraint) {
2762                         __intel_shared_reg_put_constraints(cpuc, xreg);
2763                         c = d;
2764                 }
2765         }
2766         return c;
2767 }
2768
2769 struct event_constraint *
2770 x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
2771                           struct perf_event *event)
2772 {
2773         struct event_constraint *c;
2774
2775         if (x86_pmu.event_constraints) {
2776                 for_each_event_constraint(c, x86_pmu.event_constraints) {
2777                         if (constraint_match(c, event->hw.config)) {
2778                                 event->hw.flags |= c->flags;
2779                                 return c;
2780                         }
2781                 }
2782         }
2783
2784         return &unconstrained;
2785 }
2786
2787 static struct event_constraint *
2788 __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
2789                             struct perf_event *event)
2790 {
2791         struct event_constraint *c;
2792
2793         c = intel_bts_constraints(event);
2794         if (c)
2795                 return c;
2796
2797         c = intel_shared_regs_constraints(cpuc, event);
2798         if (c)
2799                 return c;
2800
2801         c = intel_pebs_constraints(event);
2802         if (c)
2803                 return c;
2804
2805         return x86_get_event_constraints(cpuc, idx, event);
2806 }
2807
2808 static void
2809 intel_start_scheduling(struct cpu_hw_events *cpuc)
2810 {
2811         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2812         struct intel_excl_states *xl;
2813         int tid = cpuc->excl_thread_id;
2814
2815         /*
2816          * nothing needed if in group validation mode
2817          */
2818         if (cpuc->is_fake || !is_ht_workaround_enabled())
2819                 return;
2820
2821         /*
2822          * no exclusion needed
2823          */
2824         if (WARN_ON_ONCE(!excl_cntrs))
2825                 return;
2826
2827         xl = &excl_cntrs->states[tid];
2828
2829         xl->sched_started = true;
2830         /*
2831          * lock shared state until we are done scheduling,
2832          * i.e. until intel_stop_scheduling();
2833          * this makes scheduling appear as a transaction
2834          */
2835         raw_spin_lock(&excl_cntrs->lock);
2836 }
2837
2838 static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
2839 {
2840         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2841         struct event_constraint *c = cpuc->event_constraint[idx];
2842         struct intel_excl_states *xl;
2843         int tid = cpuc->excl_thread_id;
2844
2845         if (cpuc->is_fake || !is_ht_workaround_enabled())
2846                 return;
2847
2848         if (WARN_ON_ONCE(!excl_cntrs))
2849                 return;
2850
2851         if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
2852                 return;
2853
2854         xl = &excl_cntrs->states[tid];
2855
2856         lockdep_assert_held(&excl_cntrs->lock);
2857
2858         if (c->flags & PERF_X86_EVENT_EXCL)
2859                 xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
2860         else
2861                 xl->state[cntr] = INTEL_EXCL_SHARED;
2862 }
2863
2864 static void
2865 intel_stop_scheduling(struct cpu_hw_events *cpuc)
2866 {
2867         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2868         struct intel_excl_states *xl;
2869         int tid = cpuc->excl_thread_id;
2870
2871         /*
2872          * nothing needed if in group validation mode
2873          */
2874         if (cpuc->is_fake || !is_ht_workaround_enabled())
2875                 return;
2876         /*
2877          * no exclusion needed
2878          */
2879         if (WARN_ON_ONCE(!excl_cntrs))
2880                 return;
2881
2882         xl = &excl_cntrs->states[tid];
2883
2884         xl->sched_started = false;
2885         /*
2886          * release shared state lock (acquired in intel_start_scheduling())
2887          */
2888         raw_spin_unlock(&excl_cntrs->lock);
2889 }
2890
2891 static struct event_constraint *
2892 dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
2893 {
2894         WARN_ON_ONCE(!cpuc->constraint_list);
2895
2896         if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
2897                 struct event_constraint *cx;
2898
2899                 /*
2900                  * grab pre-allocated constraint entry
2901                  */
2902                 cx = &cpuc->constraint_list[idx];
2903
2904                 /*
2905                  * initialize dynamic constraint
2906                  * with static constraint
2907                  */
2908                 *cx = *c;
2909
2910                 /*
2911                  * mark constraint as dynamic
2912                  */
2913                 cx->flags |= PERF_X86_EVENT_DYNAMIC;
2914                 c = cx;
2915         }
2916
2917         return c;
2918 }
2919
2920 static struct event_constraint *
2921 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
2922                            int idx, struct event_constraint *c)
2923 {
2924         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2925         struct intel_excl_states *xlo;
2926         int tid = cpuc->excl_thread_id;
2927         int is_excl, i, w;
2928
2929         /*
2930          * validating a group does not require
2931          * enforcing cross-thread exclusion
2932          */
2933         if (cpuc->is_fake || !is_ht_workaround_enabled())
2934                 return c;
2935
2936         /*
2937          * no exclusion needed
2938          */
2939         if (WARN_ON_ONCE(!excl_cntrs))
2940                 return c;
2941
2942         /*
2943          * because we modify the constraint, we need
2944          * to make a copy. Static constraints come
2945          * from static const tables.
2946          *
2947          * only needed when constraint has not yet
2948          * been cloned (marked dynamic)
2949          */
2950         c = dyn_constraint(cpuc, c, idx);
2951
2952         /*
2953          * From here on, the constraint is dynamic.
2954          * Either it was just allocated above, or it
2955                  * was allocated during an earlier invocation
2956          * of this function
2957          */
2958
2959         /*
2960          * state of sibling HT
2961          */
2962         xlo = &excl_cntrs->states[tid ^ 1];
2963
2964         /*
2965          * event requires exclusive counter access
2966          * across HT threads
2967          */
2968         is_excl = c->flags & PERF_X86_EVENT_EXCL;
2969         if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
2970                 event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
2971                 if (!cpuc->n_excl++)
2972                         WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
2973         }
2974
2975         /*
2976          * Modify static constraint with current dynamic
2977          * state of thread
2978          *
2979          * EXCLUSIVE: sibling counter measuring exclusive event
2980          * SHARED   : sibling counter measuring non-exclusive event
2981          * UNUSED   : sibling counter unused
2982          */
2983         w = c->weight;
2984         for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
2985                 /*
2986                  * exclusive event in sibling counter
2987                  * our corresponding counter cannot be used
2988                  * regardless of our event
2989                  */
2990                 if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
2991                         __clear_bit(i, c->idxmsk);
2992                         w--;
2993                         continue;
2994                 }
2995                 /*
2996                  * if we are measuring an exclusive event and the
2997                  * sibling is measuring a non-exclusive one, then the
2998                  * counter cannot be used
2999                  */
3000                 if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
3001                         __clear_bit(i, c->idxmsk);
3002                         w--;
3003                         continue;
3004                 }
3005         }
3006
3007         /*
3008          * if we return an empty mask, then switch
3009          * back to static empty constraint to avoid
3010          * the cost of freeing later on
3011          */
3012         if (!w)
3013                 c = &emptyconstraint;
3014
3015         c->weight = w;
3016
3017         return c;
3018 }
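
/*
 * Worked example of the mask trimming above, assuming four generic
 * counters and a sibling thread whose state is
 * { EXCLUSIVE, SHARED, UNUSED, UNUSED }:
 *
 *   - a non-exclusive event starting from idxmsk 0xf only loses counter 0
 *     (the sibling holds it exclusively) and ends up with 0xe, weight 3;
 *   - an exclusive event additionally loses counter 1, because the sibling
 *     measures a non-exclusive event there, ending up with 0xc, weight 2.
 */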
3019
3020 static struct event_constraint *
3021 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3022                             struct perf_event *event)
3023 {
3024         struct event_constraint *c1, *c2;
3025
3026         c1 = cpuc->event_constraint[idx];
3027
3028         /*
3029          * first time only
3030          * - static constraint: no change across incremental scheduling calls
3031          * - dynamic constraint: handled by intel_get_excl_constraints()
3032          */
3033         c2 = __intel_get_event_constraints(cpuc, idx, event);
3034         if (c1) {
3035                 WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
3036                 bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
3037                 c1->weight = c2->weight;
3038                 c2 = c1;
3039         }
3040
3041         if (cpuc->excl_cntrs)
3042                 return intel_get_excl_constraints(cpuc, event, idx, c2);
3043
3044         return c2;
3045 }
3046
3047 static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
3048                 struct perf_event *event)
3049 {
3050         struct hw_perf_event *hwc = &event->hw;
3051         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
3052         int tid = cpuc->excl_thread_id;
3053         struct intel_excl_states *xl;
3054
3055         /*
3056          * nothing needed if in group validation mode
3057          */
3058         if (cpuc->is_fake)
3059                 return;
3060
3061         if (WARN_ON_ONCE(!excl_cntrs))
3062                 return;
3063
3064         if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
3065                 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
3066                 if (!--cpuc->n_excl)
3067                         WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
3068         }
3069
3070         /*
3071          * If event was actually assigned, then mark the counter state as
3072          * unused now.
3073          */
3074         if (hwc->idx >= 0) {
3075                 xl = &excl_cntrs->states[tid];
3076
3077                 /*
3078                  * put_constraint may be called from x86_schedule_events(),
3079                  * which already holds the lock, so make the locking
3080                  * conditional here.
3081                  */
3082                 if (!xl->sched_started)
3083                         raw_spin_lock(&excl_cntrs->lock);
3084
3085                 xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
3086
3087                 if (!xl->sched_started)
3088                         raw_spin_unlock(&excl_cntrs->lock);
3089         }
3090 }
3091
3092 static void
3093 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
3094                                         struct perf_event *event)
3095 {
3096         struct hw_perf_event_extra *reg;
3097
3098         reg = &event->hw.extra_reg;
3099         if (reg->idx != EXTRA_REG_NONE)
3100                 __intel_shared_reg_put_constraints(cpuc, reg);
3101
3102         reg = &event->hw.branch_reg;
3103         if (reg->idx != EXTRA_REG_NONE)
3104                 __intel_shared_reg_put_constraints(cpuc, reg);
3105 }
3106
3107 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
3108                                         struct perf_event *event)
3109 {
3110         intel_put_shared_regs_event_constraints(cpuc, event);
3111
3112         /*
3113          * If the PMU has exclusive counter restrictions, then
3114          * all events are subject to them and must call the
3115          * put_excl_constraints() routine.
3116          */
3117         if (cpuc->excl_cntrs)
3118                 intel_put_excl_constraints(cpuc, event);
3119 }
3120
3121 static void intel_pebs_aliases_core2(struct perf_event *event)
3122 {
3123         if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
3124                 /*
3125                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
3126                  * (0x003c) so that we can use it with PEBS.
3127                  *
3128                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
3129                  * PEBS capable. However we can use INST_RETIRED.ANY_P
3130                  * (0x00c0), which is a PEBS capable event, to get the same
3131                  * count.
3132                  *
3133                  * INST_RETIRED.ANY_P counts the number of cycles that retire
3134                  * CNTMASK instructions. By setting CNTMASK to a value (16)
3135                  * larger than the maximum number of instructions that can be
3136                  * retired per cycle (4) and then inverting the condition, we
3137                  * count all cycles that retire 16 or fewer instructions, which
3138                  * is every cycle.
3139                  *
3140                  * Thereby we gain a PEBS capable cycle counter.
3141                  */
3142                 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
3143
3144                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
3145                 event->hw.config = alt_config;
3146         }
3147 }
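
/*
 * For reference, with the architectural EVENTSEL layout (event select in
 * bits 0-7, umask in bits 8-15, INV at bit 23, CMASK in bits 24-31) the
 * alternative encoding above works out to:
 *
 *	X86_CONFIG(.event=0xc0, .inv=1, .cmask=16)
 *		== (16 << 24) | (1 << 23) | 0xc0 == 0x108000c0
 */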
3148
3149 static void intel_pebs_aliases_snb(struct perf_event *event)
3150 {
3151         if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
3152                 /*
3153                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
3154                  * (0x003c) so that we can use it with PEBS.
3155                  *
3156                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
3157                  * PEBS capable. However we can use UOPS_RETIRED.ALL
3158                  * (0x01c2), which is a PEBS capable event, to get the same
3159                  * count.
3160                  *
3161                  * UOPS_RETIRED.ALL counts the number of cycles that retire
3162                  * CNTMASK micro-ops. By setting CNTMASK to a value (16)
3163                  * larger than the maximum number of micro-ops that can be
3164                  * retired per cycle (4) and then inverting the condition, we
3165                  * count all cycles that retire 16 or fewer micro-ops, which
3166                  * is every cycle.
3167                  *
3168                  * Thereby we gain a PEBS capable cycle counter.
3169                  */
3170                 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
3171
3172                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
3173                 event->hw.config = alt_config;
3174         }
3175 }
3176
3177 static void intel_pebs_aliases_precdist(struct perf_event *event)
3178 {
3179         if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
3180                 /*
3181                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
3182                  * (0x003c) so that we can use it with PEBS.
3183                  *
3184                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
3185                  * PEBS capable. However we can use INST_RETIRED.PREC_DIST
3186                  * (0x01c0), which is a PEBS capable event, to get the same
3187                  * count.
3188                  *
3189                  * The PREC_DIST event has special support to minimize sample
3190                  * shadowing effects. One drawback is that it can only be
3191                  * programmed on counter 1, but that seems like an
3192                  * acceptable trade-off.
3193                  */
3194                 u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
3195
3196                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
3197                 event->hw.config = alt_config;
3198         }
3199 }
3200
3201 static void intel_pebs_aliases_ivb(struct perf_event *event)
3202 {
3203         if (event->attr.precise_ip < 3)
3204                 return intel_pebs_aliases_snb(event);
3205         return intel_pebs_aliases_precdist(event);
3206 }
3207
3208 static void intel_pebs_aliases_skl(struct perf_event *event)
3209 {
3210         if (event->attr.precise_ip < 3)
3211                 return intel_pebs_aliases_core2(event);
3212         return intel_pebs_aliases_precdist(event);
3213 }
3214
3215 static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
3216 {
3217         unsigned long flags = x86_pmu.large_pebs_flags;
3218
3219         if (event->attr.use_clockid)
3220                 flags &= ~PERF_SAMPLE_TIME;
3221         if (!event->attr.exclude_kernel)
3222                 flags &= ~PERF_SAMPLE_REGS_USER;
3223         if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
3224                 flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
3225         return flags;
3226 }
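
/*
 * E.g. an event that sets attr.use_clockid and also samples
 * PERF_SAMPLE_TIME will have a sample_type bit outside the mask returned
 * here, so intel_pmu_hw_config() below will not set
 * PERF_X86_EVENT_LARGE_PEBS for it.
 */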
3227
3228 static int intel_pmu_bts_config(struct perf_event *event)
3229 {
3230         struct perf_event_attr *attr = &event->attr;
3231
3232         if (unlikely(intel_pmu_has_bts(event))) {
3233                 /* BTS is not supported by this architecture. */
3234                 if (!x86_pmu.bts_active)
3235                         return -EOPNOTSUPP;
3236
3237                 /* BTS is currently only allowed for user-mode. */
3238                 if (!attr->exclude_kernel)
3239                         return -EOPNOTSUPP;
3240
3241                 /* BTS is not allowed for precise events. */
3242                 if (attr->precise_ip)
3243                         return -EOPNOTSUPP;
3244
3245                 /* disallow bts if conflicting events are present */
3246                 if (x86_add_exclusive(x86_lbr_exclusive_lbr))
3247                         return -EBUSY;
3248
3249                 event->destroy = hw_perf_lbr_event_destroy;
3250         }
3251
3252         return 0;
3253 }
3254
3255 static int core_pmu_hw_config(struct perf_event *event)
3256 {
3257         int ret = x86_pmu_hw_config(event);
3258
3259         if (ret)
3260                 return ret;
3261
3262         return intel_pmu_bts_config(event);
3263 }
3264
3265 static int intel_pmu_hw_config(struct perf_event *event)
3266 {
3267         int ret = x86_pmu_hw_config(event);
3268
3269         if (ret)
3270                 return ret;
3271
3272         ret = intel_pmu_bts_config(event);
3273         if (ret)
3274                 return ret;
3275
3276         if (event->attr.precise_ip) {
3277                 if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
3278                         event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
3279                         if (!(event->attr.sample_type &
3280                               ~intel_pmu_large_pebs_flags(event)))
3281                                 event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
3282                 }
3283                 if (x86_pmu.pebs_aliases)
3284                         x86_pmu.pebs_aliases(event);
3285
3286                 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
3287                         event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
3288         }
3289
3290         if (needs_branch_stack(event)) {
3291                 ret = intel_pmu_setup_lbr_filter(event);
3292                 if (ret)
3293                         return ret;
3294
3295                 /*
3296                  * BTS is set up earlier in this path, so don't account twice
3297                  */
3298                 if (!unlikely(intel_pmu_has_bts(event))) {
3299                         /* disallow lbr if conflicting events are present */
3300                         if (x86_add_exclusive(x86_lbr_exclusive_lbr))
3301                                 return -EBUSY;
3302
3303                         event->destroy = hw_perf_lbr_event_destroy;
3304                 }
3305         }
3306
3307         if (event->attr.aux_output) {
3308                 if (!event->attr.precise_ip)
3309                         return -EINVAL;
3310
3311                 event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
3312         }
3313
3314         if (event->attr.type != PERF_TYPE_RAW)
3315                 return 0;
3316
3317         if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
3318                 return 0;
3319
3320         if (x86_pmu.version < 3)
3321                 return -EINVAL;
3322
3323         if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
3324                 return -EACCES;
3325
3326         event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
3327
3328         return 0;
3329 }
3330
3331 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
3332 {
3333         if (x86_pmu.guest_get_msrs)
3334                 return x86_pmu.guest_get_msrs(nr);
3335         *nr = 0;
3336         return NULL;
3337 }
3338 EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
3339
3340 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
3341 {
3342         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3343         struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
3344
3345         arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
3346         arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
3347         arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
3348         if (x86_pmu.flags & PMU_FL_PEBS_ALL)
3349                 arr[0].guest &= ~cpuc->pebs_enabled;
3350         else
3351                 arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
3352         *nr = 1;
3353
3354         if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
3355                 /*
3356                  * If a PMU counter has PEBS enabled, it is not enough to
3357                  * disable the counter on guest entry, since a PEBS memory
3358                  * write can overshoot guest entry and corrupt guest
3359                  * memory. Disabling PEBS solves the problem.
3360                  *
3361                  * Don't do this if the CPU already enforces it.
3362                  */
3363                 arr[1].msr = MSR_IA32_PEBS_ENABLE;
3364                 arr[1].host = cpuc->pebs_enabled;
3365                 arr[1].guest = 0;
3366                 *nr = 2;
3367         }
3368
3369         return arr;
3370 }
3371
3372 static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
3373 {
3374         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3375         struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
3376         int idx;
3377
3378         for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
3379                 struct perf_event *event = cpuc->events[idx];
3380
3381                 arr[idx].msr = x86_pmu_config_addr(idx);
3382                 arr[idx].host = arr[idx].guest = 0;
3383
3384                 if (!test_bit(idx, cpuc->active_mask))
3385                         continue;
3386
3387                 arr[idx].host = arr[idx].guest =
3388                         event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
3389
3390                 if (event->attr.exclude_host)
3391                         arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
3392                 else if (event->attr.exclude_guest)
3393                         arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
3394         }
3395
3396         *nr = x86_pmu.num_counters;
3397         return arr;
3398 }
3399
3400 static void core_pmu_enable_event(struct perf_event *event)
3401 {
3402         if (!event->attr.exclude_host)
3403                 x86_pmu_enable_event(event);
3404 }
3405
3406 static void core_pmu_enable_all(int added)
3407 {
3408         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3409         int idx;
3410
3411         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
3412                 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
3413
3414                 if (!test_bit(idx, cpuc->active_mask) ||
3415                                 cpuc->events[idx]->attr.exclude_host)
3416                         continue;
3417
3418                 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
3419         }
3420 }
3421
3422 static int hsw_hw_config(struct perf_event *event)
3423 {
3424         int ret = intel_pmu_hw_config(event);
3425
3426         if (ret)
3427                 return ret;
3428         if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
3429                 return 0;
3430         event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
3431
3432         /*
3433          * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
3434          * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
3435          * this combination.
3436          */
3437         if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
3438              ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
3439               event->attr.precise_ip > 0))
3440                 return -EOPNOTSUPP;
3441
3442         if (event_is_checkpointed(event)) {
3443                 /*
3444                  * Sampling of checkpointed events can cause situations where
3445                  * the CPU constantly aborts because of an overflow, which is
3446                  * then checkpointed back and ignored. Forbid checkpointing
3447                  * for sampling.
3448                  *
3449                  * But still allow a long sampling period, so that perf stat
3450                  * from KVM works.
3451                  */
3452                 if (event->attr.sample_period > 0 &&
3453                     event->attr.sample_period < 0x7fffffff)
3454                         return -EOPNOTSUPP;
3455         }
3456         return 0;
3457 }
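
/*
 * In practice a checkpointed event can thus still be counted (no
 * sample_period at all) or sampled with a huge period (>= 0x7fffffff, as
 * used by perf stat from KVM), while ordinary sampling periods are
 * rejected with -EOPNOTSUPP.
 */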
3458
3459 static struct event_constraint counter0_constraint =
3460                         INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
3461
3462 static struct event_constraint counter2_constraint =
3463                         EVENT_CONSTRAINT(0, 0x4, 0);
3464
3465 static struct event_constraint fixed0_constraint =
3466                         FIXED_EVENT_CONSTRAINT(0x00c0, 0);
3467
3468 static struct event_constraint fixed0_counter0_constraint =
3469                         INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
3470
3471 static struct event_constraint *
3472 hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3473                           struct perf_event *event)
3474 {
3475         struct event_constraint *c;
3476
3477         c = intel_get_event_constraints(cpuc, idx, event);
3478
3479         /* Handle special quirk on in_tx_checkpointed only in counter 2 */
3480         if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
3481                 if (c->idxmsk64 & (1U << 2))
3482                         return &counter2_constraint;
3483                 return &emptyconstraint;
3484         }
3485
3486         return c;
3487 }
3488
3489 static struct event_constraint *
3490 icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3491                           struct perf_event *event)
3492 {
3493         /*
3494          * Fixed counter 0 has less skid.
3495          * Force instruction:ppp in Fixed counter 0
3496          */
3497         if ((event->attr.precise_ip == 3) &&
3498             constraint_match(&fixed0_constraint, event->hw.config))
3499                 return &fixed0_constraint;
3500
3501         return hsw_get_event_constraints(cpuc, idx, event);
3502 }
3503
3504 static struct event_constraint *
3505 glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3506                           struct perf_event *event)
3507 {
3508         struct event_constraint *c;
3509
3510         /* :ppp means to do reduced skid PEBS which is PMC0 only. */
3511         if (event->attr.precise_ip == 3)
3512                 return &counter0_constraint;
3513
3514         c = intel_get_event_constraints(cpuc, idx, event);
3515
3516         return c;
3517 }
3518
3519 static struct event_constraint *
3520 tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3521                           struct perf_event *event)
3522 {
3523         struct event_constraint *c;
3524
3525         /*
3526          * :ppp means to do reduced skid PEBS,
3527          * which is available on PMC0 and fixed counter 0.
3528          */
3529         if (event->attr.precise_ip == 3) {
3530                 /* Force instruction:ppp on PMC0 and Fixed counter 0 */
3531                 if (constraint_match(&fixed0_constraint, event->hw.config))
3532                         return &fixed0_counter0_constraint;
3533
3534                 return &counter0_constraint;
3535         }
3536
3537         c = intel_get_event_constraints(cpuc, idx, event);
3538
3539         return c;
3540 }
3541
3542 static bool allow_tsx_force_abort = true;
3543
3544 static struct event_constraint *
3545 tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3546                           struct perf_event *event)
3547 {
3548         struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
3549
3550         /*
3551          * Without TFA we must not use PMC3.
3552          */
3553         if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
3554                 c = dyn_constraint(cpuc, c, idx);
3555                 c->idxmsk64 &= ~(1ULL << 3);
3556                 c->weight--;
3557         }
3558
3559         return c;
3560 }
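
/*
 * E.g. with allow_tsx_force_abort clear, an event whose static constraint
 * covers PMC0-3 (idxmsk 0xf, weight 4) is first cloned via
 * dyn_constraint() and then trimmed to PMC0-2 (idxmsk 0x7, weight 3), so
 * it is never scheduled on PMC3.
 */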
3561
3562 /*
3563  * Broadwell:
3564  *
3565  * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
3566  * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
3567  * the two to enforce a minimum period of 128 (the smallest value that has bits
3568  * 0-5 cleared and >= 100).
3569  *
3570  * Because of how the code in x86_perf_event_set_period() works, the truncation
3571  * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
3572  * to make up for the 'lost' events due to carrying the 'error' in period_left.
3573  *
3574  * Therefore the effective (average) period matches the requested period,
3575  * despite coarser hardware granularity.
3576  */
3577 static u64 bdw_limit_period(struct perf_event *event, u64 left)
3578 {
3579         if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
3580                         X86_CONFIG(.event=0xc0, .umask=0x01)) {
3581                 if (left < 128)
3582                         left = 128;
3583                 left &= ~0x3fULL;
3584         }
3585         return left;
3586 }
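
/*
 * E.g. a requested period of 100 for INST_RETIRED.ALL is first raised to
 * 128 (which already has bits 0-5 clear), while a requested period of 200
 * is rounded down to 192.
 */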
3587
3588 static u64 nhm_limit_period(struct perf_event *event, u64 left)
3589 {
3590         return max(left, 32ULL);
3591 }
3592
3593 PMU_FORMAT_ATTR(event,  "config:0-7"    );
3594 PMU_FORMAT_ATTR(umask,  "config:8-15"   );
3595 PMU_FORMAT_ATTR(edge,   "config:18"     );
3596 PMU_FORMAT_ATTR(pc,     "config:19"     );
3597 PMU_FORMAT_ATTR(any,    "config:21"     ); /* v3 + */
3598 PMU_FORMAT_ATTR(inv,    "config:23"     );
3599 PMU_FORMAT_ATTR(cmask,  "config:24-31"  );
3600 PMU_FORMAT_ATTR(in_tx,  "config:32");
3601 PMU_FORMAT_ATTR(in_tx_cp, "config:33");
3602
3603 static struct attribute *intel_arch_formats_attr[] = {
3604         &format_attr_event.attr,
3605         &format_attr_umask.attr,
3606         &format_attr_edge.attr,
3607         &format_attr_pc.attr,
3608         &format_attr_inv.attr,
3609         &format_attr_cmask.attr,
3610         NULL,
3611 };
3612
3613 ssize_t intel_event_sysfs_show(char *page, u64 config)
3614 {
3615         u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
3616
3617         return x86_event_sysfs_show(page, config, event);
3618 }
3619
3620 static struct intel_shared_regs *allocate_shared_regs(int cpu)
3621 {
3622         struct intel_shared_regs *regs;
3623         int i;
3624
3625         regs = kzalloc_node(sizeof(struct intel_shared_regs),
3626                             GFP_KERNEL, cpu_to_node(cpu));
3627         if (regs) {
3628                 /*
3629                  * initialize the locks to keep lockdep happy
3630                  */
3631                 for (i = 0; i < EXTRA_REG_MAX; i++)
3632                         raw_spin_lock_init(&regs->regs[i].lock);
3633
3634                 regs->core_id = -1;
3635         }
3636         return regs;
3637 }
3638
3639 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
3640 {
3641         struct intel_excl_cntrs *c;
3642
3643         c = kzalloc_node(sizeof(struct intel_excl_cntrs),
3644                          GFP_KERNEL, cpu_to_node(cpu));
3645         if (c) {
3646                 raw_spin_lock_init(&c->lock);
3647                 c->core_id = -1;
3648         }
3649         return c;
3650 }
3651
3652
3653 int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
3654 {
3655         cpuc->pebs_record_size = x86_pmu.pebs_record_size;
3656
3657         if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
3658                 cpuc->shared_regs = allocate_shared_regs(cpu);
3659                 if (!cpuc->shared_regs)
3660                         goto err;
3661         }
3662
3663         if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
3664                 size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
3665
3666                 cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
3667                 if (!cpuc->constraint_list)
3668                         goto err_shared_regs;
3669         }
3670
3671         if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
3672                 cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
3673                 if (!cpuc->excl_cntrs)
3674                         goto err_constraint_list;
3675
3676                 cpuc->excl_thread_id = 0;
3677         }
3678
3679         return 0;
3680
3681 err_constraint_list:
3682         kfree(cpuc->constraint_list);
3683         cpuc->constraint_list = NULL;
3684
3685 err_shared_regs:
3686         kfree(cpuc->shared_regs);
3687         cpuc->shared_regs = NULL;
3688
3689 err:
3690         return -ENOMEM;
3691 }
3692
3693 static int intel_pmu_cpu_prepare(int cpu)
3694 {
3695         return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
3696 }
3697
3698 static void flip_smm_bit(void *data)
3699 {
3700         unsigned long set = *(unsigned long *)data;
3701
3702         if (set > 0) {
3703                 msr_set_bit(MSR_IA32_DEBUGCTLMSR,
3704                             DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
3705         } else {
3706                 msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
3707                               DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
3708         }
3709 }
3710
3711 static void intel_pmu_cpu_starting(int cpu)
3712 {
3713         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
3714         int core_id = topology_core_id(cpu);
3715         int i;
3716
3717         init_debug_store_on_cpu(cpu);
3718         /*
3719          * Deal with CPUs that don't clear their LBRs on power-up.
3720          */
3721         intel_pmu_lbr_reset();
3722
3723         cpuc->lbr_sel = NULL;
3724
3725         if (x86_pmu.flags & PMU_FL_TFA) {
3726                 WARN_ON_ONCE(cpuc->tfa_shadow);
3727                 cpuc->tfa_shadow = ~0ULL;
3728                 intel_set_tfa(cpuc, false);
3729         }
3730
3731         if (x86_pmu.version > 1)
3732                 flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
3733
3734         if (x86_pmu.counter_freezing)
3735                 enable_counter_freeze();
3736
3737         if (!cpuc->shared_regs)
3738                 return;
3739
3740         if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
3741                 for_each_cpu(i, topology_sibling_cpumask(cpu)) {
3742                         struct intel_shared_regs *pc;
3743
3744                         pc = per_cpu(cpu_hw_events, i).shared_regs;
3745                         if (pc && pc->core_id == core_id) {
3746                                 cpuc->kfree_on_online[0] = cpuc->shared_regs;
3747                                 cpuc->shared_regs = pc;
3748                                 break;
3749                         }
3750                 }
3751                 cpuc->shared_regs->core_id = core_id;
3752                 cpuc->shared_regs->refcnt++;
3753         }
3754
3755         if (x86_pmu.lbr_sel_map)
3756                 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
3757
3758         if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
3759                 for_each_cpu(i, topology_sibling_cpumask(cpu)) {
3760                         struct cpu_hw_events *sibling;
3761                         struct intel_excl_cntrs *c;
3762
3763                         sibling = &per_cpu(cpu_hw_events, i);
3764                         c = sibling->excl_cntrs;
3765                         if (c && c->core_id == core_id) {
3766                                 cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
3767                                 cpuc->excl_cntrs = c;
3768                                 if (!sibling->excl_thread_id)
3769                                         cpuc->excl_thread_id = 1;
3770                                 break;
3771                         }
3772                 }
3773                 cpuc->excl_cntrs->core_id = core_id;
3774                 cpuc->excl_cntrs->refcnt++;
3775         }
3776 }
3777
3778 static void free_excl_cntrs(struct cpu_hw_events *cpuc)
3779 {
3780         struct intel_excl_cntrs *c;
3781
3782         c = cpuc->excl_cntrs;
3783         if (c) {
3784                 if (c->core_id == -1 || --c->refcnt == 0)
3785                         kfree(c);
3786                 cpuc->excl_cntrs = NULL;
3787         }
3788
3789         kfree(cpuc->constraint_list);
3790         cpuc->constraint_list = NULL;
3791 }
3792
3793 static void intel_pmu_cpu_dying(int cpu)
3794 {
3795         fini_debug_store_on_cpu(cpu);
3796
3797         if (x86_pmu.counter_freezing)
3798                 disable_counter_freeze();
3799 }
3800
3801 void intel_cpuc_finish(struct cpu_hw_events *cpuc)
3802 {
3803         struct intel_shared_regs *pc;
3804
3805         pc = cpuc->shared_regs;
3806         if (pc) {
3807                 if (pc->core_id == -1 || --pc->refcnt == 0)
3808                         kfree(pc);
3809                 cpuc->shared_regs = NULL;
3810         }
3811
3812         free_excl_cntrs(cpuc);
3813 }
3814
3815 static void intel_pmu_cpu_dead(int cpu)
3816 {
3817         intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu));
3818 }
3819
3820 static void intel_pmu_sched_task(struct perf_event_context *ctx,
3821                                  bool sched_in)
3822 {
3823         intel_pmu_pebs_sched_task(ctx, sched_in);
3824         intel_pmu_lbr_sched_task(ctx, sched_in);
3825 }
3826
3827 static int intel_pmu_check_period(struct perf_event *event, u64 value)
3828 {
3829         return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
3830 }
3831
3832 static int intel_pmu_aux_output_match(struct perf_event *event)
3833 {
3834         if (!x86_pmu.intel_cap.pebs_output_pt_available)
3835                 return 0;
3836
3837         return is_intel_pt_event(event);
3838 }
3839
3840 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
3841
3842 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
3843
3844 PMU_FORMAT_ATTR(frontend, "config1:0-23");
3845
3846 static struct attribute *intel_arch3_formats_attr[] = {
3847         &format_attr_event.attr,
3848         &format_attr_umask.attr,
3849         &format_attr_edge.attr,
3850         &format_attr_pc.attr,
3851         &format_attr_any.attr,
3852         &format_attr_inv.attr,
3853         &format_attr_cmask.attr,
3854         NULL,
3855 };
3856
3857 static struct attribute *hsw_format_attr[] = {
3858         &format_attr_in_tx.attr,
3859         &format_attr_in_tx_cp.attr,
3860         &format_attr_offcore_rsp.attr,
3861         &format_attr_ldlat.attr,
3862         NULL
3863 };
3864
3865 static struct attribute *nhm_format_attr[] = {
3866         &format_attr_offcore_rsp.attr,
3867         &format_attr_ldlat.attr,
3868         NULL
3869 };
3870
3871 static struct attribute *slm_format_attr[] = {
3872         &format_attr_offcore_rsp.attr,
3873         NULL
3874 };
3875
3876 static struct attribute *skl_format_attr[] = {
3877         &format_attr_frontend.attr,
3878         NULL,
3879 };
3880
3881 static __initconst const struct x86_pmu core_pmu = {
3882         .name                   = "core",
3883         .handle_irq             = x86_pmu_handle_irq,
3884         .disable_all            = x86_pmu_disable_all,
3885         .enable_all             = core_pmu_enable_all,
3886         .enable                 = core_pmu_enable_event,
3887         .disable                = x86_pmu_disable_event,
3888         .hw_config              = core_pmu_hw_config,
3889         .schedule_events        = x86_schedule_events,
3890         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
3891         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
3892         .event_map              = intel_pmu_event_map,
3893         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
3894         .apic                   = 1,
3895         .large_pebs_flags       = LARGE_PEBS_FLAGS,
3896
3897         /*
3898          * Intel PMCs cannot be accessed sanely above 32-bit width,
3899          * so we install an artificial 1<<31 period regardless of
3900          * the generic event period:
3901          */
3902         .max_period             = (1ULL<<31) - 1,
3903         .get_event_constraints  = intel_get_event_constraints,
3904         .put_event_constraints  = intel_put_event_constraints,
3905         .event_constraints      = intel_core_event_constraints,
3906         .guest_get_msrs         = core_guest_get_msrs,
3907         .format_attrs           = intel_arch_formats_attr,
3908         .events_sysfs_show      = intel_event_sysfs_show,
3909
3910         /*
3911          * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
3912          * together with PMU version 1 and thus end up using core_pmu with
3913          * shared_regs. We need the following callbacks here to allocate
3914          * it properly.
3915          */
3916         .cpu_prepare            = intel_pmu_cpu_prepare,
3917         .cpu_starting           = intel_pmu_cpu_starting,
3918         .cpu_dying              = intel_pmu_cpu_dying,
3919         .cpu_dead               = intel_pmu_cpu_dead,
3920
3921         .check_period           = intel_pmu_check_period,
3922 };
3923
3924 static __initconst const struct x86_pmu intel_pmu = {
3925         .name                   = "Intel",
3926         .handle_irq             = intel_pmu_handle_irq,
3927         .disable_all            = intel_pmu_disable_all,
3928         .enable_all             = intel_pmu_enable_all,
3929         .enable                 = intel_pmu_enable_event,
3930         .disable                = intel_pmu_disable_event,
3931         .add                    = intel_pmu_add_event,
3932         .del                    = intel_pmu_del_event,
3933         .read                   = intel_pmu_read_event,
3934         .hw_config              = intel_pmu_hw_config,
3935         .schedule_events        = x86_schedule_events,
3936         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
3937         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
3938         .event_map              = intel_pmu_event_map,
3939         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
3940         .apic                   = 1,
3941         .large_pebs_flags       = LARGE_PEBS_FLAGS,
3942         /*
3943          * Intel PMCs cannot be accessed sanely above 32 bit width,
3944          * so we install an artificial 1<<31 period regardless of
3945          * the generic event period:
3946          */
3947         .max_period             = (1ULL << 31) - 1,
3948         .get_event_constraints  = intel_get_event_constraints,
3949         .put_event_constraints  = intel_put_event_constraints,
3950         .pebs_aliases           = intel_pebs_aliases_core2,
3951
3952         .format_attrs           = intel_arch3_formats_attr,
3953         .events_sysfs_show      = intel_event_sysfs_show,
3954
3955         .cpu_prepare            = intel_pmu_cpu_prepare,
3956         .cpu_starting           = intel_pmu_cpu_starting,
3957         .cpu_dying              = intel_pmu_cpu_dying,
3958         .cpu_dead               = intel_pmu_cpu_dead,
3959
3960         .guest_get_msrs         = intel_guest_get_msrs,
3961         .sched_task             = intel_pmu_sched_task,
3962
3963         .check_period           = intel_pmu_check_period,
3964
3965         .aux_output_match       = intel_pmu_aux_output_match,
3966 };
3967
3968 static __init void intel_clovertown_quirk(void)
3969 {
3970         /*
3971          * PEBS is unreliable due to:
3972          *
3973          *   AJ67  - PEBS may experience CPL leaks
3974          *   AJ68  - PEBS PMI may be delayed by one event
3975          *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
3976          *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
3977          *
3978          * AJ67 could be worked around by restricting the OS/USR flags.
3979          * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
3980          *
3981          * AJ106 could possibly be worked around by not allowing LBR
3982          *       usage from PEBS, including the fixup.
3983          * AJ68  could possibly be worked around by always programming
3984          *       a pebs_event_reset[0] value and coping with the lost events.
3985          *
3986          * But taken together it might just make sense to not enable PEBS on
3987          * these chips.
3988          */
3989         pr_warn("PEBS disabled due to CPU errata\n");
3990         x86_pmu.pebs = 0;
3991         x86_pmu.pebs_constraints = NULL;
3992 }
3993
3994 static const struct x86_cpu_desc isolation_ucodes[] = {
3995         INTEL_CPU_DESC(INTEL_FAM6_HASWELL,               3, 0x0000001f),
3996         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L,             1, 0x0000001e),
3997         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G,             1, 0x00000015),
3998         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,             2, 0x00000037),
3999         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,             4, 0x0000000a),
4000         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL,             4, 0x00000023),
4001         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G,           1, 0x00000014),
4002         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,           2, 0x00000010),
4003         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,           3, 0x07000009),
4004         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,           4, 0x0f000009),
4005         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,           5, 0x0e000002),
4006         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X,           1, 0x0b000014),
4007         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             3, 0x00000021),
4008         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             4, 0x00000000),
4009         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             5, 0x00000000),
4010         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             6, 0x00000000),
4011         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             7, 0x00000000),
4012         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,            11, 0x00000000),
4013         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L,             3, 0x0000007c),
4014         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE,               3, 0x0000007c),
4015         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,              9, 0x0000004e),
4016         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,            9, 0x0000004e),
4017         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,           10, 0x0000004e),
4018         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,           11, 0x0000004e),
4019         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,           12, 0x0000004e),
4020         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,             10, 0x0000004e),
4021         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,             11, 0x0000004e),
4022         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,             12, 0x0000004e),
4023         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,             13, 0x0000004e),
4024         {}
4025 };
4026
4027 static void intel_check_pebs_isolation(void)
4028 {
4029         x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
4030 }
4031
4032 static __init void intel_pebs_isolation_quirk(void)
4033 {
4034         WARN_ON_ONCE(x86_pmu.check_microcode);
4035         x86_pmu.check_microcode = intel_check_pebs_isolation;
4036         intel_check_pebs_isolation();
4037 }
4038
4039 static const struct x86_cpu_desc pebs_ucodes[] = {
4040         INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE,          7, 0x00000028),
4041         INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X,        6, 0x00000618),
4042         INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X,        7, 0x0000070c),
4043         {}
4044 };
4045
4046 static bool intel_snb_pebs_broken(void)
4047 {
4048         return !x86_cpu_has_min_microcode_rev(pebs_ucodes);
4049 }
4050
4051 static void intel_snb_check_microcode(void)
4052 {
4053         if (intel_snb_pebs_broken() == x86_pmu.pebs_broken)
4054                 return;
4055
4056         /*
4057          * Serialized by the microcode lock.
4058          */
4059         if (x86_pmu.pebs_broken) {
4060                 pr_info("PEBS enabled due to microcode update\n");
4061                 x86_pmu.pebs_broken = 0;
4062         } else {
4063                 pr_info("PEBS disabled due to CPU errata/*(DEBLOBBED)*/\n");
4064                 x86_pmu.pebs_broken = 1;
4065         }
4066 }
4067
4068 static bool is_lbr_from(unsigned long msr)
4069 {
4070         unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
4071
4072         return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
4073 }
4074
4075 /*
4076  * Under certain circumstances, accessing certain MSRs may cause a #GP.
4077  * This function tests whether the input MSR can be safely accessed.
4078  */
4079 static bool check_msr(unsigned long msr, u64 mask)
4080 {
4081         u64 val_old, val_new, val_tmp;
4082
4083         /*
4084          * Disable the check for real HW, so we don't
4085          * mess with potentially enabled registers:
4086          */
4087         if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
4088                 return true;
4089
4090         /*
4091          * Read the current value, change it and read it back to see if it
4092          * matches; this is needed to detect certain hardware emulators
4093          * (qemu/kvm) that don't trap on the MSR access and always return 0s.
4094          */
4095         if (rdmsrl_safe(msr, &val_old))
4096                 return false;
4097
4098         /*
4099          * Only change the bits which can be updated by wrmsrl.
4100          */
4101         val_tmp = val_old ^ mask;
4102
4103         if (is_lbr_from(msr))
4104                 val_tmp = lbr_from_signext_quirk_wr(val_tmp);
4105
4106         if (wrmsrl_safe(msr, val_tmp) ||
4107             rdmsrl_safe(msr, &val_new))
4108                 return false;
4109
4110         /*
4111          * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
4112          * should equal rdmsrl()'s even with the quirk.
4113          */
4114         if (val_new != val_tmp)
4115                 return false;
4116
4117         if (is_lbr_from(msr))
4118                 val_old = lbr_from_signext_quirk_wr(val_old);
4119
4120         /* At this point it is certain that the MSR can be safely accessed.
4121          * Restore the old value and return.
4122          */
4123         wrmsrl(msr, val_old);
4124
4125         return true;
4126 }
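
/*
 * A typical use (a sketch, along the lines of the LBR sanity checks done
 * at init time): flip a couple of low bits in an LBR MSR and see whether
 * the write sticks, e.g.:
 *
 *	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
 *		x86_pmu.lbr_nr = 0;
 */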
4127
4128 static __init void intel_sandybridge_quirk(void)
4129 {
4130         x86_pmu.check_microcode = intel_snb_check_microcode;
4131         cpus_read_lock();
4132         intel_snb_check_microcode();
4133         cpus_read_unlock();
4134 }
4135
4136 static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
4137         { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
4138         { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
4139         { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
4140         { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
4141         { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
4142         { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
4143         { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
4144 };
4145
4146 static __init void intel_arch_events_quirk(void)
4147 {
4148         int bit;
4149
4150         /* disable events that are reported as not present by CPUID */
4151         for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
4152                 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
4153                 pr_warn("CPUID marked event: \'%s\' unavailable\n",
4154                         intel_arch_events_map[bit].name);
4155         }
4156 }
4157
4158 static __init void intel_nehalem_quirk(void)
4159 {
4160         union cpuid10_ebx ebx;
4161
4162         ebx.full = x86_pmu.events_maskl;
4163         if (ebx.split.no_branch_misses_retired) {
4164                 /*
4165                  * Erratum AAJ80 detected, we work it around by using
4166                  * the BR_MISP_EXEC.ANY event. This will over-count
4167                  * branch-misses, but it's still much better than the
4168                  * architectural event which is often completely bogus:
4169                  */
4170                 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
4171                 ebx.split.no_branch_misses_retired = 0;
4172                 x86_pmu.events_maskl = ebx.full;
4173                 pr_info("CPU erratum AAJ80 worked around\n");
4174         }
4175 }
4176
4177 static const struct x86_cpu_desc counter_freezing_ucodes[] = {
4178         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,         2, 0x0000000e),
4179         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,         9, 0x0000002e),
4180         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,        10, 0x00000008),
4181         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D,       1, 0x00000028),
4182         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS,    1, 0x00000028),
4183         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS,    8, 0x00000006),
4184         {}
4185 };
4186
4187 static bool intel_counter_freezing_broken(void)
4188 {
4189         return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
4190 }
4191
4192 static __init void intel_counter_freezing_quirk(void)
4193 {
4194         /* Check if it's already disabled */
4195         if (disable_counter_freezing)
4196                 return;
4197
4198         /*
4199          * If the system starts with the wrong ucode, leave the
4200          * counter-freezing feature permanently disabled.
4201          */
4202         if (intel_counter_freezing_broken()) {
4203                 pr_info("PMU counter freezing disabled due to CPU errata,"
4204                         "please /*(DEBLOBBED)*/\n");
4205                 x86_pmu.counter_freezing = false;
4206                 x86_pmu.handle_irq = intel_pmu_handle_irq;
4207         }
4208 }
4209
4210 /*
4211  * enable software workaround for errata:
4212  * SNB: BJ122
4213  * IVB: BV98
4214  * HSW: HSD29
4215  *
4216  * Only needed when HT is enabled. However, detecting
4217  * whether HT is enabled is difficult (model specific). So instead,
4218  * we enable the workaround in early boot, and verify whether
4219  * it is needed in a later initcall phase once we have valid
4220  * topology information to check if HT is actually enabled.
4221  */
4222 static __init void intel_ht_bug(void)
4223 {
4224         x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
4225
4226         x86_pmu.start_scheduling = intel_start_scheduling;
4227         x86_pmu.commit_scheduling = intel_commit_scheduling;
4228         x86_pmu.stop_scheduling = intel_stop_scheduling;
4229 }
4230
4231 EVENT_ATTR_STR(mem-loads,       mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
4232 EVENT_ATTR_STR(mem-stores,      mem_st_hsw,     "event=0xd0,umask=0x82");
4233
4234 /* Haswell special events */
4235 EVENT_ATTR_STR(tx-start,        tx_start,       "event=0xc9,umask=0x1");
4236 EVENT_ATTR_STR(tx-commit,       tx_commit,      "event=0xc9,umask=0x2");
4237 EVENT_ATTR_STR(tx-abort,        tx_abort,       "event=0xc9,umask=0x4");
4238 EVENT_ATTR_STR(tx-capacity,     tx_capacity,    "event=0x54,umask=0x2");
4239 EVENT_ATTR_STR(tx-conflict,     tx_conflict,    "event=0x54,umask=0x1");
4240 EVENT_ATTR_STR(el-start,        el_start,       "event=0xc8,umask=0x1");
4241 EVENT_ATTR_STR(el-commit,       el_commit,      "event=0xc8,umask=0x2");
4242 EVENT_ATTR_STR(el-abort,        el_abort,       "event=0xc8,umask=0x4");
4243 EVENT_ATTR_STR(el-capacity,     el_capacity,    "event=0x54,umask=0x2");
4244 EVENT_ATTR_STR(el-conflict,     el_conflict,    "event=0x54,umask=0x1");
4245 EVENT_ATTR_STR(cycles-t,        cycles_t,       "event=0x3c,in_tx=1");
4246 EVENT_ATTR_STR(cycles-ct,       cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
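
/*
 * The strings above are exposed verbatim through the PMU's "events"
 * sysfs directory, e.g. the tx-start file contains
 * "event=0xc9,umask=0x1", which tools such as perf can resolve when an
 * event is specified as "cpu/tx-start/".
 */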
4247
4248 static struct attribute *hsw_events_attrs[] = {
4249         EVENT_PTR(td_slots_issued),
4250         EVENT_PTR(td_slots_retired),
4251         EVENT_PTR(td_fetch_bubbles),
4252         EVENT_PTR(td_total_slots),
4253         EVENT_PTR(td_total_slots_scale),
4254         EVENT_PTR(td_recovery_bubbles),
4255         EVENT_PTR(td_recovery_bubbles_scale),
4256         NULL
4257 };
4258
4259 static struct attribute *hsw_mem_events_attrs[] = {
4260         EVENT_PTR(mem_ld_hsw),
4261         EVENT_PTR(mem_st_hsw),
4262         NULL,
4263 };
4264
4265 static struct attribute *hsw_tsx_events_attrs[] = {
4266         EVENT_PTR(tx_start),
4267         EVENT_PTR(tx_commit),
4268         EVENT_PTR(tx_abort),
4269         EVENT_PTR(tx_capacity),
4270         EVENT_PTR(tx_conflict),
4271         EVENT_PTR(el_start),
4272         EVENT_PTR(el_commit),
4273         EVENT_PTR(el_abort),
4274         EVENT_PTR(el_capacity),
4275         EVENT_PTR(el_conflict),
4276         EVENT_PTR(cycles_t),
4277         EVENT_PTR(cycles_ct),
4278         NULL
4279 };
4280
4281 EVENT_ATTR_STR(tx-capacity-read,  tx_capacity_read,  "event=0x54,umask=0x80");
4282 EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
4283 EVENT_ATTR_STR(el-capacity-read,  el_capacity_read,  "event=0x54,umask=0x80");
4284 EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
4285
4286 static struct attribute *icl_events_attrs[] = {
4287         EVENT_PTR(mem_ld_hsw),
4288         EVENT_PTR(mem_st_hsw),
4289         NULL,
4290 };
4291
4292 static struct attribute *icl_tsx_events_attrs[] = {
4293         EVENT_PTR(tx_start),
4294         EVENT_PTR(tx_abort),
4295         EVENT_PTR(tx_commit),
4296         EVENT_PTR(tx_capacity_read),
4297         EVENT_PTR(tx_capacity_write),
4298         EVENT_PTR(tx_conflict),
4299         EVENT_PTR(el_start),
4300         EVENT_PTR(el_abort),
4301         EVENT_PTR(el_commit),
4302         EVENT_PTR(el_capacity_read),
4303         EVENT_PTR(el_capacity_write),
4304         EVENT_PTR(el_conflict),
4305         EVENT_PTR(cycles_t),
4306         EVENT_PTR(cycles_ct),
4307         NULL,
4308 };
4309
4310 static ssize_t freeze_on_smi_show(struct device *cdev,
4311                                   struct device_attribute *attr,
4312                                   char *buf)
4313 {
4314         return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
4315 }
4316
4317 static DEFINE_MUTEX(freeze_on_smi_mutex);
4318
4319 static ssize_t freeze_on_smi_store(struct device *cdev,
4320                                    struct device_attribute *attr,
4321                                    const char *buf, size_t count)
4322 {
4323         unsigned long val;
4324         ssize_t ret;
4325
4326         ret = kstrtoul(buf, 0, &val);
4327         if (ret)
4328                 return ret;
4329
4330         if (val > 1)
4331                 return -EINVAL;
4332
4333         mutex_lock(&freeze_on_smi_mutex);
4334
4335         if (x86_pmu.attr_freeze_on_smi == val)
4336                 goto done;
4337
4338         x86_pmu.attr_freeze_on_smi = val;
4339
4340         get_online_cpus();
4341         on_each_cpu(flip_smm_bit, &val, 1);
4342         put_online_cpus();
4343 done:
4344         mutex_unlock(&freeze_on_smi_mutex);
4345
4346         return count;
4347 }
4348
4349 static void update_tfa_sched(void *ignored)
4350 {
4351         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
4352
4353         /*
4354          * Check if PMC3 is used and, if so, force a reschedule for
4355          * all event types in all contexts.
4356          */
4357         if (test_bit(3, cpuc->active_mask))
4358                 perf_pmu_resched(x86_get_pmu());
4359 }
4360
4361 static ssize_t show_sysctl_tfa(struct device *cdev,
4362                               struct device_attribute *attr,
4363                               char *buf)
4364 {
4365         return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
4366 }
4367
4368 static ssize_t set_sysctl_tfa(struct device *cdev,
4369                               struct device_attribute *attr,
4370                               const char *buf, size_t count)
4371 {
4372         bool val;
4373         ssize_t ret;
4374
4375         ret = kstrtobool(buf, &val);
4376         if (ret)
4377                 return ret;
4378
4379         /* no change */
4380         if (val == allow_tsx_force_abort)
4381                 return count;
4382
4383         allow_tsx_force_abort = val;
4384
4385         get_online_cpus();
4386         on_each_cpu(update_tfa_sched, NULL, 1);
4387         put_online_cpus();
4388
4389         return count;
4390 }
4391
4392
4393 static DEVICE_ATTR_RW(freeze_on_smi);
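
/*
 * freeze_on_smi is published via the default attribute group below and
 * typically shows up as /sys/devices/cpu/freeze_on_smi (the exact path
 * depends on how the PMU device is registered), so SMI freezing can be
 * toggled with e.g. "echo 1 > .../freeze_on_smi".
 */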
4394
4395 static ssize_t branches_show(struct device *cdev,
4396                              struct device_attribute *attr,
4397                              char *buf)
4398 {
4399         return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
4400 }
4401
4402 static DEVICE_ATTR_RO(branches);
4403
4404 static struct attribute *lbr_attrs[] = {
4405         &dev_attr_branches.attr,
4406         NULL
4407 };
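
/*
 * The branches attribute is grouped under the PMU's "caps" directory
 * (group_caps_lbr below), typically /sys/devices/cpu/caps/branches, and
 * reports the LBR stack depth (x86_pmu.lbr_nr).
 */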
4408
4409 static char pmu_name_str[30];
4410
4411 static ssize_t pmu_name_show(struct device *cdev,
4412                              struct device_attribute *attr,
4413                              char *buf)
4414 {
4415         return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str);
4416 }
4417
4418 static DEVICE_ATTR_RO(pmu_name);
4419
4420 static struct attribute *intel_pmu_caps_attrs[] = {
4421        &dev_attr_pmu_name.attr,
4422        NULL
4423 };
4424
4425 static DEVICE_ATTR(allow_tsx_force_abort, 0644,
4426                    show_sysctl_tfa,
4427                    set_sysctl_tfa);
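
/*
 * allow_tsx_force_abort is only visible when the PMU has PMU_FL_TFA set
 * (see default_is_visible() below). Writing it flips the policy and, via
 * update_tfa_sched(), forces a reschedule on every CPU that currently
 * has PMC3 in use.
 */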
4428
4429 static struct attribute *intel_pmu_attrs[] = {
4430         &dev_attr_freeze_on_smi.attr,
4431         &dev_attr_allow_tsx_force_abort.attr,
4432         NULL,
4433 };
4434
4435 static umode_t
4436 tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
4437 {
4438         return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
4439 }
4440
4441 static umode_t
4442 pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
4443 {
4444         return x86_pmu.pebs ? attr->mode : 0;
4445 }
4446
4447 static umode_t
4448 lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
4449 {
4450         return x86_pmu.lbr_nr ? attr->mode : 0;
4451 }
4452
4453 static umode_t
4454 extra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
4455 {
4456         return x86_pmu.version >= 2 ? attr->mode : 0;
4457 }
4458
4459 static umode_t
4460 default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
4461 {
4462         if (attr == &dev_attr_allow_tsx_force_abort.attr)
4463                 return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
4464
4465         return attr->mode;
4466 }
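
/*
 * The ->is_visible() callbacks above gate whole attribute groups: e.g.
 * group_events_tsx below uses tsx_is_visible(), so the tx-* and el-*
 * event files are only created when the CPU supports RTM.
 */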
4467
4468 static struct attribute_group group_events_td  = {
4469         .name = "events",
4470 };
4471
4472 static struct attribute_group group_events_mem = {
4473         .name       = "events",
4474         .is_visible = pebs_is_visible,
4475 };
4476
4477 static struct attribute_group group_events_tsx = {
4478         .name       = "events",
4479         .is_visible = tsx_is_visible,
4480 };
4481
4482 static struct attribute_group group_caps_gen = {
4483         .name  = "caps",
4484         .attrs = intel_pmu_caps_attrs,
4485 };
4486
4487 static struct attribute_group group_caps_lbr = {
4488         .name       = "caps",
4489         .attrs      = lbr_attrs,
4490         .is_visible = lbr_is_visible,
4491 };
4492
4493 static struct attribute_group group_format_extra = {
4494         .name       = "format",
4495         .is_visible = extra_is_visible,
4496 };
4497
4498 static struct attribute_group group_format_extra_skl = {
4499         .name       = "format",
4500         .is_visible = extra_is_visible,
4501 };
4502
4503 static struct attribute_group group_default = {
4504         .attrs      = intel_pmu_attrs,
4505         .is_visible = default_is_visible,
4506 };
4507
4508 static const struct attribute_group *attr_update[] = {
4509         &group_events_td,
4510         &group_events_mem,
4511         &group_events_tsx,
4512         &group_caps_gen,
4513         &group_caps_lbr,
4514         &group_format_extra,
4515         &group_format_extra_skl,
4516         &group_default,
4517         NULL,
4518 };
4519
4520 static struct attribute *empty_attrs;
4521
4522 __init int intel_pmu_init(void)
4523 {
4524         struct attribute **extra_skl_attr = &empty_attrs;
4525         struct attribute **extra_attr = &empty_attrs;
4526         struct attribute **td_attr    = &empty_attrs;
4527         struct attribute **mem_attr   = &empty_attrs;
4528         struct attribute **tsx_attr   = &empty_attrs;
4529         union cpuid10_edx edx;
4530         union cpuid10_eax eax;
4531         union cpuid10_ebx ebx;
4532         struct event_constraint *c;
4533         unsigned int unused;
4534         struct extra_reg *er;
4535         bool pmem = false;
4536         int version, i;
4537         char *name;
4538
4539         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
4540                 switch (boot_cpu_data.x86) {
4541                 case 0x6:
4542                         return p6_pmu_init();
4543                 case 0xb:
4544                         return knc_pmu_init();
4545                 case 0xf:
4546                         return p4_pmu_init();
4547                 }
4548                 return -ENODEV;
4549         }
4550
4551         /*
4552          * Check whether the Architectural PerfMon supports
4553          * Branch Misses Retired hw_event or not.
4554          */
4555         cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
4556         if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
4557                 return -ENODEV;
4558
4559         version = eax.split.version_id;
4560         if (version < 2)
4561                 x86_pmu = core_pmu;
4562         else
4563                 x86_pmu = intel_pmu;
4564
4565         x86_pmu.version                 = version;
4566         x86_pmu.num_counters            = eax.split.num_counters;
4567         x86_pmu.cntval_bits             = eax.split.bit_width;
4568         x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
4569
4570         x86_pmu.events_maskl            = ebx.full;
4571         x86_pmu.events_mask_len         = eax.split.mask_length;
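
        /*
         * For reference, CPUID leaf 0xA packs this as: EAX[7:0] version,
         * EAX[15:8] number of GP counters, EAX[23:16] counter bit width,
         * EAX[31:24] length of the EBX availability mask, with each set
         * EBX bit flagging an architectural event as unavailable.
         */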
4572
4573         x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
4574
4575         /*
4576          * Quirk: v2 perfmon does not report fixed-purpose events, so
4577          * assume at least 3 events, when not running in a hypervisor:
4578          */
4579         if (version > 1) {
4580                 int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
4581
4582                 x86_pmu.num_counters_fixed =
4583                         max((int)edx.split.num_counters_fixed, assume);
4584         }
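
        /*
         * Example: on bare metal "assume" is 3, so a v2 PMU reporting zero
         * fixed counters still ends up with three; under a hypervisor
         * "assume" is 0 and the CPUID-reported value is used as-is.
         */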
4585
4586         if (version >= 4)
4587                 x86_pmu.counter_freezing = !disable_counter_freezing;
4588
4589         if (boot_cpu_has(X86_FEATURE_PDCM)) {
4590                 u64 capabilities;
4591
4592                 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
4593                 x86_pmu.intel_cap.capabilities = capabilities;
4594         }
4595
4596         intel_ds_init();
4597
4598         x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
4599
4600         /*
4601          * Install the hw-cache-events table:
4602          */
4603         switch (boot_cpu_data.x86_model) {
4604         case INTEL_FAM6_CORE_YONAH:
4605                 pr_cont("Core events, ");
4606                 name = "core";
4607                 break;
4608
4609         case INTEL_FAM6_CORE2_MEROM:
4610                 x86_add_quirk(intel_clovertown_quirk);
4611                 /* fall through */
4612
4613         case INTEL_FAM6_CORE2_MEROM_L:
4614         case INTEL_FAM6_CORE2_PENRYN:
4615         case INTEL_FAM6_CORE2_DUNNINGTON:
4616                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
4617                        sizeof(hw_cache_event_ids));
4618
4619                 intel_pmu_lbr_init_core();
4620
4621                 x86_pmu.event_constraints = intel_core2_event_constraints;
4622                 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
4623                 pr_cont("Core2 events, ");
4624                 name = "core2";
4625                 break;
4626
4627         case INTEL_FAM6_NEHALEM:
4628         case INTEL_FAM6_NEHALEM_EP:
4629         case INTEL_FAM6_NEHALEM_EX:
4630                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
4631                        sizeof(hw_cache_event_ids));
4632                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
4633                        sizeof(hw_cache_extra_regs));
4634
4635                 intel_pmu_lbr_init_nhm();
4636
4637                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
4638                 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
4639                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
4640                 x86_pmu.extra_regs = intel_nehalem_extra_regs;
4641                 x86_pmu.limit_period = nhm_limit_period;
4642
4643                 mem_attr = nhm_mem_events_attrs;
4644
4645                 /* UOPS_ISSUED.STALLED_CYCLES */
4646                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4647                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4648                 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
4649                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
4650                         X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
4651
4652                 intel_pmu_pebs_data_source_nhm();
4653                 x86_add_quirk(intel_nehalem_quirk);
4654                 x86_pmu.pebs_no_tlb = 1;
4655                 extra_attr = nhm_format_attr;
4656
4657                 pr_cont("Nehalem events, ");
4658                 name = "nehalem";
4659                 break;
4660
4661         case INTEL_FAM6_ATOM_BONNELL:
4662         case INTEL_FAM6_ATOM_BONNELL_MID:
4663         case INTEL_FAM6_ATOM_SALTWELL:
4664         case INTEL_FAM6_ATOM_SALTWELL_MID:
4665         case INTEL_FAM6_ATOM_SALTWELL_TABLET:
4666                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
4667                        sizeof(hw_cache_event_ids));
4668
4669                 intel_pmu_lbr_init_atom();
4670
4671                 x86_pmu.event_constraints = intel_gen_event_constraints;
4672                 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
4673                 x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
4674                 pr_cont("Atom events, ");
4675                 name = "bonnell";
4676                 break;
4677
4678         case INTEL_FAM6_ATOM_SILVERMONT:
4679         case INTEL_FAM6_ATOM_SILVERMONT_D:
4680         case INTEL_FAM6_ATOM_SILVERMONT_MID:
4681         case INTEL_FAM6_ATOM_AIRMONT:
4682         case INTEL_FAM6_ATOM_AIRMONT_MID:
4683                 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
4684                         sizeof(hw_cache_event_ids));
4685                 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
4686                        sizeof(hw_cache_extra_regs));
4687
4688                 intel_pmu_lbr_init_slm();
4689
4690                 x86_pmu.event_constraints = intel_slm_event_constraints;
4691                 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
4692                 x86_pmu.extra_regs = intel_slm_extra_regs;
4693                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4694                 td_attr = slm_events_attrs;
4695                 extra_attr = slm_format_attr;
4696                 pr_cont("Silvermont events, ");
4697                 name = "silvermont";
4698                 break;
4699
4700         case INTEL_FAM6_ATOM_GOLDMONT:
4701         case INTEL_FAM6_ATOM_GOLDMONT_D:
4702                 x86_add_quirk(intel_counter_freezing_quirk);
4703                 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
4704                        sizeof(hw_cache_event_ids));
4705                 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
4706                        sizeof(hw_cache_extra_regs));
4707
4708                 intel_pmu_lbr_init_skl();
4709
4710                 x86_pmu.event_constraints = intel_slm_event_constraints;
4711                 x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
4712                 x86_pmu.extra_regs = intel_glm_extra_regs;
4713                 /*
4714                  * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
4715                  * for precise cycles.
4716                  * :pp is identical to :ppp
4717                  */
4718                 x86_pmu.pebs_aliases = NULL;
4719                 x86_pmu.pebs_prec_dist = true;
4720                 x86_pmu.lbr_pt_coexist = true;
4721                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4722                 td_attr = glm_events_attrs;
4723                 extra_attr = slm_format_attr;
4724                 pr_cont("Goldmont events, ");
4725                 name = "goldmont";
4726                 break;
4727
4728         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4729                 x86_add_quirk(intel_counter_freezing_quirk);
4730                 memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
4731                        sizeof(hw_cache_event_ids));
4732                 memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
4733                        sizeof(hw_cache_extra_regs));
4734
4735                 intel_pmu_lbr_init_skl();
4736
4737                 x86_pmu.event_constraints = intel_slm_event_constraints;
4738                 x86_pmu.extra_regs = intel_glm_extra_regs;
4739                 /*
4740                  * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
4741                  * for precise cycles.
4742                  */
4743                 x86_pmu.pebs_aliases = NULL;
4744                 x86_pmu.pebs_prec_dist = true;
4745                 x86_pmu.lbr_pt_coexist = true;
4746                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4747                 x86_pmu.flags |= PMU_FL_PEBS_ALL;
4748                 x86_pmu.get_event_constraints = glp_get_event_constraints;
4749                 td_attr = glm_events_attrs;
4750                 /* Goldmont Plus has 4-wide pipeline */
4751                 event_attr_td_total_slots_scale_glm.event_str = "4";
4752                 extra_attr = slm_format_attr;
4753                 pr_cont("Goldmont plus events, ");
4754                 name = "goldmont_plus";
4755                 break;
4756
4757         case INTEL_FAM6_ATOM_TREMONT_D:
4758         case INTEL_FAM6_ATOM_TREMONT:
4759                 x86_pmu.late_ack = true;
4760                 memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
4761                        sizeof(hw_cache_event_ids));
4762                 memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
4763                        sizeof(hw_cache_extra_regs));
4764                 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
4765
4766                 intel_pmu_lbr_init_skl();
4767
4768                 x86_pmu.event_constraints = intel_slm_event_constraints;
4769                 x86_pmu.extra_regs = intel_tnt_extra_regs;
4770                 /*
4771                  * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
4772                  * for precise cycles.
4773                  */
4774                 x86_pmu.pebs_aliases = NULL;
4775                 x86_pmu.pebs_prec_dist = true;
4776                 x86_pmu.lbr_pt_coexist = true;
4777                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4778                 x86_pmu.get_event_constraints = tnt_get_event_constraints;
4779                 extra_attr = slm_format_attr;
4780                 pr_cont("Tremont events, ");
4781                 name = "Tremont";
4782                 break;
4783
4784         case INTEL_FAM6_WESTMERE:
4785         case INTEL_FAM6_WESTMERE_EP:
4786         case INTEL_FAM6_WESTMERE_EX:
4787                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
4788                        sizeof(hw_cache_event_ids));
4789                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
4790                        sizeof(hw_cache_extra_regs));
4791
4792                 intel_pmu_lbr_init_nhm();
4793
4794                 x86_pmu.event_constraints = intel_westmere_event_constraints;
4795                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
4796                 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
4797                 x86_pmu.extra_regs = intel_westmere_extra_regs;
4798                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4799
4800                 mem_attr = nhm_mem_events_attrs;
4801
4802                 /* UOPS_ISSUED.STALLED_CYCLES */
4803                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4804                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4805                 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
4806                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
4807                         X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
4808
4809                 intel_pmu_pebs_data_source_nhm();
4810                 extra_attr = nhm_format_attr;
4811                 pr_cont("Westmere events, ");
4812                 name = "westmere";
4813                 break;
4814
4815         case INTEL_FAM6_SANDYBRIDGE:
4816         case INTEL_FAM6_SANDYBRIDGE_X:
4817                 x86_add_quirk(intel_sandybridge_quirk);
4818                 x86_add_quirk(intel_ht_bug);
4819                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
4820                        sizeof(hw_cache_event_ids));
4821                 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
4822                        sizeof(hw_cache_extra_regs));
4823
4824                 intel_pmu_lbr_init_snb();
4825
4826                 x86_pmu.event_constraints = intel_snb_event_constraints;
4827                 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
4828                 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
4829                 if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
4830                         x86_pmu.extra_regs = intel_snbep_extra_regs;
4831                 else
4832                         x86_pmu.extra_regs = intel_snb_extra_regs;
4833
4834
4835                 /* all extra regs are per-cpu when HT is on */
4836                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4837                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4838
4839                 td_attr  = snb_events_attrs;
4840                 mem_attr = snb_mem_events_attrs;
4841
4842                 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
4843                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4844                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4845                 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
4846                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
4847                         X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
4848
4849                 extra_attr = nhm_format_attr;
4850
4851                 pr_cont("SandyBridge events, ");
4852                 name = "sandybridge";
4853                 break;
4854
4855         case INTEL_FAM6_IVYBRIDGE:
4856         case INTEL_FAM6_IVYBRIDGE_X:
4857                 x86_add_quirk(intel_ht_bug);
4858                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
4859                        sizeof(hw_cache_event_ids));
4860                 /* dTLB-load-misses on IVB is different than SNB */
4861                 hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
4862
4863                 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
4864                        sizeof(hw_cache_extra_regs));
4865
4866                 intel_pmu_lbr_init_snb();
4867
4868                 x86_pmu.event_constraints = intel_ivb_event_constraints;
4869                 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
4870                 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
4871                 x86_pmu.pebs_prec_dist = true;
4872                 if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
4873                         x86_pmu.extra_regs = intel_snbep_extra_regs;
4874                 else
4875                         x86_pmu.extra_regs = intel_snb_extra_regs;
4876                 /* all extra regs are per-cpu when HT is on */
4877                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4878                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4879
4880                 td_attr  = snb_events_attrs;
4881                 mem_attr = snb_mem_events_attrs;
4882
4883                 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
4884                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4885                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4886
4887                 extra_attr = nhm_format_attr;
4888
4889                 pr_cont("IvyBridge events, ");
4890                 name = "ivybridge";
4891                 break;
4892
4893
4894         case INTEL_FAM6_HASWELL:
4895         case INTEL_FAM6_HASWELL_X:
4896         case INTEL_FAM6_HASWELL_L:
4897         case INTEL_FAM6_HASWELL_G:
4898                 x86_add_quirk(intel_ht_bug);
4899                 x86_add_quirk(intel_pebs_isolation_quirk);
4900                 x86_pmu.late_ack = true;
4901                 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4902                 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4903
4904                 intel_pmu_lbr_init_hsw();
4905
4906                 x86_pmu.event_constraints = intel_hsw_event_constraints;
4907                 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
4908                 x86_pmu.extra_regs = intel_snbep_extra_regs;
4909                 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
4910                 x86_pmu.pebs_prec_dist = true;
4911                 /* all extra regs are per-cpu when HT is on */
4912                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4913                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4914
4915                 x86_pmu.hw_config = hsw_hw_config;
4916                 x86_pmu.get_event_constraints = hsw_get_event_constraints;
4917                 x86_pmu.lbr_double_abort = true;
4918                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4919                         hsw_format_attr : nhm_format_attr;
4920                 td_attr  = hsw_events_attrs;
4921                 mem_attr = hsw_mem_events_attrs;
4922                 tsx_attr = hsw_tsx_events_attrs;
4923                 pr_cont("Haswell events, ");
4924                 name = "haswell";
4925                 break;
4926
4927         case INTEL_FAM6_BROADWELL:
4928         case INTEL_FAM6_BROADWELL_D:
4929         case INTEL_FAM6_BROADWELL_G:
4930         case INTEL_FAM6_BROADWELL_X:
4931                 x86_add_quirk(intel_pebs_isolation_quirk);
4932                 x86_pmu.late_ack = true;
4933                 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4934                 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4935
4936                 /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
4937                 hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
4938                                                                          BDW_L3_MISS|HSW_SNOOP_DRAM;
4939                 hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
4940                                                                           HSW_SNOOP_DRAM;
4941                 hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
4942                                                                              BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
4943                 hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
4944                                                                               BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
4945
4946                 intel_pmu_lbr_init_hsw();
4947
4948                 x86_pmu.event_constraints = intel_bdw_event_constraints;
4949                 x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
4950                 x86_pmu.extra_regs = intel_snbep_extra_regs;
4951                 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
4952                 x86_pmu.pebs_prec_dist = true;
4953                 /* all extra regs are per-cpu when HT is on */
4954                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4955                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4956
4957                 x86_pmu.hw_config = hsw_hw_config;
4958                 x86_pmu.get_event_constraints = hsw_get_event_constraints;
4959                 x86_pmu.limit_period = bdw_limit_period;
4960                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4961                         hsw_format_attr : nhm_format_attr;
4962                 td_attr  = hsw_events_attrs;
4963                 mem_attr = hsw_mem_events_attrs;
4964                 tsx_attr = hsw_tsx_events_attrs;
4965                 pr_cont("Broadwell events, ");
4966                 name = "broadwell";
4967                 break;
4968
4969         case INTEL_FAM6_XEON_PHI_KNL:
4970         case INTEL_FAM6_XEON_PHI_KNM:
4971                 memcpy(hw_cache_event_ids,
4972                        slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4973                 memcpy(hw_cache_extra_regs,
4974                        knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4975                 intel_pmu_lbr_init_knl();
4976
4977                 x86_pmu.event_constraints = intel_slm_event_constraints;
4978                 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
4979                 x86_pmu.extra_regs = intel_knl_extra_regs;
4980
4981                 /* all extra regs are per-cpu when HT is on */
4982                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4983                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4984                 extra_attr = slm_format_attr;
4985                 pr_cont("Knights Landing/Mill events, ");
4986                 name = "knights-landing";
4987                 break;
4988
4989         case INTEL_FAM6_SKYLAKE_X:
4990                 pmem = true;
4991                 /* fall through */
4992         case INTEL_FAM6_SKYLAKE_L:
4993         case INTEL_FAM6_SKYLAKE:
4994         case INTEL_FAM6_KABYLAKE_L:
4995         case INTEL_FAM6_KABYLAKE:
4996         case INTEL_FAM6_COMETLAKE_L:
4997         case INTEL_FAM6_COMETLAKE:
4998                 x86_add_quirk(intel_pebs_isolation_quirk);
4999                 x86_pmu.late_ack = true;
5000                 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
5001                 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
5002                 intel_pmu_lbr_init_skl();
5003
5004                 /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
5005                 event_attr_td_recovery_bubbles.event_str_noht =
5006                         "event=0xd,umask=0x1,cmask=1";
5007                 event_attr_td_recovery_bubbles.event_str_ht =
5008                         "event=0xd,umask=0x1,cmask=1,any=1";
5009
5010                 x86_pmu.event_constraints = intel_skl_event_constraints;
5011                 x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
5012                 x86_pmu.extra_regs = intel_skl_extra_regs;
5013                 x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
5014                 x86_pmu.pebs_prec_dist = true;
5015                 /* all extra regs are per-cpu when HT is on */
5016                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
5017                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
5018
5019                 x86_pmu.hw_config = hsw_hw_config;
5020                 x86_pmu.get_event_constraints = hsw_get_event_constraints;
5021                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
5022                         hsw_format_attr : nhm_format_attr;
5023                 extra_skl_attr = skl_format_attr;
5024                 td_attr  = hsw_events_attrs;
5025                 mem_attr = hsw_mem_events_attrs;
5026                 tsx_attr = hsw_tsx_events_attrs;
5027                 intel_pmu_pebs_data_source_skl(pmem);
5028
5029                 if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
5030                         x86_pmu.flags |= PMU_FL_TFA;
5031                         x86_pmu.get_event_constraints = tfa_get_event_constraints;
5032                         x86_pmu.enable_all = intel_tfa_pmu_enable_all;
5033                         x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
5034                 }
5035
5036                 pr_cont("Skylake events, ");
5037                 name = "skylake";
5038                 break;
5039
5040         case INTEL_FAM6_ICELAKE_X:
5041         case INTEL_FAM6_ICELAKE_D:
5042                 pmem = true;
5043                 /* fall through */
5044         case INTEL_FAM6_ICELAKE_L:
5045         case INTEL_FAM6_ICELAKE:
5046         case INTEL_FAM6_TIGERLAKE_L:
5047         case INTEL_FAM6_TIGERLAKE:
5048                 x86_pmu.late_ack = true;
5049                 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
5050                 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
5051                 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
5052                 intel_pmu_lbr_init_skl();
5053
5054                 x86_pmu.event_constraints = intel_icl_event_constraints;
5055                 x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
5056                 x86_pmu.extra_regs = intel_icl_extra_regs;
5057                 x86_pmu.pebs_aliases = NULL;
5058                 x86_pmu.pebs_prec_dist = true;
5059                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
5060                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
5061
5062                 x86_pmu.hw_config = hsw_hw_config;
5063                 x86_pmu.get_event_constraints = icl_get_event_constraints;
5064                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
5065                         hsw_format_attr : nhm_format_attr;
5066                 extra_skl_attr = skl_format_attr;
5067                 mem_attr = icl_events_attrs;
5068                 tsx_attr = icl_tsx_events_attrs;
5069                 x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
5070                 x86_pmu.lbr_pt_coexist = true;
5071                 intel_pmu_pebs_data_source_skl(pmem);
5072                 pr_cont("Icelake events, ");
5073                 name = "icelake";
5074                 break;
5075
5076         default:
5077                 switch (x86_pmu.version) {
5078                 case 1:
5079                         x86_pmu.event_constraints = intel_v1_event_constraints;
5080                         pr_cont("generic architected perfmon v1, ");
5081                         name = "generic_arch_v1";
5082                         break;
5083                 default:
5084                         /*
5085                          * default constraints for v2 and up
5086                          */
5087                         x86_pmu.event_constraints = intel_gen_event_constraints;
5088                         pr_cont("generic architected perfmon, ");
5089                         name = "generic_arch_v2+";
5090                         break;
5091                 }
5092         }
5093
5094         snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
5095
5096
5097         group_events_td.attrs  = td_attr;
5098         group_events_mem.attrs = mem_attr;
5099         group_events_tsx.attrs = tsx_attr;
5100         group_format_extra.attrs = extra_attr;
5101         group_format_extra_skl.attrs = extra_skl_attr;
5102
5103         x86_pmu.attr_update = attr_update;
5104
5105         if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
5106                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
5107                      x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
5108                 x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
5109         }
5110         x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
5111
5112         if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
5113                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
5114                      x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
5115                 x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
5116         }
5117
5118         x86_pmu.intel_ctrl |=
5119                 ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
5120
5121         if (x86_pmu.event_constraints) {
5122                 /*
5123                  * event on fixed counter2 (REF_CYCLES) only works on this
5124                  * counter, so do not extend mask to generic counters
5125                  */
5126                 for_each_event_constraint(c, x86_pmu.event_constraints) {
5127                         if (c->cmask == FIXED_EVENT_FLAGS
5128                             && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
5129                                 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
5130                         }
5131                         c->idxmsk64 &=
5132                                 ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
5133                         c->weight = hweight64(c->idxmsk64);
5134                 }
5135         }
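
        /*
         * For example, on a PMU with 4 GP counters the fixed INST_RETIRED.ANY
         * constraint gains bits 0-3 on top of its fixed-counter bit, while
         * the REF_CYCLES pseudo-event above stays pinned to fixed counter 2.
         */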
5136
5137         /*
5138          * Accessing the LBR MSRs may cause a #GP under certain
5139          * circumstances, e.g. KVM doesn't support the LBR MSRs.
5140          * Check all LBR MSRs here and disable LBR access if any
5141          * of them cannot be accessed.
5142          */
5143         if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
5144                 x86_pmu.lbr_nr = 0;
5145         for (i = 0; i < x86_pmu.lbr_nr; i++) {
5146                 if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
5147                       check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
5148                         x86_pmu.lbr_nr = 0;
5149         }
5150
5151         if (x86_pmu.lbr_nr)
5152                 pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
5153
5154         /*
5155          * Accessing the extra MSRs may cause a #GP under certain
5156          * circumstances, e.g. KVM doesn't support offcore events.
5157          * Check all extra_regs here.
5158          */
5159         if (x86_pmu.extra_regs) {
5160                 for (er = x86_pmu.extra_regs; er->msr; er++) {
5161                         er->extra_msr_access = check_msr(er->msr, 0x11UL);
5162                         /* Disable LBR select mapping */
5163                         if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
5164                                 x86_pmu.lbr_sel_map = NULL;
5165                 }
5166         }
5167
5168         /* Support full width counters using alternative MSR range */
5169         if (x86_pmu.intel_cap.full_width_write) {
5170                 x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
5171                 x86_pmu.perfctr = MSR_IA32_PMC0;
5172                 pr_cont("full-width counters, ");
5173         }
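
        /*
         * Example: with 48-bit counters cntval_mask is 0xffffffffffff, so
         * max_period becomes 0x7fffffffffff (2^47 - 1).
         */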
5174
5175         /*
5176          * For arch perfmon 4 use counter freezing to avoid
5177          * several MSR accesses in the PMI.
5178          */
5179         if (x86_pmu.counter_freezing)
5180                 x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
5181
5182         return 0;
5183 }
5184
5185 /*
5186  * HT bug: phase 2 init
5187  * Called once we have valid topology information to check
5188  * whether or not HT is enabled.
5189  * If HT is off, then we disable the workaround.
5190  */
5191 static __init int fixup_ht_bug(void)
5192 {
5193         int c;
5194         /*
5195          * problem not present on this CPU model, nothing to do
5196          */
5197         if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
5198                 return 0;
5199
5200         if (topology_max_smt_threads() > 1) {
5201                 pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
5202                 return 0;
5203         }
5204
5205         cpus_read_lock();
5206
5207         hardlockup_detector_perf_stop();
5208
5209         x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
5210
5211         x86_pmu.start_scheduling = NULL;
5212         x86_pmu.commit_scheduling = NULL;
5213         x86_pmu.stop_scheduling = NULL;
5214
5215         hardlockup_detector_perf_restart();
5216
5217         for_each_online_cpu(c)
5218                 free_excl_cntrs(&per_cpu(cpu_hw_events, c));
5219
5220         cpus_read_unlock();
5221         pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
5222         return 0;
5223 }
5224 subsys_initcall(fixup_ht_bug)