GNU Linux-libre 5.19.9-gnu
[releases.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *      for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *      (in contrast to entering ACPI C3, in which case the WBINVD
27  *      instruction needs to be executed to flush the caches)
28  */
29
30 /*
31  * Known limitations
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/sched/smt.h>
51 #include <linux/notifier.h>
52 #include <linux/cpu.h>
53 #include <linux/moduleparam.h>
54 #include <asm/cpu_device_id.h>
55 #include <asm/intel-family.h>
56 #include <asm/nospec-branch.h>
57 #include <asm/mwait.h>
58 #include <asm/msr.h>
59
60 #define INTEL_IDLE_VERSION "0.5.1"
61
62 static struct cpuidle_driver intel_idle_driver = {
63         .name = "intel_idle",
64         .owner = THIS_MODULE,
65 };
66 /* intel_idle.max_cstate=0 disables driver */
67 static int max_cstate = CPUIDLE_STATE_MAX - 1;
68 static unsigned int disabled_states_mask;
69 static unsigned int preferred_states_mask;
70
71 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
72
73 static unsigned long auto_demotion_disable_flags;
74
75 static enum {
76         C1E_PROMOTION_PRESERVE,
77         C1E_PROMOTION_ENABLE,
78         C1E_PROMOTION_DISABLE
79 } c1e_promotion = C1E_PROMOTION_PRESERVE;
80
/**
 * struct idle_cpu - idle-state description record.
 * @state_table: pointer to a table of struct cpuidle_state entries.
 * @auto_demotion_disable_flags: enable bits to clear, because hardware
 *	C-state auto-demotion may not always be optimal.
 * @byt_auto_demotion_disable_flag: NOTE(review): presumably a variant of the
 *	auto-demotion control; confirm against the code that reads it.
 * @disable_promotion_to_c1e: NOTE(review): presumably requests that C1E
 *	promotion be turned off; confirm against the code that reads it.
 * @use_acpi: NOTE(review): presumably selects use of ACPI-provided data;
 *	confirm against the code that reads it.
 */
struct idle_cpu {
	struct cpuidle_state *state_table;
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	bool use_acpi;
};
93
94 static const struct idle_cpu *icpu __initdata;
95 static struct cpuidle_state *cpuidle_state_table __initdata;
96
97 static unsigned int mwait_substates __initdata;
98
/*
 * Driver-private state flags (bits 14-16 of a state's flags word).
 */

/*
 * Interrupts are enabled before the C-state is entered.  On some platforms
 * and for some C-states this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/* The state is enabled by default even when the ACPI _CST does not list it. */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * IBRS is disabled across idle (when KERNEL_IBRS).  Mutually exclusive with
 * CPUIDLE_FLAG_IRQ_ENABLE.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)
115
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting" the C-state (top nibble)
 * and the sub-state (bottom nibble): 0x00 means "MWAIT(C1)", 0x10 means
 * "MWAIT(C2)", and so on.
 *
 * The hint is stored in the top byte (bits 31:24) of each state's "flags":
 *
 *   flg2MWAIT(flags) - extract the 8-bit hint from a flags word.
 *   MWAIT2flg(eax)   - place the low 8 bits of @eax into the flags top byte.
 *
 * Both macros parenthesize their argument so that an expression such as
 * MWAIT2flg(a | b) is masked as a whole rather than only its last operand.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
125
126 static __always_inline int __intel_idle(struct cpuidle_device *dev,
127                                         struct cpuidle_driver *drv, int index)
128 {
129         struct cpuidle_state *state = &drv->states[index];
130         unsigned long eax = flg2MWAIT(state->flags);
131         unsigned long ecx = 1; /* break on interrupt flag */
132
133         mwait_idle_with_hints(eax, ecx);
134
135         return index;
136 }
137
138 /**
139  * intel_idle - Ask the processor to enter the given idle state.
140  * @dev: cpuidle device of the target CPU.
141  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
142  * @index: Target idle state index.
143  *
144  * Use the MWAIT instruction to notify the processor that the CPU represented by
145  * @dev is idle and it can try to enter the idle state corresponding to @index.
146  *
147  * If the local APIC timer is not known to be reliable in the target idle state,
148  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
149  *
150  * Must be called under local_irq_disable().
151  */
152 static __cpuidle int intel_idle(struct cpuidle_device *dev,
153                                 struct cpuidle_driver *drv, int index)
154 {
155         return __intel_idle(dev, drv, index);
156 }
157
158 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
159                                     struct cpuidle_driver *drv, int index)
160 {
161         int ret;
162
163         raw_local_irq_enable();
164         ret = __intel_idle(dev, drv, index);
165
166         /*
167          * The lockdep hardirqs state may be changed to 'on' with timer
168          * tick interrupt followed by __do_softirq(). Use local_irq_disable()
169          * to keep the hardirqs state correct.
170          */
171         local_irq_disable();
172
173         return ret;
174 }
175
176 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
177                                      struct cpuidle_driver *drv, int index)
178 {
179         bool smt_active = sched_smt_active();
180         u64 spec_ctrl = spec_ctrl_current();
181         int ret;
182
183         if (smt_active)
184                 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
185
186         ret = __intel_idle(dev, drv, index);
187
188         if (smt_active)
189                 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
190
191         return ret;
192 }
193
194 /**
195  * intel_idle_s2idle - Ask the processor to enter the given idle state.
196  * @dev: cpuidle device of the target CPU.
197  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
198  * @index: Target idle state index.
199  *
200  * Use the MWAIT instruction to notify the processor that the CPU represented by
201  * @dev is idle and it can try to enter the idle state corresponding to @index.
202  *
203  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
204  * scheduler tick and suspended scheduler clock on the target CPU.
205  */
206 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
207                                        struct cpuidle_driver *drv, int index)
208 {
209         unsigned long eax = flg2MWAIT(drv->states[index].flags);
210         unsigned long ecx = 1; /* break on interrupt flag */
211
212         mwait_idle_with_hints(eax, ecx);
213
214         return 0;
215 }
216
217 /*
218  * Each table below lists idle states starting with C1 (there is no
219  * dummy C0 entry).  The MWAIT hint of a state is carried in the top byte
220  * of its .flags (see MWAIT2flg()); a NULL .enter marks the end of a table.
221  */
222 static struct cpuidle_state nehalem_cstates[] __initdata = {
223         {
224                 .name = "C1",
225                 .desc = "MWAIT 0x00",
226                 .flags = MWAIT2flg(0x00),
227                 .exit_latency = 3,
228                 .target_residency = 6,
229                 .enter = &intel_idle,
230                 .enter_s2idle = intel_idle_s2idle, },
231         {
232                 .name = "C1E",
233                 .desc = "MWAIT 0x01",
234                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
235                 .exit_latency = 10,
236                 .target_residency = 20,
237                 .enter = &intel_idle,
238                 .enter_s2idle = intel_idle_s2idle, },
239         {
240                 .name = "C3",
241                 .desc = "MWAIT 0x10",
242                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
243                 .exit_latency = 20,
244                 .target_residency = 80,
245                 .enter = &intel_idle,
246                 .enter_s2idle = intel_idle_s2idle, },
247         {
248                 .name = "C6",
249                 .desc = "MWAIT 0x20",
250                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
251                 .exit_latency = 200,
252                 .target_residency = 800,
253                 .enter = &intel_idle,
254                 .enter_s2idle = intel_idle_s2idle, },
255         {
256                 .enter = NULL }
257 };
258
259 static struct cpuidle_state snb_cstates[] __initdata = {
260         {
261                 .name = "C1",
262                 .desc = "MWAIT 0x00",
263                 .flags = MWAIT2flg(0x00),
264                 .exit_latency = 2,
265                 .target_residency = 2,
266                 .enter = &intel_idle,
267                 .enter_s2idle = intel_idle_s2idle, },
268         {
269                 .name = "C1E",
270                 .desc = "MWAIT 0x01",
271                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
272                 .exit_latency = 10,
273                 .target_residency = 20,
274                 .enter = &intel_idle,
275                 .enter_s2idle = intel_idle_s2idle, },
276         {
277                 .name = "C3",
278                 .desc = "MWAIT 0x10",
279                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
280                 .exit_latency = 80,
281                 .target_residency = 211,
282                 .enter = &intel_idle,
283                 .enter_s2idle = intel_idle_s2idle, },
284         {
285                 .name = "C6",
286                 .desc = "MWAIT 0x20",
287                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
288                 .exit_latency = 104,
289                 .target_residency = 345,
290                 .enter = &intel_idle,
291                 .enter_s2idle = intel_idle_s2idle, },
292         {
293                 .name = "C7",
294                 .desc = "MWAIT 0x30",
295                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
296                 .exit_latency = 109,
297                 .target_residency = 345,
298                 .enter = &intel_idle,
299                 .enter_s2idle = intel_idle_s2idle, },
300         {
301                 .enter = NULL }
302 };
303
304 static struct cpuidle_state byt_cstates[] __initdata = {
305         {
306                 .name = "C1",
307                 .desc = "MWAIT 0x00",
308                 .flags = MWAIT2flg(0x00),
309                 .exit_latency = 1,
310                 .target_residency = 1,
311                 .enter = &intel_idle,
312                 .enter_s2idle = intel_idle_s2idle, },
313         {
314                 .name = "C6N",
315                 .desc = "MWAIT 0x58",
316                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
317                 .exit_latency = 300,
318                 .target_residency = 275,
319                 .enter = &intel_idle,
320                 .enter_s2idle = intel_idle_s2idle, },
321         {
322                 .name = "C6S",
323                 .desc = "MWAIT 0x52",
324                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
325                 .exit_latency = 500,
326                 .target_residency = 560,
327                 .enter = &intel_idle,
328                 .enter_s2idle = intel_idle_s2idle, },
329         {
330                 .name = "C7",
331                 .desc = "MWAIT 0x60",
332                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
333                 .exit_latency = 1200,
334                 .target_residency = 4000,
335                 .enter = &intel_idle,
336                 .enter_s2idle = intel_idle_s2idle, },
337         {
338                 .name = "C7S",
339                 .desc = "MWAIT 0x64",
340                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
341                 .exit_latency = 10000,
342                 .target_residency = 20000,
343                 .enter = &intel_idle,
344                 .enter_s2idle = intel_idle_s2idle, },
345         {
346                 .enter = NULL }
347 };
348
349 static struct cpuidle_state cht_cstates[] __initdata = {
350         {
351                 .name = "C1",
352                 .desc = "MWAIT 0x00",
353                 .flags = MWAIT2flg(0x00),
354                 .exit_latency = 1,
355                 .target_residency = 1,
356                 .enter = &intel_idle,
357                 .enter_s2idle = intel_idle_s2idle, },
358         {
359                 .name = "C6N",
360                 .desc = "MWAIT 0x58",
361                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
362                 .exit_latency = 80,
363                 .target_residency = 275,
364                 .enter = &intel_idle,
365                 .enter_s2idle = intel_idle_s2idle, },
366         {
367                 .name = "C6S",
368                 .desc = "MWAIT 0x52",
369                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
370                 .exit_latency = 200,
371                 .target_residency = 560,
372                 .enter = &intel_idle,
373                 .enter_s2idle = intel_idle_s2idle, },
374         {
375                 .name = "C7",
376                 .desc = "MWAIT 0x60",
377                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
378                 .exit_latency = 1200,
379                 .target_residency = 4000,
380                 .enter = &intel_idle,
381                 .enter_s2idle = intel_idle_s2idle, },
382         {
383                 .name = "C7S",
384                 .desc = "MWAIT 0x64",
385                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
386                 .exit_latency = 10000,
387                 .target_residency = 20000,
388                 .enter = &intel_idle,
389                 .enter_s2idle = intel_idle_s2idle, },
390         {
391                 .enter = NULL }
392 };
393
394 static struct cpuidle_state ivb_cstates[] __initdata = {
395         {
396                 .name = "C1",
397                 .desc = "MWAIT 0x00",
398                 .flags = MWAIT2flg(0x00),
399                 .exit_latency = 1,
400                 .target_residency = 1,
401                 .enter = &intel_idle,
402                 .enter_s2idle = intel_idle_s2idle, },
403         {
404                 .name = "C1E",
405                 .desc = "MWAIT 0x01",
406                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
407                 .exit_latency = 10,
408                 .target_residency = 20,
409                 .enter = &intel_idle,
410                 .enter_s2idle = intel_idle_s2idle, },
411         {
412                 .name = "C3",
413                 .desc = "MWAIT 0x10",
414                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
415                 .exit_latency = 59,
416                 .target_residency = 156,
417                 .enter = &intel_idle,
418                 .enter_s2idle = intel_idle_s2idle, },
419         {
420                 .name = "C6",
421                 .desc = "MWAIT 0x20",
422                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
423                 .exit_latency = 80,
424                 .target_residency = 300,
425                 .enter = &intel_idle,
426                 .enter_s2idle = intel_idle_s2idle, },
427         {
428                 .name = "C7",
429                 .desc = "MWAIT 0x30",
430                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
431                 .exit_latency = 87,
432                 .target_residency = 300,
433                 .enter = &intel_idle,
434                 .enter_s2idle = intel_idle_s2idle, },
435         {
436                 .enter = NULL }
437 };
438
439 static struct cpuidle_state ivt_cstates[] __initdata = {
440         {
441                 .name = "C1",
442                 .desc = "MWAIT 0x00",
443                 .flags = MWAIT2flg(0x00),
444                 .exit_latency = 1,
445                 .target_residency = 1,
446                 .enter = &intel_idle,
447                 .enter_s2idle = intel_idle_s2idle, },
448         {
449                 .name = "C1E",
450                 .desc = "MWAIT 0x01",
451                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
452                 .exit_latency = 10,
453                 .target_residency = 80,
454                 .enter = &intel_idle,
455                 .enter_s2idle = intel_idle_s2idle, },
456         {
457                 .name = "C3",
458                 .desc = "MWAIT 0x10",
459                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
460                 .exit_latency = 59,
461                 .target_residency = 156,
462                 .enter = &intel_idle,
463                 .enter_s2idle = intel_idle_s2idle, },
464         {
465                 .name = "C6",
466                 .desc = "MWAIT 0x20",
467                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
468                 .exit_latency = 82,
469                 .target_residency = 300,
470                 .enter = &intel_idle,
471                 .enter_s2idle = intel_idle_s2idle, },
472         {
473                 .enter = NULL }
474 };
475
476 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
477         {
478                 .name = "C1",
479                 .desc = "MWAIT 0x00",
480                 .flags = MWAIT2flg(0x00),
481                 .exit_latency = 1,
482                 .target_residency = 1,
483                 .enter = &intel_idle,
484                 .enter_s2idle = intel_idle_s2idle, },
485         {
486                 .name = "C1E",
487                 .desc = "MWAIT 0x01",
488                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
489                 .exit_latency = 10,
490                 .target_residency = 250,
491                 .enter = &intel_idle,
492                 .enter_s2idle = intel_idle_s2idle, },
493         {
494                 .name = "C3",
495                 .desc = "MWAIT 0x10",
496                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
497                 .exit_latency = 59,
498                 .target_residency = 300,
499                 .enter = &intel_idle,
500                 .enter_s2idle = intel_idle_s2idle, },
501         {
502                 .name = "C6",
503                 .desc = "MWAIT 0x20",
504                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
505                 .exit_latency = 84,
506                 .target_residency = 400,
507                 .enter = &intel_idle,
508                 .enter_s2idle = intel_idle_s2idle, },
509         {
510                 .enter = NULL }
511 };
512
513 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
514         {
515                 .name = "C1",
516                 .desc = "MWAIT 0x00",
517                 .flags = MWAIT2flg(0x00),
518                 .exit_latency = 1,
519                 .target_residency = 1,
520                 .enter = &intel_idle,
521                 .enter_s2idle = intel_idle_s2idle, },
522         {
523                 .name = "C1E",
524                 .desc = "MWAIT 0x01",
525                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
526                 .exit_latency = 10,
527                 .target_residency = 500,
528                 .enter = &intel_idle,
529                 .enter_s2idle = intel_idle_s2idle, },
530         {
531                 .name = "C3",
532                 .desc = "MWAIT 0x10",
533                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
534                 .exit_latency = 59,
535                 .target_residency = 600,
536                 .enter = &intel_idle,
537                 .enter_s2idle = intel_idle_s2idle, },
538         {
539                 .name = "C6",
540                 .desc = "MWAIT 0x20",
541                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
542                 .exit_latency = 88,
543                 .target_residency = 700,
544                 .enter = &intel_idle,
545                 .enter_s2idle = intel_idle_s2idle, },
546         {
547                 .enter = NULL }
548 };
549
550 static struct cpuidle_state hsw_cstates[] __initdata = {
551         {
552                 .name = "C1",
553                 .desc = "MWAIT 0x00",
554                 .flags = MWAIT2flg(0x00),
555                 .exit_latency = 2,
556                 .target_residency = 2,
557                 .enter = &intel_idle,
558                 .enter_s2idle = intel_idle_s2idle, },
559         {
560                 .name = "C1E",
561                 .desc = "MWAIT 0x01",
562                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
563                 .exit_latency = 10,
564                 .target_residency = 20,
565                 .enter = &intel_idle,
566                 .enter_s2idle = intel_idle_s2idle, },
567         {
568                 .name = "C3",
569                 .desc = "MWAIT 0x10",
570                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
571                 .exit_latency = 33,
572                 .target_residency = 100,
573                 .enter = &intel_idle,
574                 .enter_s2idle = intel_idle_s2idle, },
575         {
576                 .name = "C6",
577                 .desc = "MWAIT 0x20",
578                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
579                 .exit_latency = 133,
580                 .target_residency = 400,
581                 .enter = &intel_idle,
582                 .enter_s2idle = intel_idle_s2idle, },
583         {
584                 .name = "C7s",
585                 .desc = "MWAIT 0x32",
586                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
587                 .exit_latency = 166,
588                 .target_residency = 500,
589                 .enter = &intel_idle,
590                 .enter_s2idle = intel_idle_s2idle, },
591         {
592                 .name = "C8",
593                 .desc = "MWAIT 0x40",
594                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
595                 .exit_latency = 300,
596                 .target_residency = 900,
597                 .enter = &intel_idle,
598                 .enter_s2idle = intel_idle_s2idle, },
599         {
600                 .name = "C9",
601                 .desc = "MWAIT 0x50",
602                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
603                 .exit_latency = 600,
604                 .target_residency = 1800,
605                 .enter = &intel_idle,
606                 .enter_s2idle = intel_idle_s2idle, },
607         {
608                 .name = "C10",
609                 .desc = "MWAIT 0x60",
610                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
611                 .exit_latency = 2600,
612                 .target_residency = 7700,
613                 .enter = &intel_idle,
614                 .enter_s2idle = intel_idle_s2idle, },
615         {
616                 .enter = NULL }
617 };
618 static struct cpuidle_state bdw_cstates[] __initdata = {
619         {
620                 .name = "C1",
621                 .desc = "MWAIT 0x00",
622                 .flags = MWAIT2flg(0x00),
623                 .exit_latency = 2,
624                 .target_residency = 2,
625                 .enter = &intel_idle,
626                 .enter_s2idle = intel_idle_s2idle, },
627         {
628                 .name = "C1E",
629                 .desc = "MWAIT 0x01",
630                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
631                 .exit_latency = 10,
632                 .target_residency = 20,
633                 .enter = &intel_idle,
634                 .enter_s2idle = intel_idle_s2idle, },
635         {
636                 .name = "C3",
637                 .desc = "MWAIT 0x10",
638                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
639                 .exit_latency = 40,
640                 .target_residency = 100,
641                 .enter = &intel_idle,
642                 .enter_s2idle = intel_idle_s2idle, },
643         {
644                 .name = "C6",
645                 .desc = "MWAIT 0x20",
646                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
647                 .exit_latency = 133,
648                 .target_residency = 400,
649                 .enter = &intel_idle,
650                 .enter_s2idle = intel_idle_s2idle, },
651         {
652                 .name = "C7s",
653                 .desc = "MWAIT 0x32",
654                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
655                 .exit_latency = 166,
656                 .target_residency = 500,
657                 .enter = &intel_idle,
658                 .enter_s2idle = intel_idle_s2idle, },
659         {
660                 .name = "C8",
661                 .desc = "MWAIT 0x40",
662                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
663                 .exit_latency = 300,
664                 .target_residency = 900,
665                 .enter = &intel_idle,
666                 .enter_s2idle = intel_idle_s2idle, },
667         {
668                 .name = "C9",
669                 .desc = "MWAIT 0x50",
670                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
671                 .exit_latency = 600,
672                 .target_residency = 1800,
673                 .enter = &intel_idle,
674                 .enter_s2idle = intel_idle_s2idle, },
675         {
676                 .name = "C10",
677                 .desc = "MWAIT 0x60",
678                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
679                 .exit_latency = 2600,
680                 .target_residency = 7700,
681                 .enter = &intel_idle,
682                 .enter_s2idle = intel_idle_s2idle, },
683         {
684                 .enter = NULL }
685 };
686
687 static struct cpuidle_state skl_cstates[] __initdata = {
688         {
689                 .name = "C1",
690                 .desc = "MWAIT 0x00",
691                 .flags = MWAIT2flg(0x00),
692                 .exit_latency = 2,
693                 .target_residency = 2,
694                 .enter = &intel_idle,
695                 .enter_s2idle = intel_idle_s2idle, },
696         {
697                 .name = "C1E",
698                 .desc = "MWAIT 0x01",
699                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
700                 .exit_latency = 10,
701                 .target_residency = 20,
702                 .enter = &intel_idle,
703                 .enter_s2idle = intel_idle_s2idle, },
704         {
705                 .name = "C3",
706                 .desc = "MWAIT 0x10",
707                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
708                 .exit_latency = 70,
709                 .target_residency = 100,
710                 .enter = &intel_idle,
711                 .enter_s2idle = intel_idle_s2idle, },
712         {
713                 .name = "C6",
714                 .desc = "MWAIT 0x20",
715                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
716                 .exit_latency = 85,
717                 .target_residency = 200,
718                 .enter = &intel_idle,
719                 .enter_s2idle = intel_idle_s2idle, },
720         {
721                 .name = "C7s",
722                 .desc = "MWAIT 0x33",
723                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
724                 .exit_latency = 124,
725                 .target_residency = 800,
726                 .enter = &intel_idle,
727                 .enter_s2idle = intel_idle_s2idle, },
728         {
729                 .name = "C8",
730                 .desc = "MWAIT 0x40",
731                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
732                 .exit_latency = 200,
733                 .target_residency = 800,
734                 .enter = &intel_idle,
735                 .enter_s2idle = intel_idle_s2idle, },
736         {
737                 .name = "C9",
738                 .desc = "MWAIT 0x50",
739                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
740                 .exit_latency = 480,
741                 .target_residency = 5000,
742                 .enter = &intel_idle,
743                 .enter_s2idle = intel_idle_s2idle, },
744         {
745                 .name = "C10",
746                 .desc = "MWAIT 0x60",
747                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
748                 .exit_latency = 890,
749                 .target_residency = 5000,
750                 .enter = &intel_idle,
751                 .enter_s2idle = intel_idle_s2idle, },
752         {
753                 .enter = NULL }
754 };
755
756 static struct cpuidle_state skx_cstates[] __initdata = {
757         {
758                 .name = "C1",
759                 .desc = "MWAIT 0x00",
760                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
761                 .exit_latency = 2,
762                 .target_residency = 2,
763                 .enter = &intel_idle,
764                 .enter_s2idle = intel_idle_s2idle, },
765         {
766                 .name = "C1E",
767                 .desc = "MWAIT 0x01",
768                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
769                 .exit_latency = 10,
770                 .target_residency = 20,
771                 .enter = &intel_idle,
772                 .enter_s2idle = intel_idle_s2idle, },
773         {
774                 .name = "C6",
775                 .desc = "MWAIT 0x20",
776                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
777                 .exit_latency = 133,
778                 .target_residency = 600,
779                 .enter = &intel_idle,
780                 .enter_s2idle = intel_idle_s2idle, },
781         {
782                 .enter = NULL }
783 };
784
785 static struct cpuidle_state icx_cstates[] __initdata = {
786         {
787                 .name = "C1",
788                 .desc = "MWAIT 0x00",
789                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
790                 .exit_latency = 1,
791                 .target_residency = 1,
792                 .enter = &intel_idle,
793                 .enter_s2idle = intel_idle_s2idle, },
794         {
795                 .name = "C1E",
796                 .desc = "MWAIT 0x01",
797                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
798                 .exit_latency = 4,
799                 .target_residency = 4,
800                 .enter = &intel_idle,
801                 .enter_s2idle = intel_idle_s2idle, },
802         {
803                 .name = "C6",
804                 .desc = "MWAIT 0x20",
805                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
806                 .exit_latency = 170,
807                 .target_residency = 600,
808                 .enter = &intel_idle,
809                 .enter_s2idle = intel_idle_s2idle, },
810         {
811                 .enter = NULL }
812 };
813
814 /*
815  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
816  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
817  * But in this case there is effectively no C1, because C1 requests are
818  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
819  * and C1E requests end up with C1, so there is effectively no C1E.
820  *
821  * By default we enable C1E and disable C1 by marking it with
822  * 'CPUIDLE_FLAG_UNUSABLE'.
823  */
824 static struct cpuidle_state adl_cstates[] __initdata = {
825         {
826                 .name = "C1",
827                 .desc = "MWAIT 0x00",
828                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
829                 .exit_latency = 1,
830                 .target_residency = 1,
831                 .enter = &intel_idle,
832                 .enter_s2idle = intel_idle_s2idle, },
833         {
834                 .name = "C1E",
835                 .desc = "MWAIT 0x01",
836                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
837                 .exit_latency = 2,
838                 .target_residency = 4,
839                 .enter = &intel_idle,
840                 .enter_s2idle = intel_idle_s2idle, },
841         {
842                 .name = "C6",
843                 .desc = "MWAIT 0x20",
844                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
845                 .exit_latency = 220,
846                 .target_residency = 600,
847                 .enter = &intel_idle,
848                 .enter_s2idle = intel_idle_s2idle, },
849         {
850                 .name = "C8",
851                 .desc = "MWAIT 0x40",
852                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
853                 .exit_latency = 280,
854                 .target_residency = 800,
855                 .enter = &intel_idle,
856                 .enter_s2idle = intel_idle_s2idle, },
857         {
858                 .name = "C10",
859                 .desc = "MWAIT 0x60",
860                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
861                 .exit_latency = 680,
862                 .target_residency = 2000,
863                 .enter = &intel_idle,
864                 .enter_s2idle = intel_idle_s2idle, },
865         {
866                 .enter = NULL }
867 };
868
869 static struct cpuidle_state adl_l_cstates[] __initdata = {
870         {
871                 .name = "C1",
872                 .desc = "MWAIT 0x00",
873                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
874                 .exit_latency = 1,
875                 .target_residency = 1,
876                 .enter = &intel_idle,
877                 .enter_s2idle = intel_idle_s2idle, },
878         {
879                 .name = "C1E",
880                 .desc = "MWAIT 0x01",
881                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
882                 .exit_latency = 2,
883                 .target_residency = 4,
884                 .enter = &intel_idle,
885                 .enter_s2idle = intel_idle_s2idle, },
886         {
887                 .name = "C6",
888                 .desc = "MWAIT 0x20",
889                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
890                 .exit_latency = 170,
891                 .target_residency = 500,
892                 .enter = &intel_idle,
893                 .enter_s2idle = intel_idle_s2idle, },
894         {
895                 .name = "C8",
896                 .desc = "MWAIT 0x40",
897                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
898                 .exit_latency = 200,
899                 .target_residency = 600,
900                 .enter = &intel_idle,
901                 .enter_s2idle = intel_idle_s2idle, },
902         {
903                 .name = "C10",
904                 .desc = "MWAIT 0x60",
905                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
906                 .exit_latency = 230,
907                 .target_residency = 700,
908                 .enter = &intel_idle,
909                 .enter_s2idle = intel_idle_s2idle, },
910         {
911                 .enter = NULL }
912 };
913
914 static struct cpuidle_state spr_cstates[] __initdata = {
915         {
916                 .name = "C1",
917                 .desc = "MWAIT 0x00",
918                 .flags = MWAIT2flg(0x00),
919                 .exit_latency = 1,
920                 .target_residency = 1,
921                 .enter = &intel_idle,
922                 .enter_s2idle = intel_idle_s2idle, },
923         {
924                 .name = "C1E",
925                 .desc = "MWAIT 0x01",
926                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
927                 .exit_latency = 2,
928                 .target_residency = 4,
929                 .enter = &intel_idle,
930                 .enter_s2idle = intel_idle_s2idle, },
931         {
932                 .name = "C6",
933                 .desc = "MWAIT 0x20",
934                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
935                 .exit_latency = 290,
936                 .target_residency = 800,
937                 .enter = &intel_idle,
938                 .enter_s2idle = intel_idle_s2idle, },
939         {
940                 .enter = NULL }
941 };
942
943 static struct cpuidle_state atom_cstates[] __initdata = {
944         {
945                 .name = "C1E",
946                 .desc = "MWAIT 0x00",
947                 .flags = MWAIT2flg(0x00),
948                 .exit_latency = 10,
949                 .target_residency = 20,
950                 .enter = &intel_idle,
951                 .enter_s2idle = intel_idle_s2idle, },
952         {
953                 .name = "C2",
954                 .desc = "MWAIT 0x10",
955                 .flags = MWAIT2flg(0x10),
956                 .exit_latency = 20,
957                 .target_residency = 80,
958                 .enter = &intel_idle,
959                 .enter_s2idle = intel_idle_s2idle, },
960         {
961                 .name = "C4",
962                 .desc = "MWAIT 0x30",
963                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
964                 .exit_latency = 100,
965                 .target_residency = 400,
966                 .enter = &intel_idle,
967                 .enter_s2idle = intel_idle_s2idle, },
968         {
969                 .name = "C6",
970                 .desc = "MWAIT 0x52",
971                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
972                 .exit_latency = 140,
973                 .target_residency = 560,
974                 .enter = &intel_idle,
975                 .enter_s2idle = intel_idle_s2idle, },
976         {
977                 .enter = NULL }
978 };
979 static struct cpuidle_state tangier_cstates[] __initdata = {
980         {
981                 .name = "C1",
982                 .desc = "MWAIT 0x00",
983                 .flags = MWAIT2flg(0x00),
984                 .exit_latency = 1,
985                 .target_residency = 4,
986                 .enter = &intel_idle,
987                 .enter_s2idle = intel_idle_s2idle, },
988         {
989                 .name = "C4",
990                 .desc = "MWAIT 0x30",
991                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
992                 .exit_latency = 100,
993                 .target_residency = 400,
994                 .enter = &intel_idle,
995                 .enter_s2idle = intel_idle_s2idle, },
996         {
997                 .name = "C6",
998                 .desc = "MWAIT 0x52",
999                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1000                 .exit_latency = 140,
1001                 .target_residency = 560,
1002                 .enter = &intel_idle,
1003                 .enter_s2idle = intel_idle_s2idle, },
1004         {
1005                 .name = "C7",
1006                 .desc = "MWAIT 0x60",
1007                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1008                 .exit_latency = 1200,
1009                 .target_residency = 4000,
1010                 .enter = &intel_idle,
1011                 .enter_s2idle = intel_idle_s2idle, },
1012         {
1013                 .name = "C9",
1014                 .desc = "MWAIT 0x64",
1015                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
1016                 .exit_latency = 10000,
1017                 .target_residency = 20000,
1018                 .enter = &intel_idle,
1019                 .enter_s2idle = intel_idle_s2idle, },
1020         {
1021                 .enter = NULL }
1022 };
1023 static struct cpuidle_state avn_cstates[] __initdata = {
1024         {
1025                 .name = "C1",
1026                 .desc = "MWAIT 0x00",
1027                 .flags = MWAIT2flg(0x00),
1028                 .exit_latency = 2,
1029                 .target_residency = 2,
1030                 .enter = &intel_idle,
1031                 .enter_s2idle = intel_idle_s2idle, },
1032         {
1033                 .name = "C6",
1034                 .desc = "MWAIT 0x51",
1035                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1036                 .exit_latency = 15,
1037                 .target_residency = 45,
1038                 .enter = &intel_idle,
1039                 .enter_s2idle = intel_idle_s2idle, },
1040         {
1041                 .enter = NULL }
1042 };
1043 static struct cpuidle_state knl_cstates[] __initdata = {
1044         {
1045                 .name = "C1",
1046                 .desc = "MWAIT 0x00",
1047                 .flags = MWAIT2flg(0x00),
1048                 .exit_latency = 1,
1049                 .target_residency = 2,
1050                 .enter = &intel_idle,
1051                 .enter_s2idle = intel_idle_s2idle },
1052         {
1053                 .name = "C6",
1054                 .desc = "MWAIT 0x10",
1055                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1056                 .exit_latency = 120,
1057                 .target_residency = 500,
1058                 .enter = &intel_idle,
1059                 .enter_s2idle = intel_idle_s2idle },
1060         {
1061                 .enter = NULL }
1062 };
1063
1064 static struct cpuidle_state bxt_cstates[] __initdata = {
1065         {
1066                 .name = "C1",
1067                 .desc = "MWAIT 0x00",
1068                 .flags = MWAIT2flg(0x00),
1069                 .exit_latency = 2,
1070                 .target_residency = 2,
1071                 .enter = &intel_idle,
1072                 .enter_s2idle = intel_idle_s2idle, },
1073         {
1074                 .name = "C1E",
1075                 .desc = "MWAIT 0x01",
1076                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1077                 .exit_latency = 10,
1078                 .target_residency = 20,
1079                 .enter = &intel_idle,
1080                 .enter_s2idle = intel_idle_s2idle, },
1081         {
1082                 .name = "C6",
1083                 .desc = "MWAIT 0x20",
1084                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1085                 .exit_latency = 133,
1086                 .target_residency = 133,
1087                 .enter = &intel_idle,
1088                 .enter_s2idle = intel_idle_s2idle, },
1089         {
1090                 .name = "C7s",
1091                 .desc = "MWAIT 0x31",
1092                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1093                 .exit_latency = 155,
1094                 .target_residency = 155,
1095                 .enter = &intel_idle,
1096                 .enter_s2idle = intel_idle_s2idle, },
1097         {
1098                 .name = "C8",
1099                 .desc = "MWAIT 0x40",
1100                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1101                 .exit_latency = 1000,
1102                 .target_residency = 1000,
1103                 .enter = &intel_idle,
1104                 .enter_s2idle = intel_idle_s2idle, },
1105         {
1106                 .name = "C9",
1107                 .desc = "MWAIT 0x50",
1108                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1109                 .exit_latency = 2000,
1110                 .target_residency = 2000,
1111                 .enter = &intel_idle,
1112                 .enter_s2idle = intel_idle_s2idle, },
1113         {
1114                 .name = "C10",
1115                 .desc = "MWAIT 0x60",
1116                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1117                 .exit_latency = 10000,
1118                 .target_residency = 10000,
1119                 .enter = &intel_idle,
1120                 .enter_s2idle = intel_idle_s2idle, },
1121         {
1122                 .enter = NULL }
1123 };
1124
1125 static struct cpuidle_state dnv_cstates[] __initdata = {
1126         {
1127                 .name = "C1",
1128                 .desc = "MWAIT 0x00",
1129                 .flags = MWAIT2flg(0x00),
1130                 .exit_latency = 2,
1131                 .target_residency = 2,
1132                 .enter = &intel_idle,
1133                 .enter_s2idle = intel_idle_s2idle, },
1134         {
1135                 .name = "C1E",
1136                 .desc = "MWAIT 0x01",
1137                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1138                 .exit_latency = 10,
1139                 .target_residency = 20,
1140                 .enter = &intel_idle,
1141                 .enter_s2idle = intel_idle_s2idle, },
1142         {
1143                 .name = "C6",
1144                 .desc = "MWAIT 0x20",
1145                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1146                 .exit_latency = 50,
1147                 .target_residency = 500,
1148                 .enter = &intel_idle,
1149                 .enter_s2idle = intel_idle_s2idle, },
1150         {
1151                 .enter = NULL }
1152 };
1153
1154 /*
1155  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1156  * C6, and this is indicated in the CPUID mwait leaf.
1157  */
1158 static struct cpuidle_state snr_cstates[] __initdata = {
1159         {
1160                 .name = "C1",
1161                 .desc = "MWAIT 0x00",
1162                 .flags = MWAIT2flg(0x00),
1163                 .exit_latency = 2,
1164                 .target_residency = 2,
1165                 .enter = &intel_idle,
1166                 .enter_s2idle = intel_idle_s2idle, },
1167         {
1168                 .name = "C1E",
1169                 .desc = "MWAIT 0x01",
1170                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1171                 .exit_latency = 15,
1172                 .target_residency = 25,
1173                 .enter = &intel_idle,
1174                 .enter_s2idle = intel_idle_s2idle, },
1175         {
1176                 .name = "C6",
1177                 .desc = "MWAIT 0x20",
1178                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1179                 .exit_latency = 130,
1180                 .target_residency = 500,
1181                 .enter = &intel_idle,
1182                 .enter_s2idle = intel_idle_s2idle, },
1183         {
1184                 .enter = NULL }
1185 };
1186
/* Selected by intel_idle_ids for NEHALEM, NEHALEM_G and WESTMERE. */
1187 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1188         .state_table = nehalem_cstates,
1189         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1190         .disable_promotion_to_c1e = true,
1191 };
1192
/*
 * Selected by intel_idle_ids for NEHALEM_EP, NEHALEM_EX, WESTMERE_EP and
 * WESTMERE_EX.  Same settings as idle_cpu_nehalem plus .use_acpi.
 */
1193 static const struct idle_cpu idle_cpu_nhx __initconst = {
1194         .state_table = nehalem_cstates,
1195         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1196         .disable_promotion_to_c1e = true,
1197         .use_acpi = true,
1198 };
1199
/* Selected by intel_idle_ids for ATOM_BONNELL and ATOM_SALTWELL. */
1200 static const struct idle_cpu idle_cpu_atom __initconst = {
1201         .state_table = atom_cstates,
1202 };
1203
/* Selected by intel_idle_ids for ATOM_SILVERMONT_MID. */
1204 static const struct idle_cpu idle_cpu_tangier __initconst = {
1205         .state_table = tangier_cstates,
1206 };
1207
/*
 * Selected by intel_idle_ids for ATOM_BONNELL_MID.  Shares atom_cstates with
 * idle_cpu_atom but additionally sets ATM_LNC_C6_AUTO_DEMOTE.
 */
1208 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1209         .state_table = atom_cstates,
1210         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1211 };
1212
/* Selected by intel_idle_ids for SANDYBRIDGE. */
1213 static const struct idle_cpu idle_cpu_snb __initconst = {
1214         .state_table = snb_cstates,
1215         .disable_promotion_to_c1e = true,
1216 };
1217
/* Selected by intel_idle_ids for SANDYBRIDGE_X; idle_cpu_snb plus .use_acpi. */
1218 static const struct idle_cpu idle_cpu_snx __initconst = {
1219         .state_table = snb_cstates,
1220         .disable_promotion_to_c1e = true,
1221         .use_acpi = true,
1222 };
1223
/* Selected by intel_idle_ids for ATOM_SILVERMONT. */
1224 static const struct idle_cpu idle_cpu_byt __initconst = {
1225         .state_table = byt_cstates,
1226         .disable_promotion_to_c1e = true,
1227         .byt_auto_demotion_disable_flag = true,
1228 };
1229
/* Selected by intel_idle_ids for ATOM_AIRMONT. */
1230 static const struct idle_cpu idle_cpu_cht __initconst = {
1231         .state_table = cht_cstates,
1232         .disable_promotion_to_c1e = true,
1233         .byt_auto_demotion_disable_flag = true,
1234 };
1235
/* Selected by intel_idle_ids for IVYBRIDGE. */
1236 static const struct idle_cpu idle_cpu_ivb __initconst = {
1237         .state_table = ivb_cstates,
1238         .disable_promotion_to_c1e = true,
1239 };
1240
/* Selected by intel_idle_ids for IVYBRIDGE_X. */
1241 static const struct idle_cpu idle_cpu_ivt __initconst = {
1242         .state_table = ivt_cstates,
1243         .disable_promotion_to_c1e = true,
1244         .use_acpi = true,
1245 };
1246
/* Selected by intel_idle_ids for HASWELL, HASWELL_L and HASWELL_G. */
1247 static const struct idle_cpu idle_cpu_hsw __initconst = {
1248         .state_table = hsw_cstates,
1249         .disable_promotion_to_c1e = true,
1250 };
1251
/* Selected by intel_idle_ids for HASWELL_X; idle_cpu_hsw plus .use_acpi. */
1252 static const struct idle_cpu idle_cpu_hsx __initconst = {
1253         .state_table = hsw_cstates,
1254         .disable_promotion_to_c1e = true,
1255         .use_acpi = true,
1256 };
1257
/* Selected by intel_idle_ids for BROADWELL and BROADWELL_G. */
1258 static const struct idle_cpu idle_cpu_bdw __initconst = {
1259         .state_table = bdw_cstates,
1260         .disable_promotion_to_c1e = true,
1261 };
1262
/*
 * Selected by intel_idle_ids for BROADWELL_X and BROADWELL_D; idle_cpu_bdw
 * plus .use_acpi.
 */
1263 static const struct idle_cpu idle_cpu_bdx __initconst = {
1264         .state_table = bdw_cstates,
1265         .disable_promotion_to_c1e = true,
1266         .use_acpi = true,
1267 };
1268
/* Selected by intel_idle_ids for SKYLAKE_L, SKYLAKE, KABYLAKE_L and KABYLAKE. */
1269 static const struct idle_cpu idle_cpu_skl __initconst = {
1270         .state_table = skl_cstates,
1271         .disable_promotion_to_c1e = true,
1272 };
1273
/* Selected by intel_idle_ids for SKYLAKE_X. */
1274 static const struct idle_cpu idle_cpu_skx __initconst = {
1275         .state_table = skx_cstates,
1276         .disable_promotion_to_c1e = true,
1277         .use_acpi = true,
1278 };
1279
/* Selected by intel_idle_ids for ICELAKE_X and ICELAKE_D. */
1280 static const struct idle_cpu idle_cpu_icx __initconst = {
1281         .state_table = icx_cstates,
1282         .disable_promotion_to_c1e = true,
1283         .use_acpi = true,
1284 };
1285
/* Selected by intel_idle_ids for ALDERLAKE. */
1286 static const struct idle_cpu idle_cpu_adl __initconst = {
1287         .state_table = adl_cstates,
1288 };
1289
/* Selected by intel_idle_ids for ALDERLAKE_L. */
1290 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1291         .state_table = adl_l_cstates,
1292 };
1293
/* Selected by intel_idle_ids for SAPPHIRERAPIDS_X. */
1294 static const struct idle_cpu idle_cpu_spr __initconst = {
1295         .state_table = spr_cstates,
1296         .disable_promotion_to_c1e = true,
1297         .use_acpi = true,
1298 };
1299
/* Selected by intel_idle_ids for ATOM_SILVERMONT_D. */
1300 static const struct idle_cpu idle_cpu_avn __initconst = {
1301         .state_table = avn_cstates,
1302         .disable_promotion_to_c1e = true,
1303         .use_acpi = true,
1304 };
1305
/* Selected by intel_idle_ids for XEON_PHI_KNL and XEON_PHI_KNM. */
1306 static const struct idle_cpu idle_cpu_knl __initconst = {
1307         .state_table = knl_cstates,
1308         .use_acpi = true,
1309 };
1310
/* Selected by intel_idle_ids for ATOM_GOLDMONT and ATOM_GOLDMONT_PLUS. */
1311 static const struct idle_cpu idle_cpu_bxt __initconst = {
1312         .state_table = bxt_cstates,
1313         .disable_promotion_to_c1e = true,
1314 };
1315
/* Selected by intel_idle_ids for ATOM_GOLDMONT_D. */
1316 static const struct idle_cpu idle_cpu_dnv __initconst = {
1317         .state_table = dnv_cstates,
1318         .disable_promotion_to_c1e = true,
1319         .use_acpi = true,
1320 };
1321
/* Selected by intel_idle_ids for ATOM_TREMONT_D. */
1322 static const struct idle_cpu idle_cpu_snr __initconst = {
1323         .state_table = snr_cstates,
1324         .disable_promotion_to_c1e = true,
1325         .use_acpi = true,
1326 };
1327
/*
 * CPU match table: associates each supported Intel family 6 model with the
 * idle_cpu descriptor (and thereby the C-states table and quirk flags) to use
 * for it.  The list ends with an empty entry.
 * NOTE(review): the lookup itself is outside this chunk - presumably done via
 * x86_match_cpu() during driver init; confirm against the init code.
 */
1328 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1329         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &idle_cpu_nhx),
1330         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &idle_cpu_nehalem),
1331         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &idle_cpu_nehalem),
1332         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &idle_cpu_nehalem),
1333         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &idle_cpu_nhx),
1334         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &idle_cpu_nhx),
1335         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &idle_cpu_atom),
1336         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
1337         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &idle_cpu_nhx),
1338         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &idle_cpu_snb),
1339         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &idle_cpu_snx),
1340         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &idle_cpu_atom),
1341         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &idle_cpu_byt),
1342         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1343         X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &idle_cpu_cht),
1344         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &idle_cpu_ivb),
1345         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &idle_cpu_ivt),
1346         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &idle_cpu_hsw),
1347         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &idle_cpu_hsx),
1348         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &idle_cpu_hsw),
1349         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &idle_cpu_hsw),
1350         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
1351         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &idle_cpu_bdw),
1352         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &idle_cpu_bdw),
1353         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &idle_cpu_bdx),
1354         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &idle_cpu_bdx),
1355         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &idle_cpu_skl),
1356         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &idle_cpu_skl),
1357         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &idle_cpu_skl),
1358         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &idle_cpu_skl),
1359         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
1360         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
1361         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
1362         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &idle_cpu_adl),
1363         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &idle_cpu_adl_l),
1364         X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
1365         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
1366         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
1367         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &idle_cpu_bxt),
1368         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
1369         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &idle_cpu_dnv),
1370         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &idle_cpu_snr),
1371         {}
1372 };
1373
/*
 * Generic match table: any Intel family 6 CPU advertising X86_FEATURE_MWAIT.
 * It carries no driver data (NULL), unlike the per-model intel_idle_ids table.
 * NOTE(review): presumably the fallback for models missing from
 * intel_idle_ids; the user of this table is outside this chunk - confirm.
 */
1374 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1375         X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
1376         {}
1377 };
1378
1379 static bool __init intel_idle_max_cstate_reached(int cstate)
1380 {
1381         if (cstate + 1 > max_cstate) {
1382                 pr_info("max_cstate %d reached\n", max_cstate);
1383                 return true;
1384         }
1385         return false;
1386 }
1387
1388 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1389 {
1390         unsigned long eax = flg2MWAIT(state->flags);
1391
1392         if (boot_cpu_has(X86_FEATURE_ARAT))
1393                 return false;
1394
1395         /*
1396          * Switch over to one-shot tick broadcast if the target C-state
1397          * is deeper than C1.
1398          */
1399         return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1400 }
1401
1402 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1403 #include <acpi/processor.h>
1404
/*
 * "no_acpi" module parameter (read-only in sysfs): when set,
 * intel_idle_acpi_cst_extract() refuses to look at ACPI _CST at all.
 */
1405 static bool no_acpi __read_mostly;
1406 module_param(no_acpi, bool, 0444);
1407 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1408
/* Exposed to users under the name "use_acpi" (read-only in sysfs). */
1409 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1410 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1411 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1412
/*
 * Init-time copy of the _CST data, filled in by
 * intel_idle_acpi_cst_extract(); .count stays 0 when no usable _CST exists.
 */
1413 static struct acpi_processor_power acpi_state_table __initdata;
1414
1415 /**
1416  * intel_idle_cst_usable - Check if the _CST information can be used.
1417  *
1418  * Check if all of the C-states listed by _CST in the max_cstate range are
1419  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1420  */
1421 static bool __init intel_idle_cst_usable(void)
1422 {
1423         int cstate, limit;
1424
1425         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1426                       acpi_state_table.count);
1427
1428         for (cstate = 1; cstate < limit; cstate++) {
1429                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1430
1431                 if (cx->entry_method != ACPI_CSTATE_FFH)
1432                         return false;
1433         }
1434
1435         return true;
1436 }
1437
1438 static bool __init intel_idle_acpi_cst_extract(void)
1439 {
1440         unsigned int cpu;
1441
1442         if (no_acpi) {
1443                 pr_debug("Not allowed to use ACPI _CST\n");
1444                 return false;
1445         }
1446
1447         for_each_possible_cpu(cpu) {
1448                 struct acpi_processor *pr = per_cpu(processors, cpu);
1449
1450                 if (!pr)
1451                         continue;
1452
1453                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1454                         continue;
1455
1456                 acpi_state_table.count++;
1457
1458                 if (!intel_idle_cst_usable())
1459                         continue;
1460
1461                 if (!acpi_processor_claim_cst_control())
1462                         break;
1463
1464                 return true;
1465         }
1466
1467         acpi_state_table.count = 0;
1468         pr_debug("ACPI _CST not found or not usable\n");
1469         return false;
1470 }
1471
1472 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1473 {
1474         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1475
1476         /*
1477          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1478          * the interesting states are ACPI_CSTATE_FFH.
1479          */
1480         for (cstate = 1; cstate < limit; cstate++) {
1481                 struct acpi_processor_cx *cx;
1482                 struct cpuidle_state *state;
1483
1484                 if (intel_idle_max_cstate_reached(cstate - 1))
1485                         break;
1486
1487                 cx = &acpi_state_table.states[cstate];
1488
1489                 state = &drv->states[drv->state_count++];
1490
1491                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1492                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1493                 state->exit_latency = cx->latency;
1494                 /*
1495                  * For C1-type C-states use the same number for both the exit
1496                  * latency and target residency, because that is the case for
1497                  * C1 in the majority of the static C-states tables above.
1498                  * For the other types of C-states, however, set the target
1499                  * residency to 3 times the exit latency which should lead to
1500                  * a reasonable balance between energy-efficiency and
1501                  * performance in the majority of interesting cases.
1502                  */
1503                 state->target_residency = cx->latency;
1504                 if (cx->type > ACPI_STATE_C1)
1505                         state->target_residency *= 3;
1506
1507                 state->flags = MWAIT2flg(cx->address);
1508                 if (cx->type > ACPI_STATE_C2)
1509                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1510
1511                 if (disabled_states_mask & BIT(cstate))
1512                         state->flags |= CPUIDLE_FLAG_OFF;
1513
1514                 if (intel_idle_state_needs_timer_stop(state))
1515                         state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1516
1517                 state->enter = intel_idle;
1518                 state->enter_s2idle = intel_idle_s2idle;
1519         }
1520 }
1521
1522 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1523 {
1524         int cstate, limit;
1525
1526         /*
1527          * If there are no _CST C-states, do not disable any C-states by
1528          * default.
1529          */
1530         if (!acpi_state_table.count)
1531                 return false;
1532
1533         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1534         /*
1535          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1536          * the interesting states are ACPI_CSTATE_FFH.
1537          */
1538         for (cstate = 1; cstate < limit; cstate++) {
1539                 if (acpi_state_table.states[cstate].address == mwait_hint)
1540                         return false;
1541         }
1542         return true;
1543 }
1544 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
/*
 * Fallbacks for kernels built without CONFIG_ACPI_PROCESSOR_CSTATE: ACPI _CST
 * is never forced, never extracted, contributes no states and never causes a
 * C-state to be disabled by default.
 */
1545 #define force_use_acpi  (false)
1546
1547 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1548 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1549 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1550 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1551
1552 /**
1553  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1554  *
1555  * Tune IVT multi-socket targets.
1556  * Assumption: num_sockets == (max_package_num + 1).
1557  */
1558 static void __init ivt_idle_state_table_update(void)
1559 {
1560         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1561         int cpu, package_num, num_sockets = 1;
1562
1563         for_each_online_cpu(cpu) {
1564                 package_num = topology_physical_package_id(cpu);
1565                 if (package_num + 1 > num_sockets) {
1566                         num_sockets = package_num + 1;
1567
1568                         if (num_sockets > 4) {
1569                                 cpuidle_state_table = ivt_cstates_8s;
1570                                 return;
1571                         }
1572                 }
1573         }
1574
1575         if (num_sockets > 2)
1576                 cpuidle_state_table = ivt_cstates_4s;
1577
1578         /* else, 1 and 2 socket systems use default ivt_cstates */
1579 }
1580
1581 /**
1582  * irtl_2_usec - IRTL to microseconds conversion.
1583  * @irtl: IRTL MSR value.
1584  *
1585  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1586  */
1587 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1588 {
1589         static const unsigned int irtl_ns_units[] __initconst = {
1590                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1591         };
1592         unsigned long long ns;
1593
1594         if (!irtl)
1595                 return 0;
1596
1597         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1598
1599         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1600 }
1601
1602 /**
1603  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1604  *
1605  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1606  * definitive maximum latency and use the same value for target_residency.
1607  */
1608 static void __init bxt_idle_state_table_update(void)
1609 {
1610         unsigned long long msr;
1611         unsigned int usec;
1612
1613         rdmsrl(MSR_PKGC6_IRTL, msr);
1614         usec = irtl_2_usec(msr);
1615         if (usec) {
1616                 bxt_cstates[2].exit_latency = usec;
1617                 bxt_cstates[2].target_residency = usec;
1618         }
1619
1620         rdmsrl(MSR_PKGC7_IRTL, msr);
1621         usec = irtl_2_usec(msr);
1622         if (usec) {
1623                 bxt_cstates[3].exit_latency = usec;
1624                 bxt_cstates[3].target_residency = usec;
1625         }
1626
1627         rdmsrl(MSR_PKGC8_IRTL, msr);
1628         usec = irtl_2_usec(msr);
1629         if (usec) {
1630                 bxt_cstates[4].exit_latency = usec;
1631                 bxt_cstates[4].target_residency = usec;
1632         }
1633
1634         rdmsrl(MSR_PKGC9_IRTL, msr);
1635         usec = irtl_2_usec(msr);
1636         if (usec) {
1637                 bxt_cstates[5].exit_latency = usec;
1638                 bxt_cstates[5].target_residency = usec;
1639         }
1640
1641         rdmsrl(MSR_PKGC10_IRTL, msr);
1642         usec = irtl_2_usec(msr);
1643         if (usec) {
1644                 bxt_cstates[6].exit_latency = usec;
1645                 bxt_cstates[6].target_residency = usec;
1646         }
1647
1648 }
1649
1650 /**
1651  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1652  *
1653  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1654  */
1655 static void __init sklh_idle_state_table_update(void)
1656 {
1657         unsigned long long msr;
1658         unsigned int eax, ebx, ecx, edx;
1659
1660
1661         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1662         if (max_cstate <= 7)
1663                 return;
1664
1665         /* if PC10 not present in CPUID.MWAIT.EDX */
1666         if ((mwait_substates & (0xF << 28)) == 0)
1667                 return;
1668
1669         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1670
1671         /* PC10 is not enabled in PKG C-state limit */
1672         if ((msr & 0xF) != 8)
1673                 return;
1674
1675         ecx = 0;
1676         cpuid(7, &eax, &ebx, &ecx, &edx);
1677
1678         /* if SGX is present */
1679         if (ebx & (1 << 2)) {
1680
1681                 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1682
1683                 /* if SGX is enabled */
1684                 if (msr & (1 << 18))
1685                         return;
1686         }
1687
1688         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1689         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1690 }
1691
1692 /**
1693  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1694  * idle states table.
1695  */
1696 static void __init skx_idle_state_table_update(void)
1697 {
1698         unsigned long long msr;
1699
1700         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1701
1702         /*
1703          * 000b: C0/C1 (no package C-state support)
1704          * 001b: C2
1705          * 010b: C6 (non-retention)
1706          * 011b: C6 (retention)
1707          * 111b: No Package C state limits.
1708          */
1709         if ((msr & 0x7) < 2) {
1710                 /*
1711                  * Uses the CC6 + PC0 latency and 3 times of
1712                  * latency for target_residency if the PC6
1713                  * is disabled in BIOS. This is consistent
1714                  * with how intel_idle driver uses _CST
1715                  * to set the target_residency.
1716                  */
1717                 skx_cstates[2].exit_latency = 92;
1718                 skx_cstates[2].target_residency = 276;
1719         }
1720 }
1721
1722 /**
1723  * adl_idle_state_table_update - Adjust AlderLake idle states table.
1724  */
1725 static void __init adl_idle_state_table_update(void)
1726 {
1727         /* Check if user prefers C1 over C1E. */
1728         if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
1729                 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1730                 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1731
1732                 /* Disable C1E by clearing the "C1E promotion" bit. */
1733                 c1e_promotion = C1E_PROMOTION_DISABLE;
1734                 return;
1735         }
1736
1737         /* Make sure C1E is enabled by default */
1738         c1e_promotion = C1E_PROMOTION_ENABLE;
1739 }
1740
1741 /**
1742  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
1743  */
1744 static void __init spr_idle_state_table_update(void)
1745 {
1746         unsigned long long msr;
1747
1748         /*
1749          * By default, the C6 state assumes the worst-case scenario of package
1750          * C6. However, if PC6 is disabled, we update the numbers to match
1751          * core C6.
1752          */
1753         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1754
1755         /* Limit value 2 and above allow for PC6. */
1756         if ((msr & 0x7) < 2) {
1757                 spr_cstates[2].exit_latency = 190;
1758                 spr_cstates[2].target_residency = 600;
1759         }
1760 }
1761
1762 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1763 {
1764         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1765         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1766                                         MWAIT_SUBSTATE_MASK;
1767
1768         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1769         if (num_substates == 0)
1770                 return false;
1771
1772         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1773                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1774
1775         return true;
1776 }
1777
1778 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1779 {
1780         int cstate;
1781
1782         switch (boot_cpu_data.x86_model) {
1783         case INTEL_FAM6_IVYBRIDGE_X:
1784                 ivt_idle_state_table_update();
1785                 break;
1786         case INTEL_FAM6_ATOM_GOLDMONT:
1787         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1788                 bxt_idle_state_table_update();
1789                 break;
1790         case INTEL_FAM6_SKYLAKE:
1791                 sklh_idle_state_table_update();
1792                 break;
1793         case INTEL_FAM6_SKYLAKE_X:
1794                 skx_idle_state_table_update();
1795                 break;
1796         case INTEL_FAM6_SAPPHIRERAPIDS_X:
1797                 spr_idle_state_table_update();
1798                 break;
1799         case INTEL_FAM6_ALDERLAKE:
1800         case INTEL_FAM6_ALDERLAKE_L:
1801                 adl_idle_state_table_update();
1802                 break;
1803         }
1804
1805         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1806                 unsigned int mwait_hint;
1807
1808                 if (intel_idle_max_cstate_reached(cstate))
1809                         break;
1810
1811                 if (!cpuidle_state_table[cstate].enter &&
1812                     !cpuidle_state_table[cstate].enter_s2idle)
1813                         break;
1814
1815                 /* If marked as unusable, skip this state. */
1816                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1817                         pr_debug("state %s is disabled\n",
1818                                  cpuidle_state_table[cstate].name);
1819                         continue;
1820                 }
1821
1822                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1823                 if (!intel_idle_verify_cstate(mwait_hint))
1824                         continue;
1825
1826                 /* Structure copy. */
1827                 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1828
1829                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
1830                         drv->states[drv->state_count].enter = intel_idle_irq;
1831
1832                 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1833                     cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1834                         WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
1835                         drv->states[drv->state_count].enter = intel_idle_ibrs;
1836                 }
1837
1838                 if ((disabled_states_mask & BIT(drv->state_count)) ||
1839                     ((icpu->use_acpi || force_use_acpi) &&
1840                      intel_idle_off_by_default(mwait_hint) &&
1841                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1842                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1843
1844                 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1845                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1846
1847                 drv->state_count++;
1848         }
1849
1850         if (icpu->byt_auto_demotion_disable_flag) {
1851                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1852                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1853         }
1854 }
1855
1856 /**
1857  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1858  * @drv: cpuidle driver structure to initialize.
1859  */
1860 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1861 {
1862         cpuidle_poll_state_init(drv);
1863
1864         if (disabled_states_mask & BIT(0))
1865                 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1866
1867         drv->state_count = 1;
1868
1869         if (icpu)
1870                 intel_idle_init_cstates_icpu(drv);
1871         else
1872                 intel_idle_init_cstates_acpi(drv);
1873 }
1874
1875 static void auto_demotion_disable(void)
1876 {
1877         unsigned long long msr_bits;
1878
1879         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1880         msr_bits &= ~auto_demotion_disable_flags;
1881         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1882 }
1883
1884 static void c1e_promotion_enable(void)
1885 {
1886         unsigned long long msr_bits;
1887
1888         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1889         msr_bits |= 0x2;
1890         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1891 }
1892
1893 static void c1e_promotion_disable(void)
1894 {
1895         unsigned long long msr_bits;
1896
1897         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1898         msr_bits &= ~0x2;
1899         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1900 }
1901
1902 /**
1903  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1904  * @cpu: CPU to initialize.
1905  *
1906  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1907  * with the processor model flags.
1908  */
1909 static int intel_idle_cpu_init(unsigned int cpu)
1910 {
1911         struct cpuidle_device *dev;
1912
1913         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1914         dev->cpu = cpu;
1915
1916         if (cpuidle_register_device(dev)) {
1917                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1918                 return -EIO;
1919         }
1920
1921         if (auto_demotion_disable_flags)
1922                 auto_demotion_disable();
1923
1924         if (c1e_promotion == C1E_PROMOTION_ENABLE)
1925                 c1e_promotion_enable();
1926         else if (c1e_promotion == C1E_PROMOTION_DISABLE)
1927                 c1e_promotion_disable();
1928
1929         return 0;
1930 }
1931
1932 static int intel_idle_cpu_online(unsigned int cpu)
1933 {
1934         struct cpuidle_device *dev;
1935
1936         if (!boot_cpu_has(X86_FEATURE_ARAT))
1937                 tick_broadcast_enable();
1938
1939         /*
1940          * Some systems can hotplug a cpu at runtime after
1941          * the kernel has booted, we have to initialize the
1942          * driver in this case
1943          */
1944         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1945         if (!dev->registered)
1946                 return intel_idle_cpu_init(cpu);
1947
1948         return 0;
1949 }
1950
1951 /**
1952  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1953  */
1954 static void __init intel_idle_cpuidle_devices_uninit(void)
1955 {
1956         int i;
1957
1958         for_each_online_cpu(i)
1959                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1960 }
1961
1962 static int __init intel_idle_init(void)
1963 {
1964         const struct x86_cpu_id *id;
1965         unsigned int eax, ebx, ecx;
1966         int retval;
1967
1968         /* Do not load intel_idle at all for now if idle= is passed */
1969         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1970                 return -ENODEV;
1971
1972         if (max_cstate == 0) {
1973                 pr_debug("disabled\n");
1974                 return -EPERM;
1975         }
1976
1977         id = x86_match_cpu(intel_idle_ids);
1978         if (id) {
1979                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1980                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
1981                         return -ENODEV;
1982                 }
1983         } else {
1984                 id = x86_match_cpu(intel_mwait_ids);
1985                 if (!id)
1986                         return -ENODEV;
1987         }
1988
1989         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1990                 return -ENODEV;
1991
1992         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1993
1994         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1995             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1996             !mwait_substates)
1997                         return -ENODEV;
1998
1999         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2000
2001         icpu = (const struct idle_cpu *)id->driver_data;
2002         if (icpu) {
2003                 cpuidle_state_table = icpu->state_table;
2004                 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2005                 if (icpu->disable_promotion_to_c1e)
2006                         c1e_promotion = C1E_PROMOTION_DISABLE;
2007                 if (icpu->use_acpi || force_use_acpi)
2008                         intel_idle_acpi_cst_extract();
2009         } else if (!intel_idle_acpi_cst_extract()) {
2010                 return -ENODEV;
2011         }
2012
2013         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
2014                  boot_cpu_data.x86_model);
2015
2016         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2017         if (!intel_idle_cpuidle_devices)
2018                 return -ENOMEM;
2019
2020         intel_idle_cpuidle_driver_init(&intel_idle_driver);
2021
2022         retval = cpuidle_register_driver(&intel_idle_driver);
2023         if (retval) {
2024                 struct cpuidle_driver *drv = cpuidle_get_driver();
2025                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2026                        drv ? drv->name : "none");
2027                 goto init_driver_fail;
2028         }
2029
2030         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2031                                    intel_idle_cpu_online, NULL);
2032         if (retval < 0)
2033                 goto hp_setup_fail;
2034
2035         pr_debug("Local APIC timer is reliable in %s\n",
2036                  boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2037
2038         return 0;
2039
2040 hp_setup_fail:
2041         intel_idle_cpuidle_devices_uninit();
2042         cpuidle_unregister_driver(&intel_idle_driver);
2043 init_driver_fail:
2044         free_percpu(intel_idle_cpuidle_devices);
2045         return retval;
2046
2047 }
2048 device_initcall(intel_idle_init);
2049
2050 /*
2051  * We are not really modular, but we used to support that.  Meaning we also
2052  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
2053  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
2054  * is the easiest way (currently) to continue doing that.
2055  */
2056 module_param(max_cstate, int, 0444);
2057 /*
2058  * The positions of the bits that are set in this number are the indices of the
2059  * idle states to be disabled by default (as reflected by the names of the
2060  * corresponding idle state directories in sysfs, "state0", "state1" ...
2061  * "state<i>" ..., where <i> is the index of the given state).
2062  */
2063 module_param_named(states_off, disabled_states_mask, uint, 0444);
2064 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
2065 /*
2066  * Some platforms come with mutually exclusive C-states, so that if one is
2067  * enabled, the other C-states must not be used. Example: C1 and C1E on
2068  * Sapphire Rapids platform. This parameter allows for selecting the
2069  * preferred C-states among the groups of mutually exclusive C-states - the
2070  * selected C-states will be registered, the other C-states from the mutually
2071  * exclusive group won't be registered. If the platform has no mutually
2072  * exclusive C-states, this parameter has no effect.
2073  */
2074 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
2075 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");