1 // SPDX-License-Identifier: GPL-2.0-only
3 * intel_idle.c - native hardware idle loop for modern Intel processors
5 * Copyright (c) 2013 - 2020, Intel Corporation.
6 * Len Brown <len.brown@intel.com>
7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12 * in lieu of the legacy ACPI processor_idle driver. The intent is to
13 * make Linux more efficient on these processors, as intel_idle knows
14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
20 * All CPUs have same idle states as boot CPU
22 * Chipset BM_STS (bus master status) bit is a NOP
23 * for preventing entry into deep C-states
25 * CPU will flush caches as needed when entering a C-state via MWAIT
26 * (in contrast to entering ACPI C3, in which case the WBINVD
27 * instruction needs to be executed to flush the caches)
33 * ACPI has a .suspend hack to turn off deep C-states during suspend
34 * to avoid complications with the lapic timer workaround.
35 * Have not seen issues with suspend, but may need same workaround here.
39 /* un-comment DEBUG to enable pr_debug() statements */
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/sched/smt.h>
51 #include <linux/notifier.h>
52 #include <linux/cpu.h>
53 #include <linux/moduleparam.h>
54 #include <asm/cpu_device_id.h>
55 #include <asm/intel-family.h>
56 #include <asm/nospec-branch.h>
57 #include <asm/mwait.h>
60 #define INTEL_IDLE_VERSION "0.5.1"
62 static struct cpuidle_driver intel_idle_driver = {
66 /* intel_idle.max_cstate=0 disables driver */
67 static int max_cstate = CPUIDLE_STATE_MAX - 1;
68 static unsigned int disabled_states_mask;
69 static unsigned int preferred_states_mask;
71 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
73 static unsigned long auto_demotion_disable_flags;
76 C1E_PROMOTION_PRESERVE,
79 } c1e_promotion = C1E_PROMOTION_PRESERVE;
82 struct cpuidle_state *state_table;
85 * Hardware C-state auto-demotion may not always be optimal.
86 * Indicate which enable bits to clear here.
88 unsigned long auto_demotion_disable_flags;
89 bool byt_auto_demotion_disable_flag;
90 bool disable_promotion_to_c1e;
94 static const struct idle_cpu *icpu __initdata;
95 static struct cpuidle_state *cpuidle_state_table __initdata;
97 static unsigned int mwait_substates __initdata;
100 * Enable interrupts before entering the C-state. On some platforms and for
101 * some C-states, this may measurably decrease interrupt latency.
103 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14)
106 * Enable this state by default even if the ACPI _CST does not list it.
108 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
111 * Disable IBRS across idle (when KERNEL_IBRS); this is mutually exclusive with IRQ_ENABLE
114 #define CPUIDLE_FLAG_IBRS BIT(16)
117 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
118 * the C-state (top nibble) and sub-state (bottom nibble)
119 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
121 * We store the hint at the top of our "flags" for each state.
123 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
124 #define MWAIT2flg(eax) ((eax & 0xFF) << 24)
126 static __always_inline int __intel_idle(struct cpuidle_device *dev,
127 struct cpuidle_driver *drv, int index)
129 struct cpuidle_state *state = &drv->states[index];
130 unsigned long eax = flg2MWAIT(state->flags);
131 unsigned long ecx = 1; /* break on interrupt flag */
133 mwait_idle_with_hints(eax, ecx);
139 * intel_idle - Ask the processor to enter the given idle state.
140 * @dev: cpuidle device of the target CPU.
141 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
142 * @index: Target idle state index.
144 * Use the MWAIT instruction to notify the processor that the CPU represented by
145 * @dev is idle and it can try to enter the idle state corresponding to @index.
147 * If the local APIC timer is not known to be reliable in the target idle state,
148 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
150 * Must be called under local_irq_disable().
152 static __cpuidle int intel_idle(struct cpuidle_device *dev,
153 struct cpuidle_driver *drv, int index)
155 return __intel_idle(dev, drv, index);
158 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
159 struct cpuidle_driver *drv, int index)
163 raw_local_irq_enable();
164 ret = __intel_idle(dev, drv, index);
167 * The lockdep hardirqs state may be changed to 'on' with timer
168 * tick interrupt followed by __do_softirq(). Use local_irq_disable()
169 * to keep the hardirqs state correct.
176 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
177 struct cpuidle_driver *drv, int index)
179 bool smt_active = sched_smt_active();
180 u64 spec_ctrl = spec_ctrl_current();
184 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
186 ret = __intel_idle(dev, drv, index);
189 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
195 * intel_idle_s2idle - Ask the processor to enter the given idle state.
196 * @dev: cpuidle device of the target CPU.
197 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
198 * @index: Target idle state index.
200 * Use the MWAIT instruction to notify the processor that the CPU represented by
201 * @dev is idle and it can try to enter the idle state corresponding to @index.
203 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
204 * scheduler tick and suspended scheduler clock on the target CPU.
206 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
207 struct cpuidle_driver *drv, int index)
209 unsigned long eax = flg2MWAIT(drv->states[index].flags);
210 unsigned long ecx = 1; /* break on interrupt flag */
212 mwait_idle_with_hints(eax, ecx);
218 * States are indexed by the cstate number,
219 * which is also the index into the MWAIT hint array.
220 * Thus C0 is a dummy.
222 static struct cpuidle_state nehalem_cstates[] __initdata = {
225 .desc = "MWAIT 0x00",
226 .flags = MWAIT2flg(0x00),
228 .target_residency = 6,
229 .enter = &intel_idle,
230 .enter_s2idle = intel_idle_s2idle, },
233 .desc = "MWAIT 0x01",
234 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
236 .target_residency = 20,
237 .enter = &intel_idle,
238 .enter_s2idle = intel_idle_s2idle, },
241 .desc = "MWAIT 0x10",
242 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
244 .target_residency = 80,
245 .enter = &intel_idle,
246 .enter_s2idle = intel_idle_s2idle, },
249 .desc = "MWAIT 0x20",
250 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
252 .target_residency = 800,
253 .enter = &intel_idle,
254 .enter_s2idle = intel_idle_s2idle, },
259 static struct cpuidle_state snb_cstates[] __initdata = {
262 .desc = "MWAIT 0x00",
263 .flags = MWAIT2flg(0x00),
265 .target_residency = 2,
266 .enter = &intel_idle,
267 .enter_s2idle = intel_idle_s2idle, },
270 .desc = "MWAIT 0x01",
271 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
273 .target_residency = 20,
274 .enter = &intel_idle,
275 .enter_s2idle = intel_idle_s2idle, },
278 .desc = "MWAIT 0x10",
279 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
281 .target_residency = 211,
282 .enter = &intel_idle,
283 .enter_s2idle = intel_idle_s2idle, },
286 .desc = "MWAIT 0x20",
287 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
289 .target_residency = 345,
290 .enter = &intel_idle,
291 .enter_s2idle = intel_idle_s2idle, },
294 .desc = "MWAIT 0x30",
295 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
297 .target_residency = 345,
298 .enter = &intel_idle,
299 .enter_s2idle = intel_idle_s2idle, },
304 static struct cpuidle_state byt_cstates[] __initdata = {
307 .desc = "MWAIT 0x00",
308 .flags = MWAIT2flg(0x00),
310 .target_residency = 1,
311 .enter = &intel_idle,
312 .enter_s2idle = intel_idle_s2idle, },
315 .desc = "MWAIT 0x58",
316 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
318 .target_residency = 275,
319 .enter = &intel_idle,
320 .enter_s2idle = intel_idle_s2idle, },
323 .desc = "MWAIT 0x52",
324 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
326 .target_residency = 560,
327 .enter = &intel_idle,
328 .enter_s2idle = intel_idle_s2idle, },
331 .desc = "MWAIT 0x60",
332 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
333 .exit_latency = 1200,
334 .target_residency = 4000,
335 .enter = &intel_idle,
336 .enter_s2idle = intel_idle_s2idle, },
339 .desc = "MWAIT 0x64",
340 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
341 .exit_latency = 10000,
342 .target_residency = 20000,
343 .enter = &intel_idle,
344 .enter_s2idle = intel_idle_s2idle, },
349 static struct cpuidle_state cht_cstates[] __initdata = {
352 .desc = "MWAIT 0x00",
353 .flags = MWAIT2flg(0x00),
355 .target_residency = 1,
356 .enter = &intel_idle,
357 .enter_s2idle = intel_idle_s2idle, },
360 .desc = "MWAIT 0x58",
361 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
363 .target_residency = 275,
364 .enter = &intel_idle,
365 .enter_s2idle = intel_idle_s2idle, },
368 .desc = "MWAIT 0x52",
369 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
371 .target_residency = 560,
372 .enter = &intel_idle,
373 .enter_s2idle = intel_idle_s2idle, },
376 .desc = "MWAIT 0x60",
377 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
378 .exit_latency = 1200,
379 .target_residency = 4000,
380 .enter = &intel_idle,
381 .enter_s2idle = intel_idle_s2idle, },
384 .desc = "MWAIT 0x64",
385 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
386 .exit_latency = 10000,
387 .target_residency = 20000,
388 .enter = &intel_idle,
389 .enter_s2idle = intel_idle_s2idle, },
394 static struct cpuidle_state ivb_cstates[] __initdata = {
397 .desc = "MWAIT 0x00",
398 .flags = MWAIT2flg(0x00),
400 .target_residency = 1,
401 .enter = &intel_idle,
402 .enter_s2idle = intel_idle_s2idle, },
405 .desc = "MWAIT 0x01",
406 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
408 .target_residency = 20,
409 .enter = &intel_idle,
410 .enter_s2idle = intel_idle_s2idle, },
413 .desc = "MWAIT 0x10",
414 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
416 .target_residency = 156,
417 .enter = &intel_idle,
418 .enter_s2idle = intel_idle_s2idle, },
421 .desc = "MWAIT 0x20",
422 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
424 .target_residency = 300,
425 .enter = &intel_idle,
426 .enter_s2idle = intel_idle_s2idle, },
429 .desc = "MWAIT 0x30",
430 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
432 .target_residency = 300,
433 .enter = &intel_idle,
434 .enter_s2idle = intel_idle_s2idle, },
439 static struct cpuidle_state ivt_cstates[] __initdata = {
442 .desc = "MWAIT 0x00",
443 .flags = MWAIT2flg(0x00),
445 .target_residency = 1,
446 .enter = &intel_idle,
447 .enter_s2idle = intel_idle_s2idle, },
450 .desc = "MWAIT 0x01",
451 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
453 .target_residency = 80,
454 .enter = &intel_idle,
455 .enter_s2idle = intel_idle_s2idle, },
458 .desc = "MWAIT 0x10",
459 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
461 .target_residency = 156,
462 .enter = &intel_idle,
463 .enter_s2idle = intel_idle_s2idle, },
466 .desc = "MWAIT 0x20",
467 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
469 .target_residency = 300,
470 .enter = &intel_idle,
471 .enter_s2idle = intel_idle_s2idle, },
476 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
479 .desc = "MWAIT 0x00",
480 .flags = MWAIT2flg(0x00),
482 .target_residency = 1,
483 .enter = &intel_idle,
484 .enter_s2idle = intel_idle_s2idle, },
487 .desc = "MWAIT 0x01",
488 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
490 .target_residency = 250,
491 .enter = &intel_idle,
492 .enter_s2idle = intel_idle_s2idle, },
495 .desc = "MWAIT 0x10",
496 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
498 .target_residency = 300,
499 .enter = &intel_idle,
500 .enter_s2idle = intel_idle_s2idle, },
503 .desc = "MWAIT 0x20",
504 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
506 .target_residency = 400,
507 .enter = &intel_idle,
508 .enter_s2idle = intel_idle_s2idle, },
513 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
516 .desc = "MWAIT 0x00",
517 .flags = MWAIT2flg(0x00),
519 .target_residency = 1,
520 .enter = &intel_idle,
521 .enter_s2idle = intel_idle_s2idle, },
524 .desc = "MWAIT 0x01",
525 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
527 .target_residency = 500,
528 .enter = &intel_idle,
529 .enter_s2idle = intel_idle_s2idle, },
532 .desc = "MWAIT 0x10",
533 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
535 .target_residency = 600,
536 .enter = &intel_idle,
537 .enter_s2idle = intel_idle_s2idle, },
540 .desc = "MWAIT 0x20",
541 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
543 .target_residency = 700,
544 .enter = &intel_idle,
545 .enter_s2idle = intel_idle_s2idle, },
550 static struct cpuidle_state hsw_cstates[] __initdata = {
553 .desc = "MWAIT 0x00",
554 .flags = MWAIT2flg(0x00),
556 .target_residency = 2,
557 .enter = &intel_idle,
558 .enter_s2idle = intel_idle_s2idle, },
561 .desc = "MWAIT 0x01",
562 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
564 .target_residency = 20,
565 .enter = &intel_idle,
566 .enter_s2idle = intel_idle_s2idle, },
569 .desc = "MWAIT 0x10",
570 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
572 .target_residency = 100,
573 .enter = &intel_idle,
574 .enter_s2idle = intel_idle_s2idle, },
577 .desc = "MWAIT 0x20",
578 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
580 .target_residency = 400,
581 .enter = &intel_idle,
582 .enter_s2idle = intel_idle_s2idle, },
585 .desc = "MWAIT 0x32",
586 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
588 .target_residency = 500,
589 .enter = &intel_idle,
590 .enter_s2idle = intel_idle_s2idle, },
593 .desc = "MWAIT 0x40",
594 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
596 .target_residency = 900,
597 .enter = &intel_idle,
598 .enter_s2idle = intel_idle_s2idle, },
601 .desc = "MWAIT 0x50",
602 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
604 .target_residency = 1800,
605 .enter = &intel_idle,
606 .enter_s2idle = intel_idle_s2idle, },
609 .desc = "MWAIT 0x60",
610 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
611 .exit_latency = 2600,
612 .target_residency = 7700,
613 .enter = &intel_idle,
614 .enter_s2idle = intel_idle_s2idle, },
618 static struct cpuidle_state bdw_cstates[] __initdata = {
621 .desc = "MWAIT 0x00",
622 .flags = MWAIT2flg(0x00),
624 .target_residency = 2,
625 .enter = &intel_idle,
626 .enter_s2idle = intel_idle_s2idle, },
629 .desc = "MWAIT 0x01",
630 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
632 .target_residency = 20,
633 .enter = &intel_idle,
634 .enter_s2idle = intel_idle_s2idle, },
637 .desc = "MWAIT 0x10",
638 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
640 .target_residency = 100,
641 .enter = &intel_idle,
642 .enter_s2idle = intel_idle_s2idle, },
645 .desc = "MWAIT 0x20",
646 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
648 .target_residency = 400,
649 .enter = &intel_idle,
650 .enter_s2idle = intel_idle_s2idle, },
653 .desc = "MWAIT 0x32",
654 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
656 .target_residency = 500,
657 .enter = &intel_idle,
658 .enter_s2idle = intel_idle_s2idle, },
661 .desc = "MWAIT 0x40",
662 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
664 .target_residency = 900,
665 .enter = &intel_idle,
666 .enter_s2idle = intel_idle_s2idle, },
669 .desc = "MWAIT 0x50",
670 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
672 .target_residency = 1800,
673 .enter = &intel_idle,
674 .enter_s2idle = intel_idle_s2idle, },
677 .desc = "MWAIT 0x60",
678 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
679 .exit_latency = 2600,
680 .target_residency = 7700,
681 .enter = &intel_idle,
682 .enter_s2idle = intel_idle_s2idle, },
687 static struct cpuidle_state skl_cstates[] __initdata = {
690 .desc = "MWAIT 0x00",
691 .flags = MWAIT2flg(0x00),
693 .target_residency = 2,
694 .enter = &intel_idle,
695 .enter_s2idle = intel_idle_s2idle, },
698 .desc = "MWAIT 0x01",
699 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
701 .target_residency = 20,
702 .enter = &intel_idle,
703 .enter_s2idle = intel_idle_s2idle, },
706 .desc = "MWAIT 0x10",
707 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
709 .target_residency = 100,
710 .enter = &intel_idle,
711 .enter_s2idle = intel_idle_s2idle, },
714 .desc = "MWAIT 0x20",
715 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
717 .target_residency = 200,
718 .enter = &intel_idle,
719 .enter_s2idle = intel_idle_s2idle, },
722 .desc = "MWAIT 0x33",
723 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
725 .target_residency = 800,
726 .enter = &intel_idle,
727 .enter_s2idle = intel_idle_s2idle, },
730 .desc = "MWAIT 0x40",
731 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
733 .target_residency = 800,
734 .enter = &intel_idle,
735 .enter_s2idle = intel_idle_s2idle, },
738 .desc = "MWAIT 0x50",
739 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
741 .target_residency = 5000,
742 .enter = &intel_idle,
743 .enter_s2idle = intel_idle_s2idle, },
746 .desc = "MWAIT 0x60",
747 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
749 .target_residency = 5000,
750 .enter = &intel_idle,
751 .enter_s2idle = intel_idle_s2idle, },
756 static struct cpuidle_state skx_cstates[] __initdata = {
759 .desc = "MWAIT 0x00",
760 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
762 .target_residency = 2,
763 .enter = &intel_idle,
764 .enter_s2idle = intel_idle_s2idle, },
767 .desc = "MWAIT 0x01",
768 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
770 .target_residency = 20,
771 .enter = &intel_idle,
772 .enter_s2idle = intel_idle_s2idle, },
775 .desc = "MWAIT 0x20",
776 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
778 .target_residency = 600,
779 .enter = &intel_idle,
780 .enter_s2idle = intel_idle_s2idle, },
785 static struct cpuidle_state icx_cstates[] __initdata = {
788 .desc = "MWAIT 0x00",
789 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
791 .target_residency = 1,
792 .enter = &intel_idle,
793 .enter_s2idle = intel_idle_s2idle, },
796 .desc = "MWAIT 0x01",
797 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
799 .target_residency = 4,
800 .enter = &intel_idle,
801 .enter_s2idle = intel_idle_s2idle, },
804 .desc = "MWAIT 0x20",
805 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
807 .target_residency = 600,
808 .enter = &intel_idle,
809 .enter_s2idle = intel_idle_s2idle, },
815 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
816 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
817 * But in this case there is effectively no C1, because C1 requests are
818 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
819 * and C1E requests end up with C1, so there is effectively no C1E.
821 * By default we enable C1E and disable C1 by marking it with
822 * 'CPUIDLE_FLAG_UNUSABLE'.
824 static struct cpuidle_state adl_cstates[] __initdata = {
827 .desc = "MWAIT 0x00",
828 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
830 .target_residency = 1,
831 .enter = &intel_idle,
832 .enter_s2idle = intel_idle_s2idle, },
835 .desc = "MWAIT 0x01",
836 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
838 .target_residency = 4,
839 .enter = &intel_idle,
840 .enter_s2idle = intel_idle_s2idle, },
843 .desc = "MWAIT 0x20",
844 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
846 .target_residency = 600,
847 .enter = &intel_idle,
848 .enter_s2idle = intel_idle_s2idle, },
851 .desc = "MWAIT 0x40",
852 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
854 .target_residency = 800,
855 .enter = &intel_idle,
856 .enter_s2idle = intel_idle_s2idle, },
859 .desc = "MWAIT 0x60",
860 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
862 .target_residency = 2000,
863 .enter = &intel_idle,
864 .enter_s2idle = intel_idle_s2idle, },
869 static struct cpuidle_state adl_l_cstates[] __initdata = {
872 .desc = "MWAIT 0x00",
873 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
875 .target_residency = 1,
876 .enter = &intel_idle,
877 .enter_s2idle = intel_idle_s2idle, },
880 .desc = "MWAIT 0x01",
881 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
883 .target_residency = 4,
884 .enter = &intel_idle,
885 .enter_s2idle = intel_idle_s2idle, },
888 .desc = "MWAIT 0x20",
889 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
891 .target_residency = 500,
892 .enter = &intel_idle,
893 .enter_s2idle = intel_idle_s2idle, },
896 .desc = "MWAIT 0x40",
897 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
899 .target_residency = 600,
900 .enter = &intel_idle,
901 .enter_s2idle = intel_idle_s2idle, },
904 .desc = "MWAIT 0x60",
905 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
907 .target_residency = 700,
908 .enter = &intel_idle,
909 .enter_s2idle = intel_idle_s2idle, },
914 static struct cpuidle_state spr_cstates[] __initdata = {
917 .desc = "MWAIT 0x00",
918 .flags = MWAIT2flg(0x00),
920 .target_residency = 1,
921 .enter = &intel_idle,
922 .enter_s2idle = intel_idle_s2idle, },
925 .desc = "MWAIT 0x01",
926 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
928 .target_residency = 4,
929 .enter = &intel_idle,
930 .enter_s2idle = intel_idle_s2idle, },
933 .desc = "MWAIT 0x20",
934 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
936 .target_residency = 800,
937 .enter = &intel_idle,
938 .enter_s2idle = intel_idle_s2idle, },
943 static struct cpuidle_state atom_cstates[] __initdata = {
946 .desc = "MWAIT 0x00",
947 .flags = MWAIT2flg(0x00),
949 .target_residency = 20,
950 .enter = &intel_idle,
951 .enter_s2idle = intel_idle_s2idle, },
954 .desc = "MWAIT 0x10",
955 .flags = MWAIT2flg(0x10),
957 .target_residency = 80,
958 .enter = &intel_idle,
959 .enter_s2idle = intel_idle_s2idle, },
962 .desc = "MWAIT 0x30",
963 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
965 .target_residency = 400,
966 .enter = &intel_idle,
967 .enter_s2idle = intel_idle_s2idle, },
970 .desc = "MWAIT 0x52",
971 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
973 .target_residency = 560,
974 .enter = &intel_idle,
975 .enter_s2idle = intel_idle_s2idle, },
979 static struct cpuidle_state tangier_cstates[] __initdata = {
982 .desc = "MWAIT 0x00",
983 .flags = MWAIT2flg(0x00),
985 .target_residency = 4,
986 .enter = &intel_idle,
987 .enter_s2idle = intel_idle_s2idle, },
990 .desc = "MWAIT 0x30",
991 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
993 .target_residency = 400,
994 .enter = &intel_idle,
995 .enter_s2idle = intel_idle_s2idle, },
998 .desc = "MWAIT 0x52",
999 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1000 .exit_latency = 140,
1001 .target_residency = 560,
1002 .enter = &intel_idle,
1003 .enter_s2idle = intel_idle_s2idle, },
1006 .desc = "MWAIT 0x60",
1007 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1008 .exit_latency = 1200,
1009 .target_residency = 4000,
1010 .enter = &intel_idle,
1011 .enter_s2idle = intel_idle_s2idle, },
1014 .desc = "MWAIT 0x64",
1015 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
1016 .exit_latency = 10000,
1017 .target_residency = 20000,
1018 .enter = &intel_idle,
1019 .enter_s2idle = intel_idle_s2idle, },
1023 static struct cpuidle_state avn_cstates[] __initdata = {
1026 .desc = "MWAIT 0x00",
1027 .flags = MWAIT2flg(0x00),
1029 .target_residency = 2,
1030 .enter = &intel_idle,
1031 .enter_s2idle = intel_idle_s2idle, },
1034 .desc = "MWAIT 0x51",
1035 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1037 .target_residency = 45,
1038 .enter = &intel_idle,
1039 .enter_s2idle = intel_idle_s2idle, },
1043 static struct cpuidle_state knl_cstates[] __initdata = {
1046 .desc = "MWAIT 0x00",
1047 .flags = MWAIT2flg(0x00),
1049 .target_residency = 2,
1050 .enter = &intel_idle,
1051 .enter_s2idle = intel_idle_s2idle },
1054 .desc = "MWAIT 0x10",
1055 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1056 .exit_latency = 120,
1057 .target_residency = 500,
1058 .enter = &intel_idle,
1059 .enter_s2idle = intel_idle_s2idle },
1064 static struct cpuidle_state bxt_cstates[] __initdata = {
1067 .desc = "MWAIT 0x00",
1068 .flags = MWAIT2flg(0x00),
1070 .target_residency = 2,
1071 .enter = &intel_idle,
1072 .enter_s2idle = intel_idle_s2idle, },
1075 .desc = "MWAIT 0x01",
1076 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1078 .target_residency = 20,
1079 .enter = &intel_idle,
1080 .enter_s2idle = intel_idle_s2idle, },
1083 .desc = "MWAIT 0x20",
1084 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1085 .exit_latency = 133,
1086 .target_residency = 133,
1087 .enter = &intel_idle,
1088 .enter_s2idle = intel_idle_s2idle, },
1091 .desc = "MWAIT 0x31",
1092 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1093 .exit_latency = 155,
1094 .target_residency = 155,
1095 .enter = &intel_idle,
1096 .enter_s2idle = intel_idle_s2idle, },
1099 .desc = "MWAIT 0x40",
1100 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1101 .exit_latency = 1000,
1102 .target_residency = 1000,
1103 .enter = &intel_idle,
1104 .enter_s2idle = intel_idle_s2idle, },
1107 .desc = "MWAIT 0x50",
1108 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1109 .exit_latency = 2000,
1110 .target_residency = 2000,
1111 .enter = &intel_idle,
1112 .enter_s2idle = intel_idle_s2idle, },
1115 .desc = "MWAIT 0x60",
1116 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1117 .exit_latency = 10000,
1118 .target_residency = 10000,
1119 .enter = &intel_idle,
1120 .enter_s2idle = intel_idle_s2idle, },
1125 static struct cpuidle_state dnv_cstates[] __initdata = {
1128 .desc = "MWAIT 0x00",
1129 .flags = MWAIT2flg(0x00),
1131 .target_residency = 2,
1132 .enter = &intel_idle,
1133 .enter_s2idle = intel_idle_s2idle, },
1136 .desc = "MWAIT 0x01",
1137 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1139 .target_residency = 20,
1140 .enter = &intel_idle,
1141 .enter_s2idle = intel_idle_s2idle, },
1144 .desc = "MWAIT 0x20",
1145 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1147 .target_residency = 500,
1148 .enter = &intel_idle,
1149 .enter_s2idle = intel_idle_s2idle, },
1155 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1156 * C6, and this is indicated in the CPUID mwait leaf.
1158 static struct cpuidle_state snr_cstates[] __initdata = {
1161 .desc = "MWAIT 0x00",
1162 .flags = MWAIT2flg(0x00),
1164 .target_residency = 2,
1165 .enter = &intel_idle,
1166 .enter_s2idle = intel_idle_s2idle, },
1169 .desc = "MWAIT 0x01",
1170 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1172 .target_residency = 25,
1173 .enter = &intel_idle,
1174 .enter_s2idle = intel_idle_s2idle, },
1177 .desc = "MWAIT 0x20",
1178 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1179 .exit_latency = 130,
1180 .target_residency = 500,
1181 .enter = &intel_idle,
1182 .enter_s2idle = intel_idle_s2idle, },
1187 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1188 .state_table = nehalem_cstates,
1189 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1190 .disable_promotion_to_c1e = true,
1193 static const struct idle_cpu idle_cpu_nhx __initconst = {
1194 .state_table = nehalem_cstates,
1195 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1196 .disable_promotion_to_c1e = true,
1200 static const struct idle_cpu idle_cpu_atom __initconst = {
1201 .state_table = atom_cstates,
1204 static const struct idle_cpu idle_cpu_tangier __initconst = {
1205 .state_table = tangier_cstates,
1208 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1209 .state_table = atom_cstates,
1210 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1213 static const struct idle_cpu idle_cpu_snb __initconst = {
1214 .state_table = snb_cstates,
1215 .disable_promotion_to_c1e = true,
1218 static const struct idle_cpu idle_cpu_snx __initconst = {
1219 .state_table = snb_cstates,
1220 .disable_promotion_to_c1e = true,
1224 static const struct idle_cpu idle_cpu_byt __initconst = {
1225 .state_table = byt_cstates,
1226 .disable_promotion_to_c1e = true,
1227 .byt_auto_demotion_disable_flag = true,
1230 static const struct idle_cpu idle_cpu_cht __initconst = {
1231 .state_table = cht_cstates,
1232 .disable_promotion_to_c1e = true,
1233 .byt_auto_demotion_disable_flag = true,
1236 static const struct idle_cpu idle_cpu_ivb __initconst = {
1237 .state_table = ivb_cstates,
1238 .disable_promotion_to_c1e = true,
1241 static const struct idle_cpu idle_cpu_ivt __initconst = {
1242 .state_table = ivt_cstates,
1243 .disable_promotion_to_c1e = true,
1247 static const struct idle_cpu idle_cpu_hsw __initconst = {
1248 .state_table = hsw_cstates,
1249 .disable_promotion_to_c1e = true,
1252 static const struct idle_cpu idle_cpu_hsx __initconst = {
1253 .state_table = hsw_cstates,
1254 .disable_promotion_to_c1e = true,
1258 static const struct idle_cpu idle_cpu_bdw __initconst = {
1259 .state_table = bdw_cstates,
1260 .disable_promotion_to_c1e = true,
1263 static const struct idle_cpu idle_cpu_bdx __initconst = {
1264 .state_table = bdw_cstates,
1265 .disable_promotion_to_c1e = true,
1269 static const struct idle_cpu idle_cpu_skl __initconst = {
1270 .state_table = skl_cstates,
1271 .disable_promotion_to_c1e = true,
1274 static const struct idle_cpu idle_cpu_skx __initconst = {
1275 .state_table = skx_cstates,
1276 .disable_promotion_to_c1e = true,
1280 static const struct idle_cpu idle_cpu_icx __initconst = {
1281 .state_table = icx_cstates,
1282 .disable_promotion_to_c1e = true,
1286 static const struct idle_cpu idle_cpu_adl __initconst = {
1287 .state_table = adl_cstates,
1290 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1291 .state_table = adl_l_cstates,
1294 static const struct idle_cpu idle_cpu_spr __initconst = {
1295 .state_table = spr_cstates,
1296 .disable_promotion_to_c1e = true,
1300 static const struct idle_cpu idle_cpu_avn __initconst = {
1301 .state_table = avn_cstates,
1302 .disable_promotion_to_c1e = true,
1306 static const struct idle_cpu idle_cpu_knl __initconst = {
1307 .state_table = knl_cstates,
1311 static const struct idle_cpu idle_cpu_bxt __initconst = {
1312 .state_table = bxt_cstates,
1313 .disable_promotion_to_c1e = true,
1316 static const struct idle_cpu idle_cpu_dnv __initconst = {
1317 .state_table = dnv_cstates,
1318 .disable_promotion_to_c1e = true,
1322 static const struct idle_cpu idle_cpu_snr __initconst = {
1323 .state_table = snr_cstates,
1324 .disable_promotion_to_c1e = true,
/*
 * Match table: maps each supported Intel Family 6 model to its idle_cpu
 * descriptor.  Consulted via x86_match_cpu() in intel_idle_init().
 */
1328 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1329 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx),
1330 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem),
1331 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem),
1332 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem),
1333 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx),
1334 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx),
1335 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom),
1336 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft),
1337 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx),
1338 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb),
1339 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx),
1340 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom),
1341 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt),
1342 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1343 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht),
1344 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb),
1345 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt),
1346 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw),
1347 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx),
1348 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw),
1349 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw),
1350 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn),
1351 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw),
1352 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw),
1353 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx),
1354 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx),
1355 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl),
1356 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl),
1357 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl),
1358 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl),
1359 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
1360 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
1361 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
1362 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
1363 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
1364 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
1365 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
1366 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
1367 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
1368 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt),
1369 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv),
1370 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr),
/*
 * Fallback match: any Intel Family 6 CPU that advertises MWAIT, with no
 * per-model driver_data (used when intel_idle_ids has no exact match).
 */
1374 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1375 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
/*
 * intel_idle_max_cstate_reached - check @cstate against the max_cstate limit.
 *
 * Logs a message when the intel_idle.max_cstate module-parameter cap has
 * been hit.  NOTE(review): the return statements are not visible in this
 * extract — presumably 'true' inside the branch, 'false' otherwise; confirm.
 */
1379 static bool __init intel_idle_max_cstate_reached(int cstate)
1381 if (cstate + 1 > max_cstate) {
1382 pr_info("max_cstate %d reached\n", max_cstate);
/*
 * intel_idle_state_needs_timer_stop - whether @state needs tick broadcast.
 *
 * If the CPU has ARAT (Always Running APIC Timer) the local timer keeps
 * ticking in deep C-states, so no broadcast is needed.  Otherwise, decode
 * the MWAIT hint stored in state->flags: any target deeper than C1 (major
 * C-state field non-zero) requires stopping the local timer.
 */
1388 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1390 unsigned long eax = flg2MWAIT(state->flags);
1392 if (boot_cpu_has(X86_FEATURE_ARAT))
1396 * Switch over to one-shot tick broadcast if the target C-state
1397 * is deeper than C1.
1399 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
/* ACPI _CST support: only compiled in when the ACPI processor code is. */
1402 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1403 #include <acpi/processor.h>
/* no_acpi=1: never consult ACPI _CST when building the idle-state list. */
1405 static bool no_acpi __read_mostly;
1406 module_param(no_acpi, bool, 0444);
1407 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
/* use_acpi=1: prefer _CST even for models that default to static tables. */
1409 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1410 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1411 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
/* _CST data extracted from firmware at init time (boot CPU's view). */
1413 static struct acpi_processor_power acpi_state_table __initdata;
1416 * intel_idle_cst_usable - Check if the _CST information can be used.
1418 * Check if all of the C-states listed by _CST in the max_cstate range are
1419 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1421 static bool __init intel_idle_cst_usable(void)
/* Clamp to driver capacity, the user cap, and the _CST entry count. */
1425 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1426 acpi_state_table.count);
/* Index 0 is C0 (not an idle state); scan C1 and deeper. */
1428 for (cstate = 1; cstate < limit; cstate++) {
1429 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
/* Any non-FFH entry method means MWAIT cannot be used for it. */
1431 if (cx->entry_method != ACPI_CSTATE_FFH)
/*
 * intel_idle_acpi_cst_extract - Populate acpi_state_table from firmware _CST.
 *
 * Walks possible CPUs until one yields a usable _CST, verifies every listed
 * state is FFH/MWAIT-enterable, and claims _CST control from the firmware.
 * On any failure the table count is reset to 0 so the ACPI path is disabled.
 * NOTE(review): the control-flow lines between these statements (returns,
 * continue/break) are not visible in this extract — confirm in the full file.
 */
1438 static bool __init intel_idle_acpi_cst_extract(void)
1443 pr_debug("Not allowed to use ACPI _CST\n");
1447 for_each_possible_cpu(cpu) {
1448 struct acpi_processor *pr = per_cpu(processors, cpu);
1453 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
/* Account for the C0 slot at index 0 of the states[] array. */
1456 acpi_state_table.count++;
1458 if (!intel_idle_cst_usable())
1461 if (!acpi_processor_claim_cst_control())
1467 acpi_state_table.count = 0;
1468 pr_debug("ACPI _CST not found or not usable\n");
/*
 * intel_idle_init_cstates_acpi - Build drv->states[] from the extracted _CST
 * table instead of a static per-model table.  Each entry is named "C<n>_ACPI"
 * and entered via the common intel_idle/intel_idle_s2idle callbacks.
 */
1472 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1474 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1477 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1478 * the interesting states are ACPI_CSTATE_FFH.
1480 for (cstate = 1; cstate < limit; cstate++) {
1481 struct acpi_processor_cx *cx;
1482 struct cpuidle_state *state;
1484 if (intel_idle_max_cstate_reached(cstate - 1))
1487 cx = &acpi_state_table.states[cstate];
1489 state = &drv->states[drv->state_count++];
1491 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1492 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1493 state->exit_latency = cx->latency;
1495 * For C1-type C-states use the same number for both the exit
1496 * latency and target residency, because that is the case for
1497 * C1 in the majority of the static C-states tables above.
1498 * For the other types of C-states, however, set the target
1499 * residency to 3 times the exit latency which should lead to
1500 * a reasonable balance between energy-efficiency and
1501 * performance in the majority of interesting cases.
1503 state->target_residency = cx->latency;
1504 if (cx->type > ACPI_STATE_C1)
1505 state->target_residency *= 3;
/* Stash the MWAIT hint (cx->address) in the state flags for entry. */
1507 state->flags = MWAIT2flg(cx->address);
1508 if (cx->type > ACPI_STATE_C2)
1509 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
/* Honor the intel_idle.states_off disable mask for this index. */
1511 if (disabled_states_mask & BIT(cstate))
1512 state->flags |= CPUIDLE_FLAG_OFF;
1514 if (intel_idle_state_needs_timer_stop(state))
1515 state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1517 state->enter = intel_idle;
1518 state->enter_s2idle = intel_idle_s2idle;
/*
 * intel_idle_off_by_default - Should a static-table state default to "off"?
 *
 * Scans the extracted _CST table for an entry whose MWAIT hint matches
 * @mwait_hint.  NOTE(review): the return statements are not visible in this
 * extract — presumably a match (or an empty table) yields 'false' and a miss
 * yields 'true'; confirm in the full file.
 */
1522 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1527 * If there are no _CST C-states, do not disable any C-states by
1530 if (!acpi_state_table.count)
1533 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1535 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1536 * the interesting states are ACPI_CSTATE_FFH.
1538 for (cstate = 1; cstate < limit; cstate++) {
1539 if (acpi_state_table.states[cstate].address == mwait_hint)
/* Without ACPI processor C-state support, all ACPI hooks become no-ops. */
1544 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1545 #define force_use_acpi (false)
1547 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1548 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1549 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1550 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1553 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1555 * Tune IVT multi-socket targets.
1556 * Assumption: num_sockets == (max_package_num + 1).
1558 static void __init ivt_idle_state_table_update(void)
1560 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1561 int cpu, package_num, num_sockets = 1;
/* Derive the socket count from the highest package id seen online. */
1563 for_each_online_cpu(cpu) {
1564 package_num = topology_physical_package_id(cpu);
1565 if (package_num + 1 > num_sockets) {
1566 num_sockets = package_num + 1;
/* More than 4 sockets: switch to the 8-socket table and stop scanning. */
1568 if (num_sockets > 4) {
1569 cpuidle_state_table = ivt_cstates_8s;
1575 if (num_sockets > 2)
1576 cpuidle_state_table = ivt_cstates_4s;
1578 /* else, 1 and 2 socket systems use default ivt_cstates */
1582 * irtl_2_usec - IRTL to microseconds conversion.
1583 * @irtl: IRTL MSR value.
1585 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1587 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
/* Nanoseconds per count for each of the 3-bit time-unit encodings. */
1589 static const unsigned int irtl_ns_units[] __initconst = {
1590 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1592 unsigned long long ns;
/* Bits [12:10] select the time unit; bits [9:0] are the count. */
1597 ns = irtl_ns_units[(irtl >> 10) & 0x7];
1599 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1603 * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1605 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1606 * definitive maximum latency and use the same value for target_residency.
1608 static void __init bxt_idle_state_table_update(void)
1610 unsigned long long msr;
/*
 * One IRTL MSR per package C-state (PC6..PC10); each overrides the table
 * entry's exit latency and target residency.  NOTE(review): the guards
 * around these assignments (e.g. "if (usec)") are not visible in this
 * extract — confirm zero IRTL values are skipped in the full file.
 */
1613 rdmsrl(MSR_PKGC6_IRTL, msr);
1614 usec = irtl_2_usec(msr);
1616 bxt_cstates[2].exit_latency = usec;
1617 bxt_cstates[2].target_residency = usec;
1620 rdmsrl(MSR_PKGC7_IRTL, msr);
1621 usec = irtl_2_usec(msr);
1623 bxt_cstates[3].exit_latency = usec;
1624 bxt_cstates[3].target_residency = usec;
1627 rdmsrl(MSR_PKGC8_IRTL, msr);
1628 usec = irtl_2_usec(msr);
1630 bxt_cstates[4].exit_latency = usec;
1631 bxt_cstates[4].target_residency = usec;
1634 rdmsrl(MSR_PKGC9_IRTL, msr);
1635 usec = irtl_2_usec(msr);
1637 bxt_cstates[5].exit_latency = usec;
1638 bxt_cstates[5].target_residency = usec;
1641 rdmsrl(MSR_PKGC10_IRTL, msr);
1642 usec = irtl_2_usec(msr);
1644 bxt_cstates[6].exit_latency = usec;
1645 bxt_cstates[6].target_residency = usec;
1651 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1653 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1655 static void __init sklh_idle_state_table_update(void)
1657 unsigned long long msr;
1658 unsigned int eax, ebx, ecx, edx;
1661 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1662 if (max_cstate <= 7)
1665 /* if PC10 not present in CPUID.MWAIT.EDX */
1666 if ((mwait_substates & (0xF << 28)) == 0)
1669 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr)
1671 /* PC10 is not enabled in PKG C-state limit */
1672 if ((msr & 0xF) != 8)
/* CPUID leaf 7: EBX bit 2 reports SGX presence. */
1676 cpuid(7, &eax, &ebx, &ecx, &edx);
1678 /* if SGX is present */
1679 if (ebx & (1 << 2)) {
1681 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1683 /* if SGX is enabled */
1684 if (msr & (1 << 18))
/* All checks passed: hide C8/C9 so the governor only sees C10. */
1688 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */
1689 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */
1693 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1694 * idle states table.
1696 static void __init skx_idle_state_table_update(void)
1698 unsigned long long msr;
/* Read the BIOS-programmed package C-state limit (low 3 bits). */
1700 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1703 * 000b: C0/C1 (no package C-state support)
1705 * 010b: C6 (non-retention)
1706 * 011b: C6 (retention)
1707 * 111b: No Package C state limits.
1709 if ((msr & 0x7) < 2) {
1711 * Uses the CC6 + PC0 latency and 3 times of
1712 * latency for target_residency if the PC6
1713 * is disabled in BIOS. This is consistent
1714 * with how intel_idle driver uses _CST
1715 * to set the target_residency.
1717 skx_cstates[2].exit_latency = 92;
1718 skx_cstates[2].target_residency = 276;
1723 * adl_idle_state_table_update - Adjust AlderLake idle states table.
1725 static void __init adl_idle_state_table_update(void)
1727 /* Check if user prefers C1 over C1E. */
/* preferred_cstates bit 1 selects C1, bit 2 selects C1E (mutually excl.). */
1728 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
/* Swap usability: expose C1 (index 0), hide C1E (index 1). */
1729 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1730 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1732 /* Disable C1E by clearing the "C1E promotion" bit. */
1733 c1e_promotion = C1E_PROMOTION_DISABLE;
1737 /* Make sure C1E is enabled by default */
1738 c1e_promotion = C1E_PROMOTION_ENABLE;
1742 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
1744 static void __init spr_idle_state_table_update(void)
1746 unsigned long long msr;
1749 * By default, the C6 state assumes the worst-case scenario of package
1750 * C6. However, if PC6 is disabled, we update the numbers to match
1753 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1755 /* Limit value 2 and above allow for PC6. */
1756 if ((msr & 0x7) < 2) {
/* PC6 unavailable: use core-C6-only latency/residency figures. */
1757 spr_cstates[2].exit_latency = 190;
1758 spr_cstates[2].target_residency = 600;
/*
 * intel_idle_verify_cstate - Validate an MWAIT hint against CPUID.
 *
 * Rejects a C-state whose CPUID.MWAIT sub-state count is zero (the hardware
 * does not implement it).  Also marks the TSC unstable for states deeper
 * than C2 when the CPU lacks NONSTOP_TSC.  NOTE(review): the final return
 * statements are not visible in this extract — confirm in the full file.
 */
1762 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1764 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
/* Each C-state has a 4-bit sub-state count in the CPUID.MWAIT.EDX word. */
1765 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1766 MWAIT_SUBSTATE_MASK;
1768 /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1769 if (num_substates == 0)
1772 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1773 mark_tsc_unstable("TSC halts in idle states deeper than C2");
/*
 * intel_idle_init_cstates_icpu - Build drv->states[] from the static
 * per-model cpuidle_state_table, after applying model-specific fixups.
 *
 * NOTE(review): the "break" statements between the switch cases below are
 * not visible in this extract — each model runs only its own fixup in the
 * full file; confirm.
 */
1778 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1782 switch (boot_cpu_data.x86_model) {
1783 case INTEL_FAM6_IVYBRIDGE_X:
1784 ivt_idle_state_table_update();
1786 case INTEL_FAM6_ATOM_GOLDMONT:
1787 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1788 bxt_idle_state_table_update();
1790 case INTEL_FAM6_SKYLAKE:
1791 sklh_idle_state_table_update();
1793 case INTEL_FAM6_SKYLAKE_X:
1794 skx_idle_state_table_update();
1796 case INTEL_FAM6_SAPPHIRERAPIDS_X:
1797 spr_idle_state_table_update();
1799 case INTEL_FAM6_ALDERLAKE:
1800 case INTEL_FAM6_ALDERLAKE_L:
1801 adl_idle_state_table_update();
1805 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1806 unsigned int mwait_hint;
1808 if (intel_idle_max_cstate_reached(cstate))
/* A state with neither entry callback terminates the table. */
1811 if (!cpuidle_state_table[cstate].enter &&
1812 !cpuidle_state_table[cstate].enter_s2idle)
1815 /* If marked as unusable, skip this state. */
1816 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1817 pr_debug("state %s is disabled\n",
1818 cpuidle_state_table[cstate].name);
1822 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1823 if (!intel_idle_verify_cstate(mwait_hint))
1826 /* Structure copy. */
1827 drv->states[drv->state_count] = cpuidle_state_table[cstate];
/* Variant entry points: IRQ-enabled idle, or IBRS-aware idle. */
1829 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
1830 drv->states[drv->state_count].enter = intel_idle_irq;
1832 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1833 cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
/* IBRS and IRQ-enabled entry are mutually exclusive by design. */
1834 WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
1835 drv->states[drv->state_count].enter = intel_idle_ibrs;
/*
 * Default a state to "off" if the user masked it, or if ACPI is in use
 * and _CST does not list it (unless the table forces it always-on).
 */
1838 if ((disabled_states_mask & BIT(drv->state_count)) ||
1839 ((icpu->use_acpi || force_use_acpi) &&
1840 intel_idle_off_by_default(mwait_hint) &&
1841 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1842 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1844 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1845 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
/* Bay Trail quirk: clear the C6/MC6 demotion policy MSRs. */
1850 if (icpu->byt_auto_demotion_disable_flag) {
1851 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1852 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1857 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1858 * @drv: cpuidle driver structure to initialize.
1860 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
/* State 0 is always the polling state provided by the cpuidle core. */
1862 cpuidle_poll_state_init(drv);
1864 if (disabled_states_mask & BIT(0))
1865 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1867 drv->state_count = 1;
/* Fill in the remaining states from the static table or from ACPI _CST. */
1870 intel_idle_init_cstates_icpu(drv);
1872 intel_idle_init_cstates_acpi(drv);
/*
 * auto_demotion_disable - Clear the per-model auto-demotion bits in
 * MSR_PKG_CST_CONFIG_CONTROL so the hardware does not demote requested
 * C-states to shallower ones.  Runs on each CPU at init.
 */
1875 static void auto_demotion_disable(void)
1877 unsigned long long msr_bits;
1879 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1880 msr_bits &= ~auto_demotion_disable_flags;
1881 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
/*
 * c1e_promotion_enable/disable - Read-modify-write MSR_IA32_POWER_CTL to
 * set or clear the C1E promotion bit.  NOTE(review): the actual bit
 * set/clear statements between rdmsrl and wrmsrl are not visible in this
 * extract — confirm in the full file.
 */
1884 static void c1e_promotion_enable(void)
1886 unsigned long long msr_bits;
1888 rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1890 wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1893 static void c1e_promotion_disable(void)
1895 unsigned long long msr_bits;
1897 rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1899 wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1903 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1904 * @cpu: CPU to initialize.
1906 * Register a cpuidle device object for @cpu and update its MSRs in accordance
1907 * with the processor model flags.
1909 static int intel_idle_cpu_init(unsigned int cpu)
1911 struct cpuidle_device *dev;
1913 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1916 if (cpuidle_register_device(dev)) {
1917 pr_debug("cpuidle_register_device %d failed!\n", cpu);
/* Apply per-model MSR tweaks after successful registration. */
1921 if (auto_demotion_disable_flags)
1922 auto_demotion_disable();
1924 if (c1e_promotion == C1E_PROMOTION_ENABLE)
1925 c1e_promotion_enable();
1926 else if (c1e_promotion == C1E_PROMOTION_DISABLE)
1927 c1e_promotion_disable();
/*
 * intel_idle_cpu_online - CPU hotplug "online" callback.
 *
 * Enables tick broadcast on CPUs without ARAT, and registers the cpuidle
 * device for CPUs hotplugged after boot.
 */
1932 static int intel_idle_cpu_online(unsigned int cpu)
1934 struct cpuidle_device *dev;
1936 if (!boot_cpu_has(X86_FEATURE_ARAT))
1937 tick_broadcast_enable();
1940 * Some systems can hotplug a cpu at runtime after
1941 * the kernel has booted, we have to initialize the
1942 * driver in this case
1944 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1945 if (!dev->registered)
1946 return intel_idle_cpu_init(cpu);
1952 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1954 static void __init intel_idle_cpuidle_devices_uninit(void)
/* Error-path teardown: unregister every online CPU's cpuidle device. */
1958 for_each_online_cpu(i)
1959 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
/*
 * intel_idle_init - Module/driver entry point.
 *
 * Bails out when idle= overrides are in effect or max_cstate=0, matches the
 * boot CPU against the model tables, validates CPUID.MWAIT capabilities,
 * builds the state list, registers the cpuidle driver, and installs a CPU
 * hotplug callback.  Errors unwind in reverse order via the labels at the
 * end.  NOTE(review): several intervening lines (returns, goto labels) are
 * not visible in this extract.
 */
1962 static int __init intel_idle_init(void)
1964 const struct x86_cpu_id *id;
1965 unsigned int eax, ebx, ecx;
1968 /* Do not load intel_idle at all for now if idle= is passed */
1969 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1972 if (max_cstate == 0) {
1973 pr_debug("disabled\n");
/* Prefer an exact model match; fall back to the generic MWAIT match. */
1977 id = x86_match_cpu(intel_idle_ids);
1979 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1980 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1984 id = x86_match_cpu(intel_mwait_ids);
1989 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1992 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
/* Require MWAIT extensions and break-on-interrupt support. */
1994 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1995 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1999 pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2001 icpu = (const struct idle_cpu *)id->driver_data;
2003 cpuidle_state_table = icpu->state_table;
2004 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2005 if (icpu->disable_promotion_to_c1e)
2006 c1e_promotion = C1E_PROMOTION_DISABLE;
2007 if (icpu->use_acpi || force_use_acpi)
2008 intel_idle_acpi_cst_extract();
2009 } else if (!intel_idle_acpi_cst_extract()) {
2013 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
2014 boot_cpu_data.x86_model);
2016 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2017 if (!intel_idle_cpuidle_devices)
2020 intel_idle_cpuidle_driver_init(&intel_idle_driver);
2022 retval = cpuidle_register_driver(&intel_idle_driver);
2024 struct cpuidle_driver *drv = cpuidle_get_driver();
2025 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2026 drv ? drv->name : "none");
2027 goto init_driver_fail;
/* Register the per-CPU online callback; runs for already-online CPUs too. */
2030 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2031 intel_idle_cpu_online, NULL);
2035 pr_debug("Local APIC timer is reliable in %s\n",
2036 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
/* Error unwinding: devices, then driver, then the per-CPU allocation. */
2041 intel_idle_cpuidle_devices_uninit();
2042 cpuidle_unregister_driver(&intel_idle_driver);
2044 free_percpu(intel_idle_cpuidle_devices);
2051 * We are not really modular, but we used to support that. Meaning we also
2052 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
2053 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
2054 * is the easiest way (currently) to continue doing that.
2056 module_param(max_cstate, int, 0444)
2058 * The positions of the bits that are set in this number are the indices of the
2059 * idle states to be disabled by default (as reflected by the names of the
2060 * corresponding idle state directories in sysfs, "state0", "state1" ...
2061 * "state<i>" ..., where <i> is the index of the given state).
2063 module_param_named(states_off, disabled_states_mask, uint, 0444);
2064 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
2066 * Some platforms come with mutually exclusive C-states, so that if one is
2067 * enabled, the other C-states must not be used. Example: C1 and C1E on
2068 * Sapphire Rapids platform. This parameter allows for selecting the
2069 * preferred C-states among the groups of mutually exclusive C-states - the
2070 * selected C-states will be registered, the other C-states from the mutually
2071 * exclusive group won't be registered. If the platform has no mutually
2072 * exclusive C-states, this parameter has no effect.
2074 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
2075 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");