GNU Linux-libre 4.14.313-gnu1
[releases.git] / drivers / idle / intel_idle.c
1 /*
2  * intel_idle.c - native hardware idle loop for modern Intel processors
3  *
4  * Copyright (c) 2013, Intel Corporation.
5  * Len Brown <len.brown@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms and conditions of the GNU General Public License,
9  * version 2, as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20
21 /*
22  * intel_idle is a cpuidle driver that loads on specific Intel processors
23  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
24  * make Linux more efficient on these processors, as intel_idle knows
25  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
26  */
27
/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *      for preventing entry into deep C-states
 */
36
/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */
50
/* DEBUG is defined below, so pr_debug() statements are enabled; comment it out to disable them */
52 #define DEBUG
53
54 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55
56 #include <linux/kernel.h>
57 #include <linux/cpuidle.h>
58 #include <linux/tick.h>
59 #include <trace/events/power.h>
60 #include <linux/sched.h>
61 #include <linux/sched/smt.h>
62 #include <linux/notifier.h>
63 #include <linux/cpu.h>
64 #include <linux/moduleparam.h>
65 #include <asm/cpu_device_id.h>
66 #include <asm/intel-family.h>
67 #include <asm/nospec-branch.h>
68 #include <asm/mwait.h>
69 #include <asm/msr.h>
70
71 #define INTEL_IDLE_VERSION "0.4.1"
72
73 static struct cpuidle_driver intel_idle_driver = {
74         .name = "intel_idle",
75         .owner = THIS_MODULE,
76 };
77 /* intel_idle.max_cstate=0 disables driver */
78 static int max_cstate = CPUIDLE_STATE_MAX - 1;
79
80 static unsigned int mwait_substates;
81
82 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
83 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
84 static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
85
/*
 * Description of one supported CPU flavour: the C-state table it uses
 * plus a few switches stored alongside it.
 */
struct idle_cpu {
        /* Table of C-states to register for this CPU. */
        struct cpuidle_state *state_table;

        /*
         * Hardware C-state auto-demotion may not always be optimal.
         * Indicate which enable bits to clear here.
         */
        unsigned long auto_demotion_disable_flags;
        /* NOTE(review): presumably the Bay Trail variant of the above - confirm. */
        bool byt_auto_demotion_disable_flag;
        /* NOTE(review): presumably turns off HW promotion of C1 to C1E - confirm. */
        bool disable_promotion_to_c1e;
};
97
98 static const struct idle_cpu *icpu;
99 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
100 static int intel_idle(struct cpuidle_device *dev,
101                         struct cpuidle_driver *drv, int index);
102 static int intel_idle_ibrs(struct cpuidle_device *dev,
103                            struct cpuidle_driver *drv, int index);
104 static void intel_idle_s2idle(struct cpuidle_device *dev,
105                               struct cpuidle_driver *drv, int index);
106 static struct cpuidle_state *cpuidle_state_table;
107
/*
 * Driver-private bits in cpuidle_state.flags.
 *
 * Bits 24-31 of the flags word hold the MWAIT hint (see flg2MWAIT() and
 * MWAIT2flg()), so the flags defined here must stay out of that byte and
 * must not overlap each other.
 */

/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED        0x10000

/*
 * Disable IBRS across idle (when KERNEL_IBRS).
 *
 * This needs a bit of its own.  It used to be defined as bit 16, the
 * same value as CPUIDLE_FLAG_TLB_FLUSHED, which made every state that
 * only asks for a TLB flush look like an IBRS state as well.
 */
#define CPUIDLE_FLAG_IBRS               0x20000
121
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 *
 * flg2MWAIT(flags): extract the 8-bit hint from bits 24-31 of @flags.
 * MWAIT2flg(eax):   place the low 8 bits of @eax into bits 24-31.
 *
 * The argument of MWAIT2flg() is parenthesized so that an expression
 * such as MWAIT2flg(a | b) is masked as a whole, and the mask is
 * unsigned so that a hint with its top bit set is never shifted into
 * the sign bit of an int.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFFU) << 24)
131
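/*
 * C-state tables.
 *
 * Each table lists its states shallowest first, starting with the
 * first real state at index 0 (there is no dummy C0 entry), and is
 * terminated by an entry whose .enter is NULL.  The MWAIT hint of a
 * state is stored in the top byte of its .flags (see MWAIT2flg()).
 */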
137 static struct cpuidle_state nehalem_cstates[] = {
138         {
139                 .name = "C1",
140                 .desc = "MWAIT 0x00",
141                 .flags = MWAIT2flg(0x00),
142                 .exit_latency = 3,
143                 .target_residency = 6,
144                 .enter = &intel_idle,
145                 .enter_s2idle = intel_idle_s2idle, },
146         {
147                 .name = "C1E",
148                 .desc = "MWAIT 0x01",
149                 .flags = MWAIT2flg(0x01),
150                 .exit_latency = 10,
151                 .target_residency = 20,
152                 .enter = &intel_idle,
153                 .enter_s2idle = intel_idle_s2idle, },
154         {
155                 .name = "C3",
156                 .desc = "MWAIT 0x10",
157                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
158                 .exit_latency = 20,
159                 .target_residency = 80,
160                 .enter = &intel_idle,
161                 .enter_s2idle = intel_idle_s2idle, },
162         {
163                 .name = "C6",
164                 .desc = "MWAIT 0x20",
165                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
166                 .exit_latency = 200,
167                 .target_residency = 800,
168                 .enter = &intel_idle,
169                 .enter_s2idle = intel_idle_s2idle, },
170         {
171                 .enter = NULL }
172 };
173
174 static struct cpuidle_state snb_cstates[] = {
175         {
176                 .name = "C1",
177                 .desc = "MWAIT 0x00",
178                 .flags = MWAIT2flg(0x00),
179                 .exit_latency = 2,
180                 .target_residency = 2,
181                 .enter = &intel_idle,
182                 .enter_s2idle = intel_idle_s2idle, },
183         {
184                 .name = "C1E",
185                 .desc = "MWAIT 0x01",
186                 .flags = MWAIT2flg(0x01),
187                 .exit_latency = 10,
188                 .target_residency = 20,
189                 .enter = &intel_idle,
190                 .enter_s2idle = intel_idle_s2idle, },
191         {
192                 .name = "C3",
193                 .desc = "MWAIT 0x10",
194                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
195                 .exit_latency = 80,
196                 .target_residency = 211,
197                 .enter = &intel_idle,
198                 .enter_s2idle = intel_idle_s2idle, },
199         {
200                 .name = "C6",
201                 .desc = "MWAIT 0x20",
202                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
203                 .exit_latency = 104,
204                 .target_residency = 345,
205                 .enter = &intel_idle,
206                 .enter_s2idle = intel_idle_s2idle, },
207         {
208                 .name = "C7",
209                 .desc = "MWAIT 0x30",
210                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
211                 .exit_latency = 109,
212                 .target_residency = 345,
213                 .enter = &intel_idle,
214                 .enter_s2idle = intel_idle_s2idle, },
215         {
216                 .enter = NULL }
217 };
218
219 static struct cpuidle_state byt_cstates[] = {
220         {
221                 .name = "C1",
222                 .desc = "MWAIT 0x00",
223                 .flags = MWAIT2flg(0x00),
224                 .exit_latency = 1,
225                 .target_residency = 1,
226                 .enter = &intel_idle,
227                 .enter_s2idle = intel_idle_s2idle, },
228         {
229                 .name = "C6N",
230                 .desc = "MWAIT 0x58",
231                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
232                 .exit_latency = 300,
233                 .target_residency = 275,
234                 .enter = &intel_idle,
235                 .enter_s2idle = intel_idle_s2idle, },
236         {
237                 .name = "C6S",
238                 .desc = "MWAIT 0x52",
239                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
240                 .exit_latency = 500,
241                 .target_residency = 560,
242                 .enter = &intel_idle,
243                 .enter_s2idle = intel_idle_s2idle, },
244         {
245                 .name = "C7",
246                 .desc = "MWAIT 0x60",
247                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
248                 .exit_latency = 1200,
249                 .target_residency = 4000,
250                 .enter = &intel_idle,
251                 .enter_s2idle = intel_idle_s2idle, },
252         {
253                 .name = "C7S",
254                 .desc = "MWAIT 0x64",
255                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
256                 .exit_latency = 10000,
257                 .target_residency = 20000,
258                 .enter = &intel_idle,
259                 .enter_s2idle = intel_idle_s2idle, },
260         {
261                 .enter = NULL }
262 };
263
264 static struct cpuidle_state cht_cstates[] = {
265         {
266                 .name = "C1",
267                 .desc = "MWAIT 0x00",
268                 .flags = MWAIT2flg(0x00),
269                 .exit_latency = 1,
270                 .target_residency = 1,
271                 .enter = &intel_idle,
272                 .enter_s2idle = intel_idle_s2idle, },
273         {
274                 .name = "C6N",
275                 .desc = "MWAIT 0x58",
276                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
277                 .exit_latency = 80,
278                 .target_residency = 275,
279                 .enter = &intel_idle,
280                 .enter_s2idle = intel_idle_s2idle, },
281         {
282                 .name = "C6S",
283                 .desc = "MWAIT 0x52",
284                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
285                 .exit_latency = 200,
286                 .target_residency = 560,
287                 .enter = &intel_idle,
288                 .enter_s2idle = intel_idle_s2idle, },
289         {
290                 .name = "C7",
291                 .desc = "MWAIT 0x60",
292                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
293                 .exit_latency = 1200,
294                 .target_residency = 4000,
295                 .enter = &intel_idle,
296                 .enter_s2idle = intel_idle_s2idle, },
297         {
298                 .name = "C7S",
299                 .desc = "MWAIT 0x64",
300                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
301                 .exit_latency = 10000,
302                 .target_residency = 20000,
303                 .enter = &intel_idle,
304                 .enter_s2idle = intel_idle_s2idle, },
305         {
306                 .enter = NULL }
307 };
308
309 static struct cpuidle_state ivb_cstates[] = {
310         {
311                 .name = "C1",
312                 .desc = "MWAIT 0x00",
313                 .flags = MWAIT2flg(0x00),
314                 .exit_latency = 1,
315                 .target_residency = 1,
316                 .enter = &intel_idle,
317                 .enter_s2idle = intel_idle_s2idle, },
318         {
319                 .name = "C1E",
320                 .desc = "MWAIT 0x01",
321                 .flags = MWAIT2flg(0x01),
322                 .exit_latency = 10,
323                 .target_residency = 20,
324                 .enter = &intel_idle,
325                 .enter_s2idle = intel_idle_s2idle, },
326         {
327                 .name = "C3",
328                 .desc = "MWAIT 0x10",
329                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
330                 .exit_latency = 59,
331                 .target_residency = 156,
332                 .enter = &intel_idle,
333                 .enter_s2idle = intel_idle_s2idle, },
334         {
335                 .name = "C6",
336                 .desc = "MWAIT 0x20",
337                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
338                 .exit_latency = 80,
339                 .target_residency = 300,
340                 .enter = &intel_idle,
341                 .enter_s2idle = intel_idle_s2idle, },
342         {
343                 .name = "C7",
344                 .desc = "MWAIT 0x30",
345                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
346                 .exit_latency = 87,
347                 .target_residency = 300,
348                 .enter = &intel_idle,
349                 .enter_s2idle = intel_idle_s2idle, },
350         {
351                 .enter = NULL }
352 };
353
354 static struct cpuidle_state ivt_cstates[] = {
355         {
356                 .name = "C1",
357                 .desc = "MWAIT 0x00",
358                 .flags = MWAIT2flg(0x00),
359                 .exit_latency = 1,
360                 .target_residency = 1,
361                 .enter = &intel_idle,
362                 .enter_s2idle = intel_idle_s2idle, },
363         {
364                 .name = "C1E",
365                 .desc = "MWAIT 0x01",
366                 .flags = MWAIT2flg(0x01),
367                 .exit_latency = 10,
368                 .target_residency = 80,
369                 .enter = &intel_idle,
370                 .enter_s2idle = intel_idle_s2idle, },
371         {
372                 .name = "C3",
373                 .desc = "MWAIT 0x10",
374                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
375                 .exit_latency = 59,
376                 .target_residency = 156,
377                 .enter = &intel_idle,
378                 .enter_s2idle = intel_idle_s2idle, },
379         {
380                 .name = "C6",
381                 .desc = "MWAIT 0x20",
382                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
383                 .exit_latency = 82,
384                 .target_residency = 300,
385                 .enter = &intel_idle,
386                 .enter_s2idle = intel_idle_s2idle, },
387         {
388                 .enter = NULL }
389 };
390
391 static struct cpuidle_state ivt_cstates_4s[] = {
392         {
393                 .name = "C1",
394                 .desc = "MWAIT 0x00",
395                 .flags = MWAIT2flg(0x00),
396                 .exit_latency = 1,
397                 .target_residency = 1,
398                 .enter = &intel_idle,
399                 .enter_s2idle = intel_idle_s2idle, },
400         {
401                 .name = "C1E",
402                 .desc = "MWAIT 0x01",
403                 .flags = MWAIT2flg(0x01),
404                 .exit_latency = 10,
405                 .target_residency = 250,
406                 .enter = &intel_idle,
407                 .enter_s2idle = intel_idle_s2idle, },
408         {
409                 .name = "C3",
410                 .desc = "MWAIT 0x10",
411                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
412                 .exit_latency = 59,
413                 .target_residency = 300,
414                 .enter = &intel_idle,
415                 .enter_s2idle = intel_idle_s2idle, },
416         {
417                 .name = "C6",
418                 .desc = "MWAIT 0x20",
419                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
420                 .exit_latency = 84,
421                 .target_residency = 400,
422                 .enter = &intel_idle,
423                 .enter_s2idle = intel_idle_s2idle, },
424         {
425                 .enter = NULL }
426 };
427
428 static struct cpuidle_state ivt_cstates_8s[] = {
429         {
430                 .name = "C1",
431                 .desc = "MWAIT 0x00",
432                 .flags = MWAIT2flg(0x00),
433                 .exit_latency = 1,
434                 .target_residency = 1,
435                 .enter = &intel_idle,
436                 .enter_s2idle = intel_idle_s2idle, },
437         {
438                 .name = "C1E",
439                 .desc = "MWAIT 0x01",
440                 .flags = MWAIT2flg(0x01),
441                 .exit_latency = 10,
442                 .target_residency = 500,
443                 .enter = &intel_idle,
444                 .enter_s2idle = intel_idle_s2idle, },
445         {
446                 .name = "C3",
447                 .desc = "MWAIT 0x10",
448                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
449                 .exit_latency = 59,
450                 .target_residency = 600,
451                 .enter = &intel_idle,
452                 .enter_s2idle = intel_idle_s2idle, },
453         {
454                 .name = "C6",
455                 .desc = "MWAIT 0x20",
456                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
457                 .exit_latency = 88,
458                 .target_residency = 700,
459                 .enter = &intel_idle,
460                 .enter_s2idle = intel_idle_s2idle, },
461         {
462                 .enter = NULL }
463 };
464
465 static struct cpuidle_state hsw_cstates[] = {
466         {
467                 .name = "C1",
468                 .desc = "MWAIT 0x00",
469                 .flags = MWAIT2flg(0x00),
470                 .exit_latency = 2,
471                 .target_residency = 2,
472                 .enter = &intel_idle,
473                 .enter_s2idle = intel_idle_s2idle, },
474         {
475                 .name = "C1E",
476                 .desc = "MWAIT 0x01",
477                 .flags = MWAIT2flg(0x01),
478                 .exit_latency = 10,
479                 .target_residency = 20,
480                 .enter = &intel_idle,
481                 .enter_s2idle = intel_idle_s2idle, },
482         {
483                 .name = "C3",
484                 .desc = "MWAIT 0x10",
485                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
486                 .exit_latency = 33,
487                 .target_residency = 100,
488                 .enter = &intel_idle,
489                 .enter_s2idle = intel_idle_s2idle, },
490         {
491                 .name = "C6",
492                 .desc = "MWAIT 0x20",
493                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
494                 .exit_latency = 133,
495                 .target_residency = 400,
496                 .enter = &intel_idle,
497                 .enter_s2idle = intel_idle_s2idle, },
498         {
499                 .name = "C7s",
500                 .desc = "MWAIT 0x32",
501                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
502                 .exit_latency = 166,
503                 .target_residency = 500,
504                 .enter = &intel_idle,
505                 .enter_s2idle = intel_idle_s2idle, },
506         {
507                 .name = "C8",
508                 .desc = "MWAIT 0x40",
509                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
510                 .exit_latency = 300,
511                 .target_residency = 900,
512                 .enter = &intel_idle,
513                 .enter_s2idle = intel_idle_s2idle, },
514         {
515                 .name = "C9",
516                 .desc = "MWAIT 0x50",
517                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
518                 .exit_latency = 600,
519                 .target_residency = 1800,
520                 .enter = &intel_idle,
521                 .enter_s2idle = intel_idle_s2idle, },
522         {
523                 .name = "C10",
524                 .desc = "MWAIT 0x60",
525                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
526                 .exit_latency = 2600,
527                 .target_residency = 7700,
528                 .enter = &intel_idle,
529                 .enter_s2idle = intel_idle_s2idle, },
530         {
531                 .enter = NULL }
532 };
533 static struct cpuidle_state bdw_cstates[] = {
534         {
535                 .name = "C1",
536                 .desc = "MWAIT 0x00",
537                 .flags = MWAIT2flg(0x00),
538                 .exit_latency = 2,
539                 .target_residency = 2,
540                 .enter = &intel_idle,
541                 .enter_s2idle = intel_idle_s2idle, },
542         {
543                 .name = "C1E",
544                 .desc = "MWAIT 0x01",
545                 .flags = MWAIT2flg(0x01),
546                 .exit_latency = 10,
547                 .target_residency = 20,
548                 .enter = &intel_idle,
549                 .enter_s2idle = intel_idle_s2idle, },
550         {
551                 .name = "C3",
552                 .desc = "MWAIT 0x10",
553                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
554                 .exit_latency = 40,
555                 .target_residency = 100,
556                 .enter = &intel_idle,
557                 .enter_s2idle = intel_idle_s2idle, },
558         {
559                 .name = "C6",
560                 .desc = "MWAIT 0x20",
561                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
562                 .exit_latency = 133,
563                 .target_residency = 400,
564                 .enter = &intel_idle,
565                 .enter_s2idle = intel_idle_s2idle, },
566         {
567                 .name = "C7s",
568                 .desc = "MWAIT 0x32",
569                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
570                 .exit_latency = 166,
571                 .target_residency = 500,
572                 .enter = &intel_idle,
573                 .enter_s2idle = intel_idle_s2idle, },
574         {
575                 .name = "C8",
576                 .desc = "MWAIT 0x40",
577                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
578                 .exit_latency = 300,
579                 .target_residency = 900,
580                 .enter = &intel_idle,
581                 .enter_s2idle = intel_idle_s2idle, },
582         {
583                 .name = "C9",
584                 .desc = "MWAIT 0x50",
585                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
586                 .exit_latency = 600,
587                 .target_residency = 1800,
588                 .enter = &intel_idle,
589                 .enter_s2idle = intel_idle_s2idle, },
590         {
591                 .name = "C10",
592                 .desc = "MWAIT 0x60",
593                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
594                 .exit_latency = 2600,
595                 .target_residency = 7700,
596                 .enter = &intel_idle,
597                 .enter_s2idle = intel_idle_s2idle, },
598         {
599                 .enter = NULL }
600 };
601
602 static struct cpuidle_state skl_cstates[] = {
603         {
604                 .name = "C1",
605                 .desc = "MWAIT 0x00",
606                 .flags = MWAIT2flg(0x00),
607                 .exit_latency = 2,
608                 .target_residency = 2,
609                 .enter = &intel_idle,
610                 .enter_s2idle = intel_idle_s2idle, },
611         {
612                 .name = "C1E",
613                 .desc = "MWAIT 0x01",
614                 .flags = MWAIT2flg(0x01),
615                 .exit_latency = 10,
616                 .target_residency = 20,
617                 .enter = &intel_idle,
618                 .enter_s2idle = intel_idle_s2idle, },
619         {
620                 .name = "C3",
621                 .desc = "MWAIT 0x10",
622                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
623                 .exit_latency = 70,
624                 .target_residency = 100,
625                 .enter = &intel_idle,
626                 .enter_s2idle = intel_idle_s2idle, },
627         {
628                 .name = "C6",
629                 .desc = "MWAIT 0x20",
630                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
631                 .exit_latency = 85,
632                 .target_residency = 200,
633                 .enter = &intel_idle,
634                 .enter_s2idle = intel_idle_s2idle, },
635         {
636                 .name = "C7s",
637                 .desc = "MWAIT 0x33",
638                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
639                 .exit_latency = 124,
640                 .target_residency = 800,
641                 .enter = &intel_idle,
642                 .enter_s2idle = intel_idle_s2idle, },
643         {
644                 .name = "C8",
645                 .desc = "MWAIT 0x40",
646                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
647                 .exit_latency = 200,
648                 .target_residency = 800,
649                 .enter = &intel_idle,
650                 .enter_s2idle = intel_idle_s2idle, },
651         {
652                 .name = "C9",
653                 .desc = "MWAIT 0x50",
654                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
655                 .exit_latency = 480,
656                 .target_residency = 5000,
657                 .enter = &intel_idle,
658                 .enter_s2idle = intel_idle_s2idle, },
659         {
660                 .name = "C10",
661                 .desc = "MWAIT 0x60",
662                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
663                 .exit_latency = 890,
664                 .target_residency = 5000,
665                 .enter = &intel_idle,
666                 .enter_s2idle = intel_idle_s2idle, },
667         {
668                 .enter = NULL }
669 };
670
671 static struct cpuidle_state skx_cstates[] = {
672         {
673                 .name = "C1",
674                 .desc = "MWAIT 0x00",
675                 .flags = MWAIT2flg(0x00),
676                 .exit_latency = 2,
677                 .target_residency = 2,
678                 .enter = &intel_idle,
679                 .enter_s2idle = intel_idle_s2idle, },
680         {
681                 .name = "C1E",
682                 .desc = "MWAIT 0x01",
683                 .flags = MWAIT2flg(0x01),
684                 .exit_latency = 10,
685                 .target_residency = 20,
686                 .enter = &intel_idle,
687                 .enter_s2idle = intel_idle_s2idle, },
688         {
689                 .name = "C6",
690                 .desc = "MWAIT 0x20",
691                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
692                 .exit_latency = 133,
693                 .target_residency = 600,
694                 .enter = &intel_idle,
695                 .enter_s2idle = intel_idle_s2idle, },
696         {
697                 .enter = NULL }
698 };
699
700 static struct cpuidle_state atom_cstates[] = {
701         {
702                 .name = "C1E",
703                 .desc = "MWAIT 0x00",
704                 .flags = MWAIT2flg(0x00),
705                 .exit_latency = 10,
706                 .target_residency = 20,
707                 .enter = &intel_idle,
708                 .enter_s2idle = intel_idle_s2idle, },
709         {
710                 .name = "C2",
711                 .desc = "MWAIT 0x10",
712                 .flags = MWAIT2flg(0x10),
713                 .exit_latency = 20,
714                 .target_residency = 80,
715                 .enter = &intel_idle,
716                 .enter_s2idle = intel_idle_s2idle, },
717         {
718                 .name = "C4",
719                 .desc = "MWAIT 0x30",
720                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
721                 .exit_latency = 100,
722                 .target_residency = 400,
723                 .enter = &intel_idle,
724                 .enter_s2idle = intel_idle_s2idle, },
725         {
726                 .name = "C6",
727                 .desc = "MWAIT 0x52",
728                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
729                 .exit_latency = 140,
730                 .target_residency = 560,
731                 .enter = &intel_idle,
732                 .enter_s2idle = intel_idle_s2idle, },
733         {
734                 .enter = NULL }
735 };
736 static struct cpuidle_state tangier_cstates[] = {
737         {
738                 .name = "C1",
739                 .desc = "MWAIT 0x00",
740                 .flags = MWAIT2flg(0x00),
741                 .exit_latency = 1,
742                 .target_residency = 4,
743                 .enter = &intel_idle,
744                 .enter_s2idle = intel_idle_s2idle, },
745         {
746                 .name = "C4",
747                 .desc = "MWAIT 0x30",
748                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
749                 .exit_latency = 100,
750                 .target_residency = 400,
751                 .enter = &intel_idle,
752                 .enter_s2idle = intel_idle_s2idle, },
753         {
754                 .name = "C6",
755                 .desc = "MWAIT 0x52",
756                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
757                 .exit_latency = 140,
758                 .target_residency = 560,
759                 .enter = &intel_idle,
760                 .enter_s2idle = intel_idle_s2idle, },
761         {
762                 .name = "C7",
763                 .desc = "MWAIT 0x60",
764                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
765                 .exit_latency = 1200,
766                 .target_residency = 4000,
767                 .enter = &intel_idle,
768                 .enter_s2idle = intel_idle_s2idle, },
769         {
770                 .name = "C9",
771                 .desc = "MWAIT 0x64",
772                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
773                 .exit_latency = 10000,
774                 .target_residency = 20000,
775                 .enter = &intel_idle,
776                 .enter_s2idle = intel_idle_s2idle, },
777         {
778                 .enter = NULL }
779 };
780 static struct cpuidle_state avn_cstates[] = {
781         {
782                 .name = "C1",
783                 .desc = "MWAIT 0x00",
784                 .flags = MWAIT2flg(0x00),
785                 .exit_latency = 2,
786                 .target_residency = 2,
787                 .enter = &intel_idle,
788                 .enter_s2idle = intel_idle_s2idle, },
789         {
790                 .name = "C6",
791                 .desc = "MWAIT 0x51",
792                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
793                 .exit_latency = 15,
794                 .target_residency = 45,
795                 .enter = &intel_idle,
796                 .enter_s2idle = intel_idle_s2idle, },
797         {
798                 .enter = NULL }
799 };
800 static struct cpuidle_state knl_cstates[] = {
801         {
802                 .name = "C1",
803                 .desc = "MWAIT 0x00",
804                 .flags = MWAIT2flg(0x00),
805                 .exit_latency = 1,
806                 .target_residency = 2,
807                 .enter = &intel_idle,
808                 .enter_s2idle = intel_idle_s2idle },
809         {
810                 .name = "C6",
811                 .desc = "MWAIT 0x10",
812                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
813                 .exit_latency = 120,
814                 .target_residency = 500,
815                 .enter = &intel_idle,
816                 .enter_s2idle = intel_idle_s2idle },
817         {
818                 .enter = NULL }
819 };
820
821 static struct cpuidle_state bxt_cstates[] = {
822         {
823                 .name = "C1",
824                 .desc = "MWAIT 0x00",
825                 .flags = MWAIT2flg(0x00),
826                 .exit_latency = 2,
827                 .target_residency = 2,
828                 .enter = &intel_idle,
829                 .enter_s2idle = intel_idle_s2idle, },
830         {
831                 .name = "C1E",
832                 .desc = "MWAIT 0x01",
833                 .flags = MWAIT2flg(0x01),
834                 .exit_latency = 10,
835                 .target_residency = 20,
836                 .enter = &intel_idle,
837                 .enter_s2idle = intel_idle_s2idle, },
838         {
839                 .name = "C6",
840                 .desc = "MWAIT 0x20",
841                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
842                 .exit_latency = 133,
843                 .target_residency = 133,
844                 .enter = &intel_idle,
845                 .enter_s2idle = intel_idle_s2idle, },
846         {
847                 .name = "C7s",
848                 .desc = "MWAIT 0x31",
849                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
850                 .exit_latency = 155,
851                 .target_residency = 155,
852                 .enter = &intel_idle,
853                 .enter_s2idle = intel_idle_s2idle, },
854         {
855                 .name = "C8",
856                 .desc = "MWAIT 0x40",
857                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
858                 .exit_latency = 1000,
859                 .target_residency = 1000,
860                 .enter = &intel_idle,
861                 .enter_s2idle = intel_idle_s2idle, },
862         {
863                 .name = "C9",
864                 .desc = "MWAIT 0x50",
865                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
866                 .exit_latency = 2000,
867                 .target_residency = 2000,
868                 .enter = &intel_idle,
869                 .enter_s2idle = intel_idle_s2idle, },
870         {
871                 .name = "C10",
872                 .desc = "MWAIT 0x60",
873                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
874                 .exit_latency = 10000,
875                 .target_residency = 10000,
876                 .enter = &intel_idle,
877                 .enter_s2idle = intel_idle_s2idle, },
878         {
879                 .enter = NULL }
880 };
881
882 static struct cpuidle_state dnv_cstates[] = {
883         {
884                 .name = "C1",
885                 .desc = "MWAIT 0x00",
886                 .flags = MWAIT2flg(0x00),
887                 .exit_latency = 2,
888                 .target_residency = 2,
889                 .enter = &intel_idle,
890                 .enter_s2idle = intel_idle_s2idle, },
891         {
892                 .name = "C1E",
893                 .desc = "MWAIT 0x01",
894                 .flags = MWAIT2flg(0x01),
895                 .exit_latency = 10,
896                 .target_residency = 20,
897                 .enter = &intel_idle,
898                 .enter_s2idle = intel_idle_s2idle, },
899         {
900                 .name = "C6",
901                 .desc = "MWAIT 0x20",
902                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
903                 .exit_latency = 50,
904                 .target_residency = 500,
905                 .enter = &intel_idle,
906                 .enter_s2idle = intel_idle_s2idle, },
907         {
908                 .enter = NULL }
909 };
910
911 /**
912  * intel_idle
913  * @dev: cpuidle_device
914  * @drv: cpuidle driver
915  * @index: index of cpuidle state
916  *
917  * Must be called under local_irq_disable().
918  */
919 static __cpuidle int intel_idle(struct cpuidle_device *dev,
920                                 struct cpuidle_driver *drv, int index)
921 {
922         unsigned long ecx = 1; /* break on interrupt flag */
923         struct cpuidle_state *state = &drv->states[index];
924         unsigned long eax = flg2MWAIT(state->flags);
925         unsigned int cstate;
926         int cpu = smp_processor_id();
927
928         cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
929
930         /*
931          * leave_mm() to avoid costly and often unnecessary wakeups
932          * for flushing the user TLB's associated with the active mm.
933          */
934         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
935                 leave_mm(cpu);
936
937         if (!(lapic_timer_reliable_states & (1 << (cstate))))
938                 tick_broadcast_enter();
939
940         mwait_idle_with_hints(eax, ecx);
941
942         if (!(lapic_timer_reliable_states & (1 << (cstate))))
943                 tick_broadcast_exit();
944
945         return index;
946 }
947
948 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
949                                      struct cpuidle_driver *drv, int index)
950 {
951         bool smt_active = sched_smt_active();
952         u64 spec_ctrl = spec_ctrl_current();
953         int ret;
954
955         if (smt_active)
956                 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
957
958         ret = intel_idle(dev, drv, index);
959
960         if (smt_active)
961                 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
962
963         return ret;
964 }
965
966 /**
967  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
968  * @dev: cpuidle_device
969  * @drv: cpuidle driver
970  * @index: state index
971  */
972 static void intel_idle_s2idle(struct cpuidle_device *dev,
973                              struct cpuidle_driver *drv, int index)
974 {
975         unsigned long ecx = 1; /* break on interrupt flag */
976         unsigned long eax = flg2MWAIT(drv->states[index].flags);
977
978         mwait_idle_with_hints(eax, ecx);
979 }
980
981 static void __setup_broadcast_timer(bool on)
982 {
983         if (on)
984                 tick_broadcast_enable();
985         else
986                 tick_broadcast_disable();
987 }
988
989 static void auto_demotion_disable(void)
990 {
991         unsigned long long msr_bits;
992
993         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
994         msr_bits &= ~(icpu->auto_demotion_disable_flags);
995         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
996 }
997 static void c1e_promotion_disable(void)
998 {
999         unsigned long long msr_bits;
1000
1001         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1002         msr_bits &= ~0x2;
1003         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1004 }
1005
1006 static const struct idle_cpu idle_cpu_nehalem = {
1007         .state_table = nehalem_cstates,
1008         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1009         .disable_promotion_to_c1e = true,
1010 };
1011
1012 static const struct idle_cpu idle_cpu_atom = {
1013         .state_table = atom_cstates,
1014 };
1015
1016 static const struct idle_cpu idle_cpu_tangier = {
1017         .state_table = tangier_cstates,
1018 };
1019
1020 static const struct idle_cpu idle_cpu_lincroft = {
1021         .state_table = atom_cstates,
1022         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1023 };
1024
1025 static const struct idle_cpu idle_cpu_snb = {
1026         .state_table = snb_cstates,
1027         .disable_promotion_to_c1e = true,
1028 };
1029
1030 static const struct idle_cpu idle_cpu_byt = {
1031         .state_table = byt_cstates,
1032         .disable_promotion_to_c1e = true,
1033         .byt_auto_demotion_disable_flag = true,
1034 };
1035
1036 static const struct idle_cpu idle_cpu_cht = {
1037         .state_table = cht_cstates,
1038         .disable_promotion_to_c1e = true,
1039         .byt_auto_demotion_disable_flag = true,
1040 };
1041
1042 static const struct idle_cpu idle_cpu_ivb = {
1043         .state_table = ivb_cstates,
1044         .disable_promotion_to_c1e = true,
1045 };
1046
1047 static const struct idle_cpu idle_cpu_ivt = {
1048         .state_table = ivt_cstates,
1049         .disable_promotion_to_c1e = true,
1050 };
1051
1052 static const struct idle_cpu idle_cpu_hsw = {
1053         .state_table = hsw_cstates,
1054         .disable_promotion_to_c1e = true,
1055 };
1056
1057 static const struct idle_cpu idle_cpu_bdw = {
1058         .state_table = bdw_cstates,
1059         .disable_promotion_to_c1e = true,
1060 };
1061
1062 static const struct idle_cpu idle_cpu_skl = {
1063         .state_table = skl_cstates,
1064         .disable_promotion_to_c1e = true,
1065 };
1066
1067 static const struct idle_cpu idle_cpu_skx = {
1068         .state_table = skx_cstates,
1069         .disable_promotion_to_c1e = true,
1070 };
1071
1072 static const struct idle_cpu idle_cpu_avn = {
1073         .state_table = avn_cstates,
1074         .disable_promotion_to_c1e = true,
1075 };
1076
1077 static const struct idle_cpu idle_cpu_knl = {
1078         .state_table = knl_cstates,
1079 };
1080
1081 static const struct idle_cpu idle_cpu_bxt = {
1082         .state_table = bxt_cstates,
1083         .disable_promotion_to_c1e = true,
1084 };
1085
1086 static const struct idle_cpu idle_cpu_dnv = {
1087         .state_table = dnv_cstates,
1088         .disable_promotion_to_c1e = true,
1089 };
1090
1091 #define ICPU(model, cpu) \
1092         { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu }
1093
1094 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1095         ICPU(INTEL_FAM6_NEHALEM_EP,             idle_cpu_nehalem),
1096         ICPU(INTEL_FAM6_NEHALEM,                idle_cpu_nehalem),
1097         ICPU(INTEL_FAM6_NEHALEM_G,              idle_cpu_nehalem),
1098         ICPU(INTEL_FAM6_WESTMERE,               idle_cpu_nehalem),
1099         ICPU(INTEL_FAM6_WESTMERE_EP,            idle_cpu_nehalem),
1100         ICPU(INTEL_FAM6_NEHALEM_EX,             idle_cpu_nehalem),
1101         ICPU(INTEL_FAM6_ATOM_BONNELL,           idle_cpu_atom),
1102         ICPU(INTEL_FAM6_ATOM_BONNELL_MID,               idle_cpu_lincroft),
1103         ICPU(INTEL_FAM6_WESTMERE_EX,            idle_cpu_nehalem),
1104         ICPU(INTEL_FAM6_SANDYBRIDGE,            idle_cpu_snb),
1105         ICPU(INTEL_FAM6_SANDYBRIDGE_X,          idle_cpu_snb),
1106         ICPU(INTEL_FAM6_ATOM_SALTWELL,          idle_cpu_atom),
1107         ICPU(INTEL_FAM6_ATOM_SILVERMONT,        idle_cpu_byt),
1108         ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID,    idle_cpu_tangier),
1109         ICPU(INTEL_FAM6_ATOM_AIRMONT,           idle_cpu_cht),
1110         ICPU(INTEL_FAM6_IVYBRIDGE,              idle_cpu_ivb),
1111         ICPU(INTEL_FAM6_IVYBRIDGE_X,            idle_cpu_ivt),
1112         ICPU(INTEL_FAM6_HASWELL_CORE,           idle_cpu_hsw),
1113         ICPU(INTEL_FAM6_HASWELL_X,              idle_cpu_hsw),
1114         ICPU(INTEL_FAM6_HASWELL_ULT,            idle_cpu_hsw),
1115         ICPU(INTEL_FAM6_HASWELL_GT3E,           idle_cpu_hsw),
1116         ICPU(INTEL_FAM6_ATOM_SILVERMONT_X,      idle_cpu_avn),
1117         ICPU(INTEL_FAM6_BROADWELL_CORE,         idle_cpu_bdw),
1118         ICPU(INTEL_FAM6_BROADWELL_GT3E,         idle_cpu_bdw),
1119         ICPU(INTEL_FAM6_BROADWELL_X,            idle_cpu_bdw),
1120         ICPU(INTEL_FAM6_BROADWELL_XEON_D,       idle_cpu_bdw),
1121         ICPU(INTEL_FAM6_SKYLAKE_MOBILE,         idle_cpu_skl),
1122         ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,        idle_cpu_skl),
1123         ICPU(INTEL_FAM6_KABYLAKE_MOBILE,        idle_cpu_skl),
1124         ICPU(INTEL_FAM6_KABYLAKE_DESKTOP,       idle_cpu_skl),
1125         ICPU(INTEL_FAM6_SKYLAKE_X,              idle_cpu_skx),
1126         ICPU(INTEL_FAM6_XEON_PHI_KNL,           idle_cpu_knl),
1127         ICPU(INTEL_FAM6_XEON_PHI_KNM,           idle_cpu_knl),
1128         ICPU(INTEL_FAM6_ATOM_GOLDMONT,          idle_cpu_bxt),
1129         ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS,     idle_cpu_bxt),
1130         ICPU(INTEL_FAM6_ATOM_GOLDMONT_X,        idle_cpu_dnv),
1131         {}
1132 };
1133
1134 /*
1135  * intel_idle_probe()
1136  */
1137 static int __init intel_idle_probe(void)
1138 {
1139         unsigned int eax, ebx, ecx;
1140         const struct x86_cpu_id *id;
1141
1142         if (max_cstate == 0) {
1143                 pr_debug("disabled\n");
1144                 return -EPERM;
1145         }
1146
1147         id = x86_match_cpu(intel_idle_ids);
1148         if (!id) {
1149                 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1150                     boot_cpu_data.x86 == 6)
1151                         pr_debug("does not run on family %d model %d\n",
1152                                  boot_cpu_data.x86, boot_cpu_data.x86_model);
1153                 return -ENODEV;
1154         }
1155
1156         if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1157                 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1158                 return -ENODEV;
1159         }
1160
1161         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1162                 return -ENODEV;
1163
1164         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1165
1166         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1167             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1168             !mwait_substates)
1169                         return -ENODEV;
1170
1171         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1172
1173         icpu = (const struct idle_cpu *)id->driver_data;
1174         cpuidle_state_table = icpu->state_table;
1175
1176         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1177                  boot_cpu_data.x86_model);
1178
1179         return 0;
1180 }
1181
1182 /*
1183  * intel_idle_cpuidle_devices_uninit()
1184  * Unregisters the cpuidle devices.
1185  */
1186 static void intel_idle_cpuidle_devices_uninit(void)
1187 {
1188         int i;
1189         struct cpuidle_device *dev;
1190
1191         for_each_online_cpu(i) {
1192                 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1193                 cpuidle_unregister_device(dev);
1194         }
1195 }
1196
1197 /*
1198  * ivt_idle_state_table_update(void)
1199  *
1200  * Tune IVT multi-socket targets
1201  * Assumption: num_sockets == (max_package_num + 1)
1202  */
1203 static void ivt_idle_state_table_update(void)
1204 {
1205         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1206         int cpu, package_num, num_sockets = 1;
1207
1208         for_each_online_cpu(cpu) {
1209                 package_num = topology_physical_package_id(cpu);
1210                 if (package_num + 1 > num_sockets) {
1211                         num_sockets = package_num + 1;
1212
1213                         if (num_sockets > 4) {
1214                                 cpuidle_state_table = ivt_cstates_8s;
1215                                 return;
1216                         }
1217                 }
1218         }
1219
1220         if (num_sockets > 2)
1221                 cpuidle_state_table = ivt_cstates_4s;
1222
1223         /* else, 1 and 2 socket systems use default ivt_cstates */
1224 }
1225
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

/*
 * Nanoseconds per time-limit count, indexed by the 3-bit unit field
 * (bits 12:10) of the IRTL value.  Encodings 6 and 7 map to 0, which makes
 * irtl_2_usec() return 0 for them.  The table is read-only.
 */
static const unsigned int irtl_ns_units[] = {
        1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * irtl_2_usec - convert a raw IRTL MSR value to microseconds
 * @irtl: raw MSR value; bits 9:0 hold the count, bits 12:10 select the unit
 *
 * Returns count * unit converted from ns to usec (integer division by
 * 1000), or 0 when @irtl is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
        unsigned long long ns;

        if (!irtl)
                return 0;

        ns = irtl_ns_units[(irtl >> 10) & 0x7];

        return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1244 /*
1245  * bxt_idle_state_table_update(void)
1246  *
1247  * On BXT, we trust the IRTL to show the definitive maximum latency
1248  * We use the same value for target_residency.
1249  */
1250 static void bxt_idle_state_table_update(void)
1251 {
1252         unsigned long long msr;
1253         unsigned int usec;
1254
1255         rdmsrl(MSR_PKGC6_IRTL, msr);
1256         usec = irtl_2_usec(msr);
1257         if (usec) {
1258                 bxt_cstates[2].exit_latency = usec;
1259                 bxt_cstates[2].target_residency = usec;
1260         }
1261
1262         rdmsrl(MSR_PKGC7_IRTL, msr);
1263         usec = irtl_2_usec(msr);
1264         if (usec) {
1265                 bxt_cstates[3].exit_latency = usec;
1266                 bxt_cstates[3].target_residency = usec;
1267         }
1268
1269         rdmsrl(MSR_PKGC8_IRTL, msr);
1270         usec = irtl_2_usec(msr);
1271         if (usec) {
1272                 bxt_cstates[4].exit_latency = usec;
1273                 bxt_cstates[4].target_residency = usec;
1274         }
1275
1276         rdmsrl(MSR_PKGC9_IRTL, msr);
1277         usec = irtl_2_usec(msr);
1278         if (usec) {
1279                 bxt_cstates[5].exit_latency = usec;
1280                 bxt_cstates[5].target_residency = usec;
1281         }
1282
1283         rdmsrl(MSR_PKGC10_IRTL, msr);
1284         usec = irtl_2_usec(msr);
1285         if (usec) {
1286                 bxt_cstates[6].exit_latency = usec;
1287                 bxt_cstates[6].target_residency = usec;
1288         }
1289
1290 }
1291 /*
1292  * sklh_idle_state_table_update(void)
1293  *
1294  * On SKL-H (model 0x5e) disable C8 and C9 if:
1295  * C10 is enabled and SGX disabled
1296  */
1297 static void sklh_idle_state_table_update(void)
1298 {
1299         unsigned long long msr;
1300         unsigned int eax, ebx, ecx, edx;
1301
1302
1303         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1304         if (max_cstate <= 7)
1305                 return;
1306
1307         /* if PC10 not present in CPUID.MWAIT.EDX */
1308         if ((mwait_substates & (0xF << 28)) == 0)
1309                 return;
1310
1311         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1312
1313         /* PC10 is not enabled in PKG C-state limit */
1314         if ((msr & 0xF) != 8)
1315                 return;
1316
1317         ecx = 0;
1318         cpuid(7, &eax, &ebx, &ecx, &edx);
1319
1320         /* if SGX is present */
1321         if (ebx & (1 << 2)) {
1322
1323                 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1324
1325                 /* if SGX is enabled */
1326                 if (msr & (1 << 18))
1327                         return;
1328         }
1329
1330         skl_cstates[5].disabled = 1;    /* C8-SKL */
1331         skl_cstates[6].disabled = 1;    /* C9-SKL */
1332 }
1333 /*
1334  * intel_idle_state_table_update()
1335  *
1336  * Update the default state_table for this CPU-id
1337  */
1338
1339 static void intel_idle_state_table_update(void)
1340 {
1341         switch (boot_cpu_data.x86_model) {
1342
1343         case INTEL_FAM6_IVYBRIDGE_X:
1344                 ivt_idle_state_table_update();
1345                 break;
1346         case INTEL_FAM6_ATOM_GOLDMONT:
1347         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1348                 bxt_idle_state_table_update();
1349                 break;
1350         case INTEL_FAM6_SKYLAKE_DESKTOP:
1351                 sklh_idle_state_table_update();
1352                 break;
1353         }
1354 }
1355
1356 /*
1357  * intel_idle_cpuidle_driver_init()
1358  * allocate, initialize cpuidle_states
1359  */
1360 static void __init intel_idle_cpuidle_driver_init(void)
1361 {
1362         int cstate;
1363         struct cpuidle_driver *drv = &intel_idle_driver;
1364
1365         intel_idle_state_table_update();
1366
1367         cpuidle_poll_state_init(drv);
1368         drv->state_count = 1;
1369
1370         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1371                 int num_substates, mwait_hint, mwait_cstate;
1372
1373                 if ((cpuidle_state_table[cstate].enter == NULL) &&
1374                     (cpuidle_state_table[cstate].enter_s2idle == NULL))
1375                         break;
1376
1377                 if (cstate + 1 > max_cstate) {
1378                         pr_info("max_cstate %d reached\n", max_cstate);
1379                         break;
1380                 }
1381
1382                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1383                 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1384
1385                 /* number of sub-states for this state in CPUID.MWAIT */
1386                 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1387                                         & MWAIT_SUBSTATE_MASK;
1388
1389                 /* if NO sub-states for this state in CPUID, skip it */
1390                 if (num_substates == 0)
1391                         continue;
1392
1393                 /* if state marked as disabled, skip it */
1394                 if (cpuidle_state_table[cstate].disabled != 0) {
1395                         pr_debug("state %s is disabled\n",
1396                                  cpuidle_state_table[cstate].name);
1397                         continue;
1398                 }
1399
1400
1401                 if (((mwait_cstate + 1) > 2) &&
1402                         !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1403                         mark_tsc_unstable("TSC halts in idle"
1404                                         " states deeper than C2");
1405
1406                 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1407                     cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1408                         drv->states[drv->state_count].enter = intel_idle_ibrs;
1409                 }
1410
1411                 drv->states[drv->state_count] = /* structure copy */
1412                         cpuidle_state_table[cstate];
1413
1414                 drv->state_count += 1;
1415         }
1416
1417         if (icpu->byt_auto_demotion_disable_flag) {
1418                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1419                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1420         }
1421 }
1422
1423
1424 /*
1425  * intel_idle_cpu_init()
1426  * allocate, initialize, register cpuidle_devices
1427  * @cpu: cpu/core to initialize
1428  */
1429 static int intel_idle_cpu_init(unsigned int cpu)
1430 {
1431         struct cpuidle_device *dev;
1432
1433         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1434         dev->cpu = cpu;
1435
1436         if (cpuidle_register_device(dev)) {
1437                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1438                 return -EIO;
1439         }
1440
1441         if (icpu->auto_demotion_disable_flags)
1442                 auto_demotion_disable();
1443
1444         if (icpu->disable_promotion_to_c1e)
1445                 c1e_promotion_disable();
1446
1447         return 0;
1448 }
1449
1450 static int intel_idle_cpu_online(unsigned int cpu)
1451 {
1452         struct cpuidle_device *dev;
1453
1454         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1455                 __setup_broadcast_timer(true);
1456
1457         /*
1458          * Some systems can hotplug a cpu at runtime after
1459          * the kernel has booted, we have to initialize the
1460          * driver in this case
1461          */
1462         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1463         if (!dev->registered)
1464                 return intel_idle_cpu_init(cpu);
1465
1466         return 0;
1467 }
1468
1469 static int __init intel_idle_init(void)
1470 {
1471         int retval;
1472
1473         /* Do not load intel_idle at all for now if idle= is passed */
1474         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1475                 return -ENODEV;
1476
1477         retval = intel_idle_probe();
1478         if (retval)
1479                 return retval;
1480
1481         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1482         if (intel_idle_cpuidle_devices == NULL)
1483                 return -ENOMEM;
1484
1485         intel_idle_cpuidle_driver_init();
1486         retval = cpuidle_register_driver(&intel_idle_driver);
1487         if (retval) {
1488                 struct cpuidle_driver *drv = cpuidle_get_driver();
1489                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1490                        drv ? drv->name : "none");
1491                 goto init_driver_fail;
1492         }
1493
1494         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1495                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1496
1497         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1498                                    intel_idle_cpu_online, NULL);
1499         if (retval < 0)
1500                 goto hp_setup_fail;
1501
1502         pr_debug("lapic_timer_reliable_states 0x%x\n",
1503                  lapic_timer_reliable_states);
1504
1505         return 0;
1506
1507 hp_setup_fail:
1508         intel_idle_cpuidle_devices_uninit();
1509         cpuidle_unregister_driver(&intel_idle_driver);
1510 init_driver_fail:
1511         free_percpu(intel_idle_cpuidle_devices);
1512         return retval;
1513
1514 }
1515 device_initcall(intel_idle_init);
1516
1517 /*
1518  * We are not really modular, but we used to support that.  Meaning we also
1519  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1520  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1521  * is the easiest way (currently) to continue doing that.
1522  */
1523 module_param(max_cstate, int, 0444);