GNU Linux-libre 6.1.24-gnu
[releases.git] / drivers / thermal / intel / intel_powerclamp.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_powerclamp.c - package c-state idle injection
4  *
5  * Copyright (c) 2012, Intel Corporation.
6  *
7  * Authors:
8  *     Arjan van de Ven <arjan@linux.intel.com>
9  *     Jacob Pan <jacob.jun.pan@linux.intel.com>
10  *
11  *      TODO:
12  *           1. better handle wakeup from external interrupts, currently a fixed
13  *              compensation is added to clamping duration when excessive amount
14  *              of wakeups are observed during idle time. the reason is that in
15  *              case of external interrupts without need for ack, clamping down
16  *              cpu in non-irq context does not reduce irq. for majority of the
17  *              cases, clamping down cpu does help reduce irq as well, we should
18  *              be able to differentiate the two cases and give a quantitative
19  *              solution for the irqs that we can control. perhaps based on
20  *              get_cpu_iowait_time_us()
21  *
22  *           2. synchronization with other hw blocks
23  */
24
25 #define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
26
27 #include <linux/module.h>
28 #include <linux/kernel.h>
29 #include <linux/delay.h>
30 #include <linux/kthread.h>
31 #include <linux/cpu.h>
32 #include <linux/thermal.h>
33 #include <linux/slab.h>
34 #include <linux/tick.h>
35 #include <linux/debugfs.h>
36 #include <linux/seq_file.h>
37 #include <linux/sched/rt.h>
38 #include <uapi/linux/sched/types.h>
39
40 #include <asm/nmi.h>
41 #include <asm/msr.h>
42 #include <asm/mwait.h>
43 #include <asm/cpu_device_id.h>
44 #include <asm/hardirq.h>
45
46 #define MAX_TARGET_RATIO (50U)
47 /* For each undisturbed clamping period (no extra wake ups during idle time),
48  * we increment the confidence counter for the given target ratio.
49  * CONFIDENCE_OK defines the level where runtime calibration results are
50  * valid.
51  */
52 #define CONFIDENCE_OK (3)
53 /* Default idle injection duration, driver adjust sleep time to meet target
54  * idle ratio. Similar to frequency modulation.
55  */
56 #define DEFAULT_DURATION_JIFFIES (6)
57
static unsigned int target_mwait;	/* deepest mwait hint found by find_target_mwait() */
static struct dentry *debug_dir;	/* debugfs directory handle */
static bool poll_pkg_cstate_enable;	/* allow 1 HZ pkg cstate residency polling */

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;	/* measured pkg cstate ratio, percent */
static bool should_skip;		/* controller says: skip next injection */

static unsigned int control_cpu; /* The cpu assigned to collect stat and update
				  * control parameters. default to BSP but BSP
				  * can be offlined.
				  */
static bool clamping;		/* global on/off switch for idle injection */
72
/* Per-CPU state for one clamping kthread worker. */
struct powerclamp_worker_data {
	struct kthread_worker *worker;		/* dedicated kthread worker */
	struct kthread_work balancing_work;	/* computes next injection slot */
	struct kthread_delayed_work idle_injection_work; /* runs play_idle() */
	unsigned int cpu;			/* CPU this worker is bound to */
	unsigned int count;			/* clamping periods completed */
	unsigned int guard;			/* tolerance above target ratio */
	unsigned int window_size_now;		/* snapshot of window_size */
	unsigned int target_ratio;		/* snapshot of set_target_ratio */
	unsigned int duration_jiffies;		/* injection length per period */
	bool clamping;				/* per-worker requeue enable */
};
85
static struct powerclamp_worker_data __percpu *worker_data;
static struct thermal_cooling_device *cooling_dev;
static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
					   * clamping kthread worker
					   */

static unsigned int duration;		/* injection duration, msec (6..25) */
static unsigned int pkg_cstate_ratio_cur; /* last polled pkg cstate ratio, percent */
static unsigned int window_size;	/* clamping cycles per window (2..10) */
95
96 static int duration_set(const char *arg, const struct kernel_param *kp)
97 {
98         int ret = 0;
99         unsigned long new_duration;
100
101         ret = kstrtoul(arg, 10, &new_duration);
102         if (ret)
103                 goto exit;
104         if (new_duration > 25 || new_duration < 6) {
105                 pr_err("Out of recommended range %lu, between 6-25ms\n",
106                         new_duration);
107                 ret = -EINVAL;
108         }
109
110         duration = clamp(new_duration, 6ul, 25ul);
111         smp_mb();
112
113 exit:
114
115         return ret;
116 }
117
/*
 * Writes go through duration_set() for range checking; reads use the
 * generic integer getter.
 * NOTE(review): duration is unsigned int but read via param_get_int —
 * harmless for the 6..25 range, confirm intent.
 */
static const struct kernel_param_ops duration_ops = {
	.set = duration_set,
	.get = param_get_int,
};


module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
126
/*
 * Per-target-ratio runtime calibration record; maintained by
 * adjust_compensation() and consumed by get_compensation().
 */
struct powerclamp_calibration_data {
	unsigned long confidence;  /* used for calibration, basically a counter
				    * gets incremented each time a clamping
				    * period is completed without extra wakeups
				    * once that counter is reached given level,
				    * compensation is deemed usable.
				    */
	unsigned long steady_comp; /* steady state compensation used when
				    * no extra wakeups occurred.
				    */
	unsigned long dynamic_comp; /* compensate excessive wakeup from idle
				     * mostly from external interrupts.
				     */
};

/* one calibration entry per possible target idle ratio (0..MAX_TARGET_RATIO-1) */
static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
143
144 static int window_size_set(const char *arg, const struct kernel_param *kp)
145 {
146         int ret = 0;
147         unsigned long new_window_size;
148
149         ret = kstrtoul(arg, 10, &new_window_size);
150         if (ret)
151                 goto exit_win;
152         if (new_window_size > 10 || new_window_size < 2) {
153                 pr_err("Out of recommended window size %lu, between 2-10\n",
154                         new_window_size);
155                 ret = -EINVAL;
156         }
157
158         window_size = clamp(new_window_size, 2ul, 10ul);
159         smp_mb();
160
161 exit_win:
162
163         return ret;
164 }
165
/* Writes go through window_size_set() for range checking. */
static const struct kernel_param_ops window_size_ops = {
	.set = window_size_set,
	.get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
	"\tpowerclamp controls idle ratio within this window. larger\n"
	"\twindow size results in slower response time but more smooth\n"
	"\tclamping results. default to 2.");
176
177 static void find_target_mwait(void)
178 {
179         unsigned int eax, ebx, ecx, edx;
180         unsigned int highest_cstate = 0;
181         unsigned int highest_subcstate = 0;
182         int i;
183
184         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
185                 return;
186
187         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
188
189         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
190             !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
191                 return;
192
193         edx >>= MWAIT_SUBSTATE_SIZE;
194         for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
195                 if (edx & MWAIT_SUBSTATE_MASK) {
196                         highest_cstate = i;
197                         highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
198                 }
199         }
200         target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
201                 (highest_subcstate - 1);
202
203 }
204
/* Describes one package C-state residency MSR. */
struct pkg_cstate_info {
	bool skip;	/* set once the MSR turned out to be unreadable */
	int msr_index;	/* MSR_PKG_Cx_RESIDENCY register number */
	int cstate_id;	/* package C-state number x */
};
210
/* Build a pkg_cstate_info initializer for package C-state @id. */
#define PKG_CSTATE_INIT(id) {                           \
		.msr_index = MSR_PKG_C##id##_RESIDENCY, \
		.cstate_id = id                         \
			}
215
216 static struct pkg_cstate_info pkg_cstates[] = {
217         PKG_CSTATE_INIT(2),
218         PKG_CSTATE_INIT(3),
219         PKG_CSTATE_INIT(6),
220         PKG_CSTATE_INIT(7),
221         PKG_CSTATE_INIT(8),
222         PKG_CSTATE_INIT(9),
223         PKG_CSTATE_INIT(10),
224         {NULL},
225 };
226
227 static bool has_pkg_state_counter(void)
228 {
229         u64 val;
230         struct pkg_cstate_info *info = pkg_cstates;
231
232         /* check if any one of the counter msrs exists */
233         while (info->msr_index) {
234                 if (!rdmsrl_safe(info->msr_index, &val))
235                         return true;
236                 info++;
237         }
238
239         return false;
240 }
241
242 static u64 pkg_state_counter(void)
243 {
244         u64 val;
245         u64 count = 0;
246         struct pkg_cstate_info *info = pkg_cstates;
247
248         while (info->msr_index) {
249                 if (!info->skip) {
250                         if (!rdmsrl_safe(info->msr_index, &val))
251                                 count += val;
252                         else
253                                 info->skip = true;
254                 }
255                 info++;
256         }
257
258         return count;
259 }
260
261 static unsigned int get_compensation(int ratio)
262 {
263         unsigned int comp = 0;
264
265         if (!poll_pkg_cstate_enable)
266                 return 0;
267
268         /* we only use compensation if all adjacent ones are good */
269         if (ratio == 1 &&
270                 cal_data[ratio].confidence >= CONFIDENCE_OK &&
271                 cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
272                 cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
273                 comp = (cal_data[ratio].steady_comp +
274                         cal_data[ratio + 1].steady_comp +
275                         cal_data[ratio + 2].steady_comp) / 3;
276         } else if (ratio == MAX_TARGET_RATIO - 1 &&
277                 cal_data[ratio].confidence >= CONFIDENCE_OK &&
278                 cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
279                 cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
280                 comp = (cal_data[ratio].steady_comp +
281                         cal_data[ratio - 1].steady_comp +
282                         cal_data[ratio - 2].steady_comp) / 3;
283         } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
284                 cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
285                 cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
286                 comp = (cal_data[ratio].steady_comp +
287                         cal_data[ratio - 1].steady_comp +
288                         cal_data[ratio + 1].steady_comp) / 3;
289         }
290
291         /* do not exceed limit */
292         if (comp + ratio >= MAX_TARGET_RATIO)
293                 comp = MAX_TARGET_RATIO - ratio - 1;
294
295         return comp;
296 }
297
298 static void adjust_compensation(int target_ratio, unsigned int win)
299 {
300         int delta;
301         struct powerclamp_calibration_data *d = &cal_data[target_ratio];
302
303         /*
304          * adjust compensations if confidence level has not been reached.
305          */
306         if (d->confidence >= CONFIDENCE_OK)
307                 return;
308
309         delta = set_target_ratio - current_ratio;
310         /* filter out bad data */
311         if (delta >= 0 && delta <= (1+target_ratio/10)) {
312                 if (d->steady_comp)
313                         d->steady_comp =
314                                 roundup(delta+d->steady_comp, 2)/2;
315                 else
316                         d->steady_comp = delta;
317                 d->confidence++;
318         }
319 }
320
/*
 * Evaluate the pkg C-state residency achieved over the last window and
 * decide whether the next idle injection should be skipped.
 *
 * Derives current_ratio as 100 * d(residency) / d(tsc) since the last
 * call, then lets adjust_compensation() learn from the result.  Called
 * only from the worker running on control_cpu (see
 * clamp_idle_injection_func()), so the static snapshots need no locking.
 *
 * Return: true when the measured ratio is already at or above
 * target + guard, i.e. injection can be skipped this round.
 */
static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio; first call just seeds the state */
	if (!msr_last || !tsc_last)
		current_ratio = 1;
	else if (tsc_now-tsc_last) {
		val64 = 100*(msr_now-msr_last);
		do_div(val64, (tsc_now-tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);

	/* if we are above target+guard, skip */
	return set_target_ratio + guard <= current_ratio;
}
350
351 static void clamp_balancing_func(struct kthread_work *work)
352 {
353         struct powerclamp_worker_data *w_data;
354         int sleeptime;
355         unsigned long target_jiffies;
356         unsigned int compensated_ratio;
357         int interval; /* jiffies to sleep for each attempt */
358
359         w_data = container_of(work, struct powerclamp_worker_data,
360                               balancing_work);
361
362         /*
363          * make sure user selected ratio does not take effect until
364          * the next round. adjust target_ratio if user has changed
365          * target such that we can converge quickly.
366          */
367         w_data->target_ratio = READ_ONCE(set_target_ratio);
368         w_data->guard = 1 + w_data->target_ratio / 20;
369         w_data->window_size_now = window_size;
370         w_data->duration_jiffies = msecs_to_jiffies(duration);
371         w_data->count++;
372
373         /*
374          * systems may have different ability to enter package level
375          * c-states, thus we need to compensate the injected idle ratio
376          * to achieve the actual target reported by the HW.
377          */
378         compensated_ratio = w_data->target_ratio +
379                 get_compensation(w_data->target_ratio);
380         if (compensated_ratio <= 0)
381                 compensated_ratio = 1;
382         interval = w_data->duration_jiffies * 100 / compensated_ratio;
383
384         /* align idle time */
385         target_jiffies = roundup(jiffies, interval);
386         sleeptime = target_jiffies - jiffies;
387         if (sleeptime <= 0)
388                 sleeptime = 1;
389
390         if (clamping && w_data->clamping && cpu_online(w_data->cpu))
391                 kthread_queue_delayed_work(w_data->worker,
392                                            &w_data->idle_injection_work,
393                                            sleeptime);
394 }
395
/*
 * Delayed per-CPU work that performs the actual idle injection via
 * play_idle(), then requeues the balancing work for the next period.
 * Once per window, the worker on control_cpu also re-evaluates the
 * measured ratio and updates the global should_skip decision.
 */
static void clamp_idle_injection_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;

	w_data = container_of(work, struct powerclamp_worker_data,
			      idle_injection_work.work);

	/*
	 * only elected controlling cpu can collect stats and update
	 * control parameters.
	 */
	if (w_data->cpu == control_cpu &&
	    !(w_data->count % w_data->window_size_now)) {
		should_skip =
			powerclamp_adjust_controls(w_data->target_ratio,
						   w_data->guard,
						   w_data->window_size_now);
		smp_mb();
	}

	/* the controller found we are already above target + guard */
	if (should_skip)
		goto balance;

	play_idle(jiffies_to_usecs(w_data->duration_jiffies));

balance:
	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_work(w_data->worker, &w_data->balancing_work);
}
425
426 /*
427  * 1 HZ polling while clamping is active, useful for userspace
428  * to monitor actual idle ratio.
429  */
430 static void poll_pkg_cstate(struct work_struct *dummy);
431 static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
432 static void poll_pkg_cstate(struct work_struct *dummy)
433 {
434         static u64 msr_last;
435         static u64 tsc_last;
436
437         u64 msr_now;
438         u64 tsc_now;
439         u64 val64;
440
441         msr_now = pkg_state_counter();
442         tsc_now = rdtsc();
443
444         /* calculate pkg cstate vs tsc ratio */
445         if (!msr_last || !tsc_last)
446                 pkg_cstate_ratio_cur = 1;
447         else {
448                 if (tsc_now - tsc_last) {
449                         val64 = 100 * (msr_now - msr_last);
450                         do_div(val64, (tsc_now - tsc_last));
451                         pkg_cstate_ratio_cur = val64;
452                 }
453         }
454
455         /* update record */
456         msr_last = msr_now;
457         tsc_last = tsc_now;
458
459         if (true == clamping)
460                 schedule_delayed_work(&poll_pkg_cstate_work, HZ);
461 }
462
/*
 * Create and start the clamping kthread worker for @cpu and kick off
 * its first balancing cycle.  The worker runs with SCHED_FIFO so idle
 * injection preempts normal tasks.  On worker creation failure the
 * function returns silently and the CPU is simply not clamped.
 */
static void start_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
	struct kthread_worker *worker;

	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
	if (IS_ERR(worker))
		return;

	w_data->worker = worker;
	w_data->count = 0;
	w_data->cpu = cpu;
	w_data->clamping = true;
	set_bit(cpu, cpu_clamping_mask);
	sched_set_fifo(worker->task);
	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
	kthread_init_delayed_work(&w_data->idle_injection_work,
				  clamp_idle_injection_func);
	/* queue the first balancing pass; it reschedules itself from there */
	kthread_queue_work(w_data->worker, &w_data->balancing_work);
}
483
/*
 * Stop and destroy the clamping kthread worker previously started on
 * @cpu.  Safe to call when no worker was created (w_data->worker NULL).
 */
static void stop_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);

	if (!w_data->worker)
		return;

	w_data->clamping = false;
	/*
	 * Make sure that all works that get queued after this point see
	 * the clamping disabled. The counter part is not needed because
	 * there is an implicit memory barrier when the queued work
	 * is processed.
	 */
	smp_wmb();
	kthread_cancel_work_sync(&w_data->balancing_work);
	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
	/*
	 * The balancing work still might be queued here because
	 * the handling of the "clamping" variable, cancel, and queue
	 * operations are not synchronized via a lock. But it is not
	 * a big deal. The balancing work is fast and destroy kthread
	 * will wait for it.
	 */
	clear_bit(w_data->cpu, cpu_clamping_mask);
	kthread_destroy_worker(w_data->worker);

	w_data->worker = NULL;
}
513
514 static int start_power_clamp(void)
515 {
516         unsigned long cpu;
517
518         set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
519         /* prevent cpu hotplug */
520         cpus_read_lock();
521
522         /* prefer BSP */
523         control_cpu = cpumask_first(cpu_online_mask);
524
525         clamping = true;
526         if (poll_pkg_cstate_enable)
527                 schedule_delayed_work(&poll_pkg_cstate_work, 0);
528
529         /* start one kthread worker per online cpu */
530         for_each_online_cpu(cpu) {
531                 start_power_clamp_worker(cpu);
532         }
533         cpus_read_unlock();
534
535         return 0;
536 }
537
538 static void end_power_clamp(void)
539 {
540         int i;
541
542         /*
543          * Block requeuing in all the kthread workers. They will flush and
544          * stop faster.
545          */
546         clamping = false;
547         for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
548                 pr_debug("clamping worker for cpu %d alive, destroy\n", i);
549                 stop_power_clamp_worker(i);
550         }
551 }
552
553 static int powerclamp_cpu_online(unsigned int cpu)
554 {
555         if (clamping == false)
556                 return 0;
557         start_power_clamp_worker(cpu);
558         /* prefer BSP as controlling CPU */
559         if (cpu == 0) {
560                 control_cpu = 0;
561                 smp_mb();
562         }
563         return 0;
564 }
565
566 static int powerclamp_cpu_predown(unsigned int cpu)
567 {
568         if (clamping == false)
569                 return 0;
570
571         stop_power_clamp_worker(cpu);
572         if (cpu != control_cpu)
573                 return 0;
574
575         control_cpu = cpumask_first(cpu_online_mask);
576         if (control_cpu == cpu)
577                 control_cpu = cpumask_next(cpu, cpu_online_mask);
578         smp_mb();
579         return 0;
580 }
581
/*
 * Report the maximum cooling state (target idle ratio in percent).
 *
 * NOTE(review): set_cur_state() clamps requests to MAX_TARGET_RATIO - 1,
 * so advertising MAX_TARGET_RATIO here exposes one state that can never
 * be reached as requested — confirm whether this should be
 * MAX_TARGET_RATIO - 1.
 */
static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}
589
/*
 * Report the current cooling state: the measured package C-state ratio
 * when 1 HZ polling is enabled, otherwise the requested target ratio.
 *
 * NOTE(review): while not clamping, *state = -1 wraps to ULONG_MAX in
 * the unsigned long — presumably read as an "invalid" marker; confirm
 * against the thermal core's expectations.
 */
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	if (clamping) {
		if (poll_pkg_cstate_enable)
			*state = pkg_cstate_ratio_cur;
		else
			*state = set_target_ratio;
	} else {
		/* to save power, do not poll idle ratio while not clamping */
		*state = -1; /* indicates invalid state */
	}

	return 0;
}
605
/*
 * Set a new target idle ratio (percent), clamped to 0..MAX_TARGET_RATIO-1.
 *
 * Handles three transitions:
 *  - 0 -> nonzero: store the target, then start the clamping machinery;
 *  - nonzero -> 0: stop clamping, then clear the target;
 *  - nonzero -> nonzero: just update the target; workers pick it up on
 *    their next balancing cycle.
 *
 * Return: 0, or the start_power_clamp() result when starting.
 */
static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long new_target_ratio)
{
	int ret = 0;

	new_target_ratio = clamp(new_target_ratio, 0UL,
				(unsigned long) (MAX_TARGET_RATIO-1));
	if (set_target_ratio == 0 && new_target_ratio > 0) {
		pr_info("Start idle injection to reduce power\n");
		set_target_ratio = new_target_ratio;
		ret = start_power_clamp();
		goto exit_set;
	} else  if (set_target_ratio > 0 && new_target_ratio == 0) {
		pr_info("Stop forced idle injection\n");
		end_power_clamp();
		set_target_ratio = 0;
	} else  /* adjust currently running */ {
		set_target_ratio = new_target_ratio;
		/* make new set_target_ratio visible to other cpus */
		smp_mb();
	}

exit_set:
	return ret;
}
631
/* bind to the generic thermal layer as a cooling device */
static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};
638
/* Match any Intel CPU with MWAIT; no per-model list is needed. */
static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
644
645 static int __init powerclamp_probe(void)
646 {
647
648         if (!x86_match_cpu(intel_powerclamp_ids)) {
649                 pr_err("CPU does not support MWAIT\n");
650                 return -ENODEV;
651         }
652
653         /* The goal for idle time alignment is to achieve package cstate. */
654         if (!has_pkg_state_counter()) {
655                 pr_info("No package C-state available\n");
656                 return -ENODEV;
657         }
658
659         /* find the deepest mwait value */
660         find_target_mwait();
661
662         return 0;
663 }
664
665 static int powerclamp_debug_show(struct seq_file *m, void *unused)
666 {
667         int i = 0;
668
669         seq_printf(m, "controlling cpu: %d\n", control_cpu);
670         seq_printf(m, "pct confidence steady dynamic (compensation)\n");
671         for (i = 0; i < MAX_TARGET_RATIO; i++) {
672                 seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
673                         i,
674                         cal_data[i].confidence,
675                         cal_data[i].steady_comp,
676                         cal_data[i].dynamic_comp);
677         }
678
679         return 0;
680 }
681
/* generates powerclamp_debug_fops (open/read/release) around the show routine */
DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);
683
684 static inline void powerclamp_create_debug_files(void)
685 {
686         debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
687
688         debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
689                             &powerclamp_debug_fops);
690 }
691
/* dynamic cpuhp state id returned by cpuhp_setup_state_nocalls() */
static enum cpuhp_state hp_state;
693
/*
 * Module init: probe CPU support, register CPU hotplug callbacks,
 * allocate per-CPU worker state and register the thermal cooling
 * device.  On failure, resources acquired so far are unwound in
 * reverse order via the goto chain.
 */
static int __init powerclamp_init(void)
{
	int retval;

	cpu_clamping_mask = bitmap_zalloc(num_possible_cpus(), GFP_KERNEL);
	if (!cpu_clamping_mask)
		return -ENOMEM;

	/* probe cpu features and ids here */
	retval = powerclamp_probe();
	if (retval)
		goto exit_free;

	/* set default limit, maybe adjusted during runtime based on feedback */
	window_size = 2;
	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   "thermal/intel_powerclamp:online",
					   powerclamp_cpu_online,
					   powerclamp_cpu_predown);
	if (retval < 0)
		goto exit_free;

	hp_state = retval;

	worker_data = alloc_percpu(struct powerclamp_worker_data);
	if (!worker_data) {
		retval = -ENOMEM;
		goto exit_unregister;
	}

	/* 1 HZ residency polling is enabled only on single-package, single-die systems */
	if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
		poll_pkg_cstate_enable = true;

	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
						&powerclamp_cooling_ops);
	if (IS_ERR(cooling_dev)) {
		retval = -ENODEV;
		goto exit_free_thread;
	}

	/* default injection duration unless the module parameter set one */
	if (!duration)
		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

	powerclamp_create_debug_files();

	return 0;

exit_free_thread:
	free_percpu(worker_data);
exit_unregister:
	cpuhp_remove_state_nocalls(hp_state);
exit_free:
	bitmap_free(cpu_clamping_mask);
	return retval;
}
749 module_init(powerclamp_init);
750
/* Module exit: stop clamping and release everything acquired in init. */
static void __exit powerclamp_exit(void)
{
	end_power_clamp();
	cpuhp_remove_state_nocalls(hp_state);
	free_percpu(worker_data);
	thermal_cooling_device_unregister(cooling_dev);
	bitmap_free(cpu_clamping_mask);

	/* clamping is false by now, so the poll work will not rearm itself */
	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);
}
762 module_exit(powerclamp_exit);
763
764 MODULE_LICENSE("GPL");
765 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
766 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
767 MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");