GNU Linux-libre 5.4.200-gnu1
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * kernel/sched/debug.c
4  *
5  * Print the CFS rbtree and other debugging details
6  *
7  * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
8  */
9 #include "sched.h"
10
11 /*
12  * This allows printing both to /proc/sched_debug and
13  * to the console
14  */
15 #define SEQ_printf(m, x...)                     \
16  do {                                           \
17         if (m)                                  \
18                 seq_printf(m, x);               \
19         else                                    \
20                 pr_cont(x);                     \
21  } while (0)
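/*
 * Note: when @m is NULL the output goes to the kernel log via pr_cont()
 * rather than to a seq_file; sysrq_sched_debug_show() below relies on
 * this to dump the same information straight to the console.
 */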
22
23 /*
24  * Ease the printing of nsec fields:
25  */
26 static long long nsec_high(unsigned long long nsec)
27 {
28         if ((long long)nsec < 0) {
29                 nsec = -nsec;
30                 do_div(nsec, 1000000);
31                 return -nsec;
32         }
33         do_div(nsec, 1000000);
34
35         return nsec;
36 }
37
38 static unsigned long nsec_low(unsigned long long nsec)
39 {
40         if ((long long)nsec < 0)
41                 nsec = -nsec;
42
43         return do_div(nsec, 1000000);
44 }
45
46 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
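/*
 * SPLIT_NS() expands to the two arguments expected by a "%Ld.%06ld"
 * style format: the value in milliseconds and the sub-millisecond
 * remainder in nanoseconds, e.g. 123456789 ns prints as "123.456789".
 */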
47
48 #define SCHED_FEAT(name, enabled)       \
49         #name ,
50
51 static const char * const sched_feat_names[] = {
52 #include "features.h"
53 };
54
55 #undef SCHED_FEAT
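/*
 * "features.h" is an x-macro list: every SCHED_FEAT(name, enabled)
 * entry expands according to whatever SCHED_FEAT is defined to be at
 * the point of inclusion.  Here it builds the feature name table;
 * under CONFIG_JUMP_LABEL below it is included again to build the
 * matching static_key array.
 */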
56
57 static int sched_feat_show(struct seq_file *m, void *v)
58 {
59         int i;
60
61         for (i = 0; i < __SCHED_FEAT_NR; i++) {
62                 if (!(sysctl_sched_features & (1UL << i)))
63                         seq_puts(m, "NO_");
64                 seq_printf(m, "%s ", sched_feat_names[i]);
65         }
66         seq_puts(m, "\n");
67
68         return 0;
69 }
70
71 #ifdef CONFIG_JUMP_LABEL
72
73 #define jump_label_key__true  STATIC_KEY_INIT_TRUE
74 #define jump_label_key__false STATIC_KEY_INIT_FALSE
75
76 #define SCHED_FEAT(name, enabled)       \
77         jump_label_key__##enabled ,
78
79 struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
80 #include "features.h"
81 };
82
83 #undef SCHED_FEAT
84
85 static void sched_feat_disable(int i)
86 {
87         static_key_disable_cpuslocked(&sched_feat_keys[i]);
88 }
89
90 static void sched_feat_enable(int i)
91 {
92         static_key_enable_cpuslocked(&sched_feat_keys[i]);
93 }
94 #else
95 static void sched_feat_disable(int i) { };
96 static void sched_feat_enable(int i) { };
97 #endif /* CONFIG_JUMP_LABEL */
98
99 static int sched_feat_set(char *cmp)
100 {
101         int i;
102         int neg = 0;
103
104         if (strncmp(cmp, "NO_", 3) == 0) {
105                 neg = 1;
106                 cmp += 3;
107         }
108
109         i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
110         if (i < 0)
111                 return i;
112
113         if (neg) {
114                 sysctl_sched_features &= ~(1UL << i);
115                 sched_feat_disable(i);
116         } else {
117                 sysctl_sched_features |= (1UL << i);
118                 sched_feat_enable(i);
119         }
120
121         return 0;
122 }
123
124 static ssize_t
125 sched_feat_write(struct file *filp, const char __user *ubuf,
126                 size_t cnt, loff_t *ppos)
127 {
128         char buf[64];
129         char *cmp;
130         int ret;
131         struct inode *inode;
132
133         if (cnt > 63)
134                 cnt = 63;
135
136         if (copy_from_user(&buf, ubuf, cnt))
137                 return -EFAULT;
138
139         buf[cnt] = 0;
140         cmp = strstrip(buf);
141
142         /* Ensure the static_key remains in a consistent state */
143         inode = file_inode(filp);
144         cpus_read_lock();
145         inode_lock(inode);
146         ret = sched_feat_set(cmp);
147         inode_unlock(inode);
148         cpus_read_unlock();
149         if (ret < 0)
150                 return ret;
151
152         *ppos += cnt;
153
154         return cnt;
155 }
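/*
 * Writing a feature name enables it, writing the name with a "NO_"
 * prefix disables it.  For example (assuming debugfs is mounted at
 * /sys/kernel/debug):
 *
 *	echo NO_GENTLE_FAIR_SLEEPERS > /sys/kernel/debug/sched_features
 */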
156
157 static int sched_feat_open(struct inode *inode, struct file *filp)
158 {
159         return single_open(filp, sched_feat_show, NULL);
160 }
161
162 static const struct file_operations sched_feat_fops = {
163         .open           = sched_feat_open,
164         .write          = sched_feat_write,
165         .read           = seq_read,
166         .llseek         = seq_lseek,
167         .release        = single_release,
168 };
169
170 __read_mostly bool sched_debug_enabled;
171
172 static __init int sched_init_debug(void)
173 {
174         debugfs_create_file("sched_features", 0644, NULL, NULL,
175                         &sched_feat_fops);
176
177         debugfs_create_bool("sched_debug", 0644, NULL,
178                         &sched_debug_enabled);
179
180         return 0;
181 }
182 late_initcall(sched_init_debug);
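/*
 * Runs as a late initcall, by which point debugfs is available.  It
 * creates <debugfs>/sched_features (the feature toggles above) and
 * <debugfs>/sched_debug, a boolean consulted elsewhere in the scheduler
 * (e.g. by the sched domain debug code) before printing verbose output.
 */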
183
184 #ifdef CONFIG_SMP
185
186 #ifdef CONFIG_SYSCTL
187
188 static struct ctl_table sd_ctl_dir[] = {
189         {
190                 .procname       = "sched_domain",
191                 .mode           = 0555,
192         },
193         {}
194 };
195
196 static struct ctl_table sd_ctl_root[] = {
197         {
198                 .procname       = "kernel",
199                 .mode           = 0555,
200                 .child          = sd_ctl_dir,
201         },
202         {}
203 };
204
205 static struct ctl_table *sd_alloc_ctl_entry(int n)
206 {
207         struct ctl_table *entry =
208                 kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
209
210         return entry;
211 }
212
213 static void sd_free_ctl_entry(struct ctl_table **tablep)
214 {
215         struct ctl_table *entry;
216
217         /*
218          * In the intermediate directories, both the child directory and
219          * procname are dynamically allocated and could fail but the mode
220          * will always be set. In the lowest directory the names are
221          * static strings and all have proc handlers.
222          */
223         for (entry = *tablep; entry->mode; entry++) {
224                 if (entry->child)
225                         sd_free_ctl_entry(&entry->child);
226                 if (entry->proc_handler == NULL)
227                         kfree(entry->procname);
228         }
229
230         kfree(*tablep);
231         *tablep = NULL;
232 }
233
234 static void
235 set_table_entry(struct ctl_table *entry,
236                 const char *procname, void *data, int maxlen,
237                 umode_t mode, proc_handler *proc_handler)
238 {
239         entry->procname = procname;
240         entry->data = data;
241         entry->maxlen = maxlen;
242         entry->mode = mode;
243         entry->proc_handler = proc_handler;
244 }
245
246 static struct ctl_table *
247 sd_alloc_ctl_domain_table(struct sched_domain *sd)
248 {
249         struct ctl_table *table = sd_alloc_ctl_entry(9);
250
251         if (table == NULL)
252                 return NULL;
253
254         set_table_entry(&table[0], "min_interval",        &sd->min_interval,        sizeof(long), 0644, proc_doulongvec_minmax);
255         set_table_entry(&table[1], "max_interval",        &sd->max_interval,        sizeof(long), 0644, proc_doulongvec_minmax);
256         set_table_entry(&table[2], "busy_factor",         &sd->busy_factor,         sizeof(int),  0644, proc_dointvec_minmax);
257         set_table_entry(&table[3], "imbalance_pct",       &sd->imbalance_pct,       sizeof(int),  0644, proc_dointvec_minmax);
258         set_table_entry(&table[4], "cache_nice_tries",    &sd->cache_nice_tries,    sizeof(int),  0644, proc_dointvec_minmax);
259         set_table_entry(&table[5], "flags",               &sd->flags,               sizeof(int),  0444, proc_dointvec_minmax);
260         set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
261         set_table_entry(&table[7], "name",                sd->name,            CORENAME_MAX_SIZE, 0444, proc_dostring);
262         /* &table[8] is terminator */
263
264         return table;
265 }
266
267 static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
268 {
269         struct ctl_table *entry, *table;
270         struct sched_domain *sd;
271         int domain_num = 0, i;
272         char buf[32];
273
274         for_each_domain(cpu, sd)
275                 domain_num++;
276         entry = table = sd_alloc_ctl_entry(domain_num + 1);
277         if (table == NULL)
278                 return NULL;
279
280         i = 0;
281         for_each_domain(cpu, sd) {
282                 snprintf(buf, 32, "domain%d", i);
283                 entry->procname = kstrdup(buf, GFP_KERNEL);
284                 entry->mode = 0555;
285                 entry->child = sd_alloc_ctl_domain_table(sd);
286                 entry++;
287                 i++;
288         }
289         return table;
290 }
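/*
 * Together with sd_alloc_ctl_domain_table() this builds the tree that
 * ends up under /proc/sys/kernel/sched_domain/:
 *
 *	cpu<N>/domain<M>/{min_interval,max_interval,busy_factor,
 *			  imbalance_pct,cache_nice_tries,flags,
 *			  max_newidle_lb_cost,name}
 *
 * with one domain<M> directory per scheduling domain of each CPU.
 */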
291
292 static cpumask_var_t            sd_sysctl_cpus;
293 static struct ctl_table_header  *sd_sysctl_header;
294
295 void register_sched_domain_sysctl(void)
296 {
297         static struct ctl_table *cpu_entries;
298         static struct ctl_table **cpu_idx;
299         static bool init_done = false;
300         char buf[32];
301         int i;
302
303         if (!cpu_entries) {
304                 cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
305                 if (!cpu_entries)
306                         return;
307
308                 WARN_ON(sd_ctl_dir[0].child);
309                 sd_ctl_dir[0].child = cpu_entries;
310         }
311
312         if (!cpu_idx) {
313                 struct ctl_table *e = cpu_entries;
314
315                 cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
316                 if (!cpu_idx)
317                         return;
318
319                 /* deal with sparse possible map */
320                 for_each_possible_cpu(i) {
321                         cpu_idx[i] = e;
322                         e++;
323                 }
324         }
325
326         if (!cpumask_available(sd_sysctl_cpus)) {
327                 if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
328                         return;
329         }
330
331         if (!init_done) {
332                 init_done = true;
333                 /* init to cpu_possible_mask so there are no holes in @cpu_entries */
334                 cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
335         }
336
337         for_each_cpu(i, sd_sysctl_cpus) {
338                 struct ctl_table *e = cpu_idx[i];
339
340                 if (e->child)
341                         sd_free_ctl_entry(&e->child);
342
343                 if (!e->procname) {
344                         snprintf(buf, 32, "cpu%d", i);
345                         e->procname = kstrdup(buf, GFP_KERNEL);
346                 }
347                 e->mode = 0555;
348                 e->child = sd_alloc_ctl_cpu_table(i);
349
350                 __cpumask_clear_cpu(i, sd_sysctl_cpus);
351         }
352
353         WARN_ON(sd_sysctl_header);
354         sd_sysctl_header = register_sysctl_table(sd_ctl_root);
355 }
356
357 void dirty_sched_domain_sysctl(int cpu)
358 {
359         if (cpumask_available(sd_sysctl_cpus))
360                 __cpumask_set_cpu(cpu, sd_sysctl_cpus);
361 }
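/*
 * Topology updates mark the affected CPUs here; the next call to
 * register_sched_domain_sysctl() then frees and rebuilds only the
 * per-CPU subtrees that were marked dirty instead of the whole table.
 */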
362
363 /* may be called multiple times per register */
364 void unregister_sched_domain_sysctl(void)
365 {
366         unregister_sysctl_table(sd_sysctl_header);
367         sd_sysctl_header = NULL;
368 }
369 #endif /* CONFIG_SYSCTL */
370 #endif /* CONFIG_SMP */
371
372 #ifdef CONFIG_FAIR_GROUP_SCHED
373 static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
374 {
375         struct sched_entity *se = tg->se[cpu];
376
377 #define P(F)            SEQ_printf(m, "  .%-30s: %lld\n",       #F, (long long)F)
378 #define P_SCHEDSTAT(F)  SEQ_printf(m, "  .%-30s: %lld\n",       #F, (long long)schedstat_val(F))
379 #define PN(F)           SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
380 #define PN_SCHEDSTAT(F) SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
381
382         if (!se)
383                 return;
384
385         PN(se->exec_start);
386         PN(se->vruntime);
387         PN(se->sum_exec_runtime);
388
389         if (schedstat_enabled()) {
390                 PN_SCHEDSTAT(se->statistics.wait_start);
391                 PN_SCHEDSTAT(se->statistics.sleep_start);
392                 PN_SCHEDSTAT(se->statistics.block_start);
393                 PN_SCHEDSTAT(se->statistics.sleep_max);
394                 PN_SCHEDSTAT(se->statistics.block_max);
395                 PN_SCHEDSTAT(se->statistics.exec_max);
396                 PN_SCHEDSTAT(se->statistics.slice_max);
397                 PN_SCHEDSTAT(se->statistics.wait_max);
398                 PN_SCHEDSTAT(se->statistics.wait_sum);
399                 P_SCHEDSTAT(se->statistics.wait_count);
400         }
401
402         P(se->load.weight);
403         P(se->runnable_weight);
404 #ifdef CONFIG_SMP
405         P(se->avg.load_avg);
406         P(se->avg.util_avg);
407         P(se->avg.runnable_load_avg);
408 #endif
409
410 #undef PN_SCHEDSTAT
411 #undef PN
412 #undef P_SCHEDSTAT
413 #undef P
414 }
415 #endif
416
417 #ifdef CONFIG_CGROUP_SCHED
418 static DEFINE_SPINLOCK(sched_debug_lock);
419 static char group_path[PATH_MAX];
420
421 static void task_group_path(struct task_group *tg, char *path, int plen)
422 {
423         if (autogroup_path(tg, path, plen))
424                 return;
425
426         cgroup_path(tg->css.cgroup, path, plen);
427 }
428
429 /*
430  * Only one SEQ_printf_task_group_path() caller at a time can use the
431  * full-length group_path[] buffer for the cgroup path. Simultaneous
432  * callers fall back to a shorter stack buffer, with a "..." suffix
433  * appended so that possible truncation of the path name is visible
434  * in the output.
435  */
436 #define SEQ_printf_task_group_path(m, tg, fmt...)                       \
437 {                                                                       \
438         if (spin_trylock(&sched_debug_lock)) {                          \
439                 task_group_path(tg, group_path, sizeof(group_path));    \
440                 SEQ_printf(m, fmt, group_path);                         \
441                 spin_unlock(&sched_debug_lock);                         \
442         } else {                                                        \
443                 char buf[128];                                          \
444                 char *bufend = buf + sizeof(buf) - 3;                   \
445                 task_group_path(tg, buf, bufend - buf);                 \
446                 strcpy(bufend - 1, "...");                              \
447                 SEQ_printf(m, fmt, buf);                                \
448         }                                                               \
449 }
450 #endif
451
452 static void
453 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
454 {
455         if (rq->curr == p)
456                 SEQ_printf(m, ">R");
457         else
458                 SEQ_printf(m, " %c", task_state_to_char(p));
459
460         SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
461                 p->comm, task_pid_nr(p),
462                 SPLIT_NS(p->se.vruntime),
463                 (long long)(p->nvcsw + p->nivcsw),
464                 p->prio);
465
466         SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
467                 SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
468                 SPLIT_NS(p->se.sum_exec_runtime),
469                 SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
470
471 #ifdef CONFIG_NUMA_BALANCING
472         SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
473 #endif
474 #ifdef CONFIG_CGROUP_SCHED
475         SEQ_printf_task_group_path(m, task_group(p), " %s")
476 #endif
477
478         SEQ_printf(m, "\n");
479 }
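/*
 * One line per task, matching the header printed by print_rq() below:
 * state, comm, PID, vruntime, context-switch count, priority, wait-time,
 * sum-exec and sum-sleep times, plus NUMA and cgroup columns when the
 * corresponding options are enabled.  The task currently running on the
 * runqueue is marked ">R".
 */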
480
481 static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
482 {
483         struct task_struct *g, *p;
484
485         SEQ_printf(m, "\n");
486         SEQ_printf(m, "runnable tasks:\n");
487         SEQ_printf(m, " S           task   PID         tree-key  switches  prio"
488                    "     wait-time             sum-exec        sum-sleep\n");
489         SEQ_printf(m, "-------------------------------------------------------"
490                    "----------------------------------------------------\n");
491
492         rcu_read_lock();
493         for_each_process_thread(g, p) {
494                 if (task_cpu(p) != rq_cpu)
495                         continue;
496
497                 print_task(m, rq, p);
498         }
499         rcu_read_unlock();
500 }
501
502 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
503 {
504         s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
505                 spread, rq0_min_vruntime, spread0;
506         struct rq *rq = cpu_rq(cpu);
507         struct sched_entity *last;
508         unsigned long flags;
509
510 #ifdef CONFIG_FAIR_GROUP_SCHED
511         SEQ_printf(m, "\n");
512         SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
513 #else
514         SEQ_printf(m, "\n");
515         SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
516 #endif
517         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
518                         SPLIT_NS(cfs_rq->exec_clock));
519
520         raw_spin_lock_irqsave(&rq->lock, flags);
521         if (rb_first_cached(&cfs_rq->tasks_timeline))
522                 MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
523         last = __pick_last_entity(cfs_rq);
524         if (last)
525                 max_vruntime = last->vruntime;
526         min_vruntime = cfs_rq->min_vruntime;
527         rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
528         raw_spin_unlock_irqrestore(&rq->lock, flags);
529         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
530                         SPLIT_NS(MIN_vruntime));
531         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
532                         SPLIT_NS(min_vruntime));
533         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
534                         SPLIT_NS(max_vruntime));
535         spread = max_vruntime - MIN_vruntime;
536         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
537                         SPLIT_NS(spread));
538         spread0 = min_vruntime - rq0_min_vruntime;
539         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
540                         SPLIT_NS(spread0));
541         SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
542                         cfs_rq->nr_spread_over);
543         SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
544         SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
545 #ifdef CONFIG_SMP
546         SEQ_printf(m, "  .%-30s: %ld\n", "runnable_weight", cfs_rq->runnable_weight);
547         SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
548                         cfs_rq->avg.load_avg);
549         SEQ_printf(m, "  .%-30s: %lu\n", "runnable_load_avg",
550                         cfs_rq->avg.runnable_load_avg);
551         SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
552                         cfs_rq->avg.util_avg);
553         SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued",
554                         cfs_rq->avg.util_est.enqueued);
555         SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
556                         cfs_rq->removed.load_avg);
557         SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
558                         cfs_rq->removed.util_avg);
559         SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_sum",
560                         cfs_rq->removed.runnable_sum);
561 #ifdef CONFIG_FAIR_GROUP_SCHED
562         SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
563                         cfs_rq->tg_load_avg_contrib);
564         SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
565                         atomic_long_read(&cfs_rq->tg->load_avg));
566 #endif
567 #endif
568 #ifdef CONFIG_CFS_BANDWIDTH
569         SEQ_printf(m, "  .%-30s: %d\n", "throttled",
570                         cfs_rq->throttled);
571         SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
572                         cfs_rq->throttle_count);
573 #endif
574
575 #ifdef CONFIG_FAIR_GROUP_SCHED
576         print_cfs_group_stats(m, cpu, cfs_rq->tg);
577 #endif
578 }
579
580 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
581 {
582 #ifdef CONFIG_RT_GROUP_SCHED
583         SEQ_printf(m, "\n");
584         SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
585 #else
586         SEQ_printf(m, "\n");
587         SEQ_printf(m, "rt_rq[%d]:\n", cpu);
588 #endif
589
590 #define P(x) \
591         SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
592 #define PU(x) \
593         SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
594 #define PN(x) \
595         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
596
597         PU(rt_nr_running);
598 #ifdef CONFIG_SMP
599         PU(rt_nr_migratory);
600 #endif
601         P(rt_throttled);
602         PN(rt_time);
603         PN(rt_runtime);
604
605 #undef PN
606 #undef PU
607 #undef P
608 }
609
610 void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
611 {
612         struct dl_bw *dl_bw;
613
614         SEQ_printf(m, "\n");
615         SEQ_printf(m, "dl_rq[%d]:\n", cpu);
616
617 #define PU(x) \
618         SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
619
620         PU(dl_nr_running);
621 #ifdef CONFIG_SMP
622         PU(dl_nr_migratory);
623         dl_bw = &cpu_rq(cpu)->rd->dl_bw;
624 #else
625         dl_bw = &dl_rq->dl_bw;
626 #endif
627         SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
628         SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
629
630 #undef PU
631 }
632
633 static void print_cpu(struct seq_file *m, int cpu)
634 {
635         struct rq *rq = cpu_rq(cpu);
636
637 #ifdef CONFIG_X86
638         {
639                 unsigned int freq = cpu_khz ? : 1;
640
641                 SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
642                            cpu, freq / 1000, (freq % 1000));
643         }
644 #else
645         SEQ_printf(m, "cpu#%d\n", cpu);
646 #endif
647
648 #define P(x)                                                            \
649 do {                                                                    \
650         if (sizeof(rq->x) == 4)                                         \
651                 SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));    \
652         else                                                            \
653                 SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
654 } while (0)
655
656 #define PN(x) \
657         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
658
659         P(nr_running);
660         P(nr_switches);
661         P(nr_load_updates);
662         P(nr_uninterruptible);
663         PN(next_balance);
664         SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
665         PN(clock);
666         PN(clock_task);
667 #undef P
668 #undef PN
669
670 #ifdef CONFIG_SMP
671 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
672         P64(avg_idle);
673         P64(max_idle_balance_cost);
674 #undef P64
675 #endif
676
677 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
678         if (schedstat_enabled()) {
679                 P(yld_count);
680                 P(sched_count);
681                 P(sched_goidle);
682                 P(ttwu_count);
683                 P(ttwu_local);
684         }
685 #undef P
686
687         print_cfs_stats(m, cpu);
688         print_rt_stats(m, cpu);
689         print_dl_stats(m, cpu);
690
691         print_rq(m, rq, cpu);
692         SEQ_printf(m, "\n");
693 }
694
695 static const char *sched_tunable_scaling_names[] = {
696         "none",
697         "logarithmic",
698         "linear"
699 };
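/*
 * Indexed by sysctl_sched_tunable_scaling, i.e. the
 * SCHED_TUNABLESCALING_{NONE,LOG,LINEAR} values.
 */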
700
701 static void sched_debug_header(struct seq_file *m)
702 {
703         u64 ktime, sched_clk, cpu_clk;
704         unsigned long flags;
705
706         local_irq_save(flags);
707         ktime = ktime_to_ns(ktime_get());
708         sched_clk = sched_clock();
709         cpu_clk = local_clock();
710         local_irq_restore(flags);
711
712         SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
713                 init_utsname()->release,
714                 (int)strcspn(init_utsname()->version, " "),
715                 init_utsname()->version);
716
717 #define P(x) \
718         SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
719 #define PN(x) \
720         SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
721         PN(ktime);
722         PN(sched_clk);
723         PN(cpu_clk);
724         P(jiffies);
725 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
726         P(sched_clock_stable());
727 #endif
728 #undef PN
729 #undef P
730
731         SEQ_printf(m, "\n");
732         SEQ_printf(m, "sysctl_sched\n");
733
734 #define P(x) \
735         SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
736 #define PN(x) \
737         SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
738         PN(sysctl_sched_latency);
739         PN(sysctl_sched_min_granularity);
740         PN(sysctl_sched_wakeup_granularity);
741         P(sysctl_sched_child_runs_first);
742         P(sysctl_sched_features);
743 #undef PN
744 #undef P
745
746         SEQ_printf(m, "  .%-40s: %d (%s)\n",
747                 "sysctl_sched_tunable_scaling",
748                 sysctl_sched_tunable_scaling,
749                 sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
750         SEQ_printf(m, "\n");
751 }
752
753 static int sched_debug_show(struct seq_file *m, void *v)
754 {
755         int cpu = (unsigned long)(v - 2);
756
757         if (cpu != -1)
758                 print_cpu(m, cpu);
759         else
760                 sched_debug_header(m);
761
762         return 0;
763 }
764
765 void sysrq_sched_debug_show(void)
766 {
767         int cpu;
768
769         sched_debug_header(NULL);
770         for_each_online_cpu(cpu)
771                 print_cpu(NULL, cpu);
772
773 }
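/*
 * Called with a NULL seq_file so that SEQ_printf() falls back to
 * pr_cont(); this is the path used when scheduler state is dumped to
 * the console (e.g. from the SysRq-t task dump).
 */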
774
775 /*
776  * This iterator needs some explanation.
777  * It returns 1 for the header position.
778  * This means 2 is CPU 0.
779  * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
780  * to use cpumask_* to iterate over the CPUs.
781  */
782 static void *sched_debug_start(struct seq_file *file, loff_t *offset)
783 {
784         unsigned long n = *offset;
785
786         if (n == 0)
787                 return (void *) 1;
788
789         n--;
790
791         if (n > 0)
792                 n = cpumask_next(n - 1, cpu_online_mask);
793         else
794                 n = cpumask_first(cpu_online_mask);
795
796         *offset = n + 1;
797
798         if (n < nr_cpu_ids)
799                 return (void *)(unsigned long)(n + 2);
800
801         return NULL;
802 }
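/*
 * Example with CPUs 0-3 online: *offset 0 yields the header token (1),
 * *offset 1 yields CPU 0 encoded as (void *)2, *offset 2 yields CPU 1
 * as (void *)3, and so on.  sched_debug_show() subtracts 2 again, so
 * the header token decodes to cpu == -1.
 */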
803
804 static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
805 {
806         (*offset)++;
807         return sched_debug_start(file, offset);
808 }
809
810 static void sched_debug_stop(struct seq_file *file, void *data)
811 {
812 }
813
814 static const struct seq_operations sched_debug_sops = {
815         .start          = sched_debug_start,
816         .next           = sched_debug_next,
817         .stop           = sched_debug_stop,
818         .show           = sched_debug_show,
819 };
820
821 static int __init init_sched_debug_procfs(void)
822 {
823         if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops))
824                 return -ENOMEM;
825         return 0;
826 }
827
828 __initcall(init_sched_debug_procfs);
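/*
 * The resulting file is the read-only /proc/sched_debug, driven by the
 * seq_operations above: one header record followed by one record per
 * online CPU.
 */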
829
830 #define __P(F)  SEQ_printf(m, "%-45s:%21Ld\n",       #F, (long long)F)
831 #define   P(F)  SEQ_printf(m, "%-45s:%21Ld\n",       #F, (long long)p->F)
832 #define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
833 #define   PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
834
835
836 #ifdef CONFIG_NUMA_BALANCING
837 void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
838                 unsigned long tpf, unsigned long gsf, unsigned long gpf)
839 {
840         SEQ_printf(m, "numa_faults node=%d ", node);
841         SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
842         SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
843 }
844 #endif
845
846
847 static void sched_show_numa(struct task_struct *p, struct seq_file *m)
848 {
849 #ifdef CONFIG_NUMA_BALANCING
850         if (p->mm)
851                 P(mm->numa_scan_seq);
852
853         P(numa_pages_migrated);
854         P(numa_preferred_nid);
855         P(total_numa_faults);
856         SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
857                         task_node(p), task_numa_group_id(p));
858         show_numa_stats(p, m);
859 #endif
860 }
861
862 void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
863                                                   struct seq_file *m)
864 {
865         unsigned long nr_switches;
866
867         SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
868                                                 get_nr_threads(p));
869         SEQ_printf(m,
870                 "---------------------------------------------------------"
871                 "----------\n");
872 #define __P(F) \
873         SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
874 #define P(F) \
875         SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
876 #define P_SCHEDSTAT(F) \
877         SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
878 #define __PN(F) \
879         SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
880 #define PN(F) \
881         SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
882 #define PN_SCHEDSTAT(F) \
883         SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
884
885         PN(se.exec_start);
886         PN(se.vruntime);
887         PN(se.sum_exec_runtime);
888
889         nr_switches = p->nvcsw + p->nivcsw;
890
891         P(se.nr_migrations);
892
893         if (schedstat_enabled()) {
894                 u64 avg_atom, avg_per_cpu;
895
896                 PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
897                 PN_SCHEDSTAT(se.statistics.wait_start);
898                 PN_SCHEDSTAT(se.statistics.sleep_start);
899                 PN_SCHEDSTAT(se.statistics.block_start);
900                 PN_SCHEDSTAT(se.statistics.sleep_max);
901                 PN_SCHEDSTAT(se.statistics.block_max);
902                 PN_SCHEDSTAT(se.statistics.exec_max);
903                 PN_SCHEDSTAT(se.statistics.slice_max);
904                 PN_SCHEDSTAT(se.statistics.wait_max);
905                 PN_SCHEDSTAT(se.statistics.wait_sum);
906                 P_SCHEDSTAT(se.statistics.wait_count);
907                 PN_SCHEDSTAT(se.statistics.iowait_sum);
908                 P_SCHEDSTAT(se.statistics.iowait_count);
909                 P_SCHEDSTAT(se.statistics.nr_migrations_cold);
910                 P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
911                 P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
912                 P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
913                 P_SCHEDSTAT(se.statistics.nr_forced_migrations);
914                 P_SCHEDSTAT(se.statistics.nr_wakeups);
915                 P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
916                 P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
917                 P_SCHEDSTAT(se.statistics.nr_wakeups_local);
918                 P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
919                 P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
920                 P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
921                 P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
922                 P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
923
924                 avg_atom = p->se.sum_exec_runtime;
925                 if (nr_switches)
926                         avg_atom = div64_ul(avg_atom, nr_switches);
927                 else
928                         avg_atom = -1LL;
929
930                 avg_per_cpu = p->se.sum_exec_runtime;
931                 if (p->se.nr_migrations) {
932                         avg_per_cpu = div64_u64(avg_per_cpu,
933                                                 p->se.nr_migrations);
934                 } else {
935                         avg_per_cpu = -1LL;
936                 }
937
938                 __PN(avg_atom);
939                 __PN(avg_per_cpu);
940         }
941
942         __P(nr_switches);
943         SEQ_printf(m, "%-45s:%21Ld\n",
944                    "nr_voluntary_switches", (long long)p->nvcsw);
945         SEQ_printf(m, "%-45s:%21Ld\n",
946                    "nr_involuntary_switches", (long long)p->nivcsw);
947
948         P(se.load.weight);
949         P(se.runnable_weight);
950 #ifdef CONFIG_SMP
951         P(se.avg.load_sum);
952         P(se.avg.runnable_load_sum);
953         P(se.avg.util_sum);
954         P(se.avg.load_avg);
955         P(se.avg.runnable_load_avg);
956         P(se.avg.util_avg);
957         P(se.avg.last_update_time);
958         P(se.avg.util_est.ewma);
959         P(se.avg.util_est.enqueued);
960 #endif
961         P(policy);
962         P(prio);
963         if (task_has_dl_policy(p)) {
964                 P(dl.runtime);
965                 P(dl.deadline);
966         }
967 #undef PN_SCHEDSTAT
968 #undef PN
969 #undef __PN
970 #undef P_SCHEDSTAT
971 #undef P
972 #undef __P
973
974         {
975                 unsigned int this_cpu = raw_smp_processor_id();
976                 u64 t0, t1;
977
978                 t0 = cpu_clock(this_cpu);
979                 t1 = cpu_clock(this_cpu);
980                 SEQ_printf(m, "%-45s:%21Ld\n",
981                            "clock-delta", (long long)(t1-t0));
982         }
983
984         sched_show_numa(p, m);
985 }
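/*
 * proc_sched_show_task() backs /proc/<pid>/sched.  The final
 * "clock-delta" line reports the cost of two back-to-back cpu_clock()
 * reads, giving a rough idea of the local clock overhead.
 */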
986
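/*
 * Write-side counterpart of /proc/<pid>/sched: writing to the file
 * clears the task's accumulated schedstats so a new measurement
 * interval starts from zero.
 */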
987 void proc_sched_set_task(struct task_struct *p)
988 {
989 #ifdef CONFIG_SCHEDSTATS
990         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
991 #endif
992 }