GNU Linux-libre 4.19.245-gnu1
[releases.git] / kernel / sched / debug.c
1 /*
2  * kernel/sched/debug.c
3  *
4  * Print the CFS rbtree and other debugging details
5  *
6  * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 #include "sched.h"
13
/*
 * Emit formatted output either into the seq_file @m (used for
 * /proc/sched_debug) or, when @m is NULL, to the console via pr_cont().
 */
#define SEQ_printf(m, x...)			\
 do {						\
	if (!(m))				\
		pr_cont(x);			\
	else					\
		seq_printf(m, x);		\
 } while (0)
25
/*
 * Ease the printing of nsec fields.
 *
 * nsec_high() returns the value divided by 1000000, keeping the sign of
 * the input when it is interpreted as a signed 64-bit quantity.
 */
static long long nsec_high(unsigned long long nsec)
{
	int negative = (long long)nsec < 0;

	/* Divide the magnitude; the sign is re-applied on return. */
	if (negative)
		nsec = -nsec;
	do_div(nsec, 1000000);

	return negative ? -nsec : nsec;
}
40
/*
 * Companion of nsec_high(): takes the magnitude of @nsec (interpreted as
 * signed) and returns whatever do_div() yields for a division by 1000000.
 * Callers print it with "%06ld" right after the nsec_high() part.
 */
static unsigned long nsec_low(unsigned long long nsec)
{
	unsigned long long magnitude = nsec;

	if ((long long)magnitude < 0)
		magnitude = -magnitude;

	return do_div(magnitude, 1000000);
}
48
/*
 * Expand one nsec value into the two arguments (nsec_high, nsec_low)
 * consumed by the "%Ld.%06ld"-style formats in this file.
 * Note that @x is evaluated twice.
 */
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
50
/*
 * Build the table of feature names: while features.h is included,
 * every SCHED_FEAT(name, enabled) it contains expands to the
 * stringified @name followed by a comma.
 */
#define SCHED_FEAT(name, enabled)	#name ,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT
59
60 static int sched_feat_show(struct seq_file *m, void *v)
61 {
62         int i;
63
64         for (i = 0; i < __SCHED_FEAT_NR; i++) {
65                 if (!(sysctl_sched_features & (1UL << i)))
66                         seq_puts(m, "NO_");
67                 seq_printf(m, "%s ", sched_feat_names[i]);
68         }
69         seq_puts(m, "\n");
70
71         return 0;
72 }
73
#ifdef CONFIG_JUMP_LABEL

#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

/*
 * While features.h is included, each SCHED_FEAT(name, enabled) expands
 * to the static key initializer selected by its @enabled token.
 */
#define SCHED_FEAT(name, enabled)	\
	jump_label_key__##enabled ,

/* One static key per scheduler feature, indexed like sched_feat_names[]. */
struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

/*
 * Flip the static key of feature @i off.  Uses the _cpuslocked variant;
 * sched_feat_write() takes cpus_read_lock() before getting here.
 */
static void sched_feat_disable(int i)
{
	static_key_disable_cpuslocked(&sched_feat_keys[i]);
}

/* Flip the static key of feature @i on; same locking as the disable side. */
static void sched_feat_enable(int i)
{
	static_key_enable_cpuslocked(&sched_feat_keys[i]);
}
#else
/*
 * Without jump labels there are no static keys to patch; the feature
 * bit in sysctl_sched_features is the only state, so these are no-ops.
 * (No ';' after the bodies: a stray file-scope semicolon is not valid
 * ISO C.)
 */
static void sched_feat_disable(int i) { }
static void sched_feat_enable(int i) { }
#endif /* CONFIG_JUMP_LABEL */
101
102 static int sched_feat_set(char *cmp)
103 {
104         int i;
105         int neg = 0;
106
107         if (strncmp(cmp, "NO_", 3) == 0) {
108                 neg = 1;
109                 cmp += 3;
110         }
111
112         i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
113         if (i < 0)
114                 return i;
115
116         if (neg) {
117                 sysctl_sched_features &= ~(1UL << i);
118                 sched_feat_disable(i);
119         } else {
120                 sysctl_sched_features |= (1UL << i);
121                 sched_feat_enable(i);
122         }
123
124         return 0;
125 }
126
127 static ssize_t
128 sched_feat_write(struct file *filp, const char __user *ubuf,
129                 size_t cnt, loff_t *ppos)
130 {
131         char buf[64];
132         char *cmp;
133         int ret;
134         struct inode *inode;
135
136         if (cnt > 63)
137                 cnt = 63;
138
139         if (copy_from_user(&buf, ubuf, cnt))
140                 return -EFAULT;
141
142         buf[cnt] = 0;
143         cmp = strstrip(buf);
144
145         /* Ensure the static_key remains in a consistent state */
146         inode = file_inode(filp);
147         cpus_read_lock();
148         inode_lock(inode);
149         ret = sched_feat_set(cmp);
150         inode_unlock(inode);
151         cpus_read_unlock();
152         if (ret < 0)
153                 return ret;
154
155         *ppos += cnt;
156
157         return cnt;
158 }
159
160 static int sched_feat_open(struct inode *inode, struct file *filp)
161 {
162         return single_open(filp, sched_feat_show, NULL);
163 }
164
165 static const struct file_operations sched_feat_fops = {
166         .open           = sched_feat_open,
167         .write          = sched_feat_write,
168         .read           = seq_read,
169         .llseek         = seq_lseek,
170         .release        = single_release,
171 };
172
173 __read_mostly bool sched_debug_enabled;
174
175 static __init int sched_init_debug(void)
176 {
177         debugfs_create_file("sched_features", 0644, NULL, NULL,
178                         &sched_feat_fops);
179
180         debugfs_create_bool("sched_debug", 0644, NULL,
181                         &sched_debug_enabled);
182
183         return 0;
184 }
185 late_initcall(sched_init_debug);
186
187 #ifdef CONFIG_SMP
188
189 #ifdef CONFIG_SYSCTL
190
191 static struct ctl_table sd_ctl_dir[] = {
192         {
193                 .procname       = "sched_domain",
194                 .mode           = 0555,
195         },
196         {}
197 };
198
199 static struct ctl_table sd_ctl_root[] = {
200         {
201                 .procname       = "kernel",
202                 .mode           = 0555,
203                 .child          = sd_ctl_dir,
204         },
205         {}
206 };
207
208 static struct ctl_table *sd_alloc_ctl_entry(int n)
209 {
210         struct ctl_table *entry =
211                 kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
212
213         return entry;
214 }
215
216 static void sd_free_ctl_entry(struct ctl_table **tablep)
217 {
218         struct ctl_table *entry;
219
220         /*
221          * In the intermediate directories, both the child directory and
222          * procname are dynamically allocated and could fail but the mode
223          * will always be set. In the lowest directory the names are
224          * static strings and all have proc handlers.
225          */
226         for (entry = *tablep; entry->mode; entry++) {
227                 if (entry->child)
228                         sd_free_ctl_entry(&entry->child);
229                 if (entry->proc_handler == NULL)
230                         kfree(entry->procname);
231         }
232
233         kfree(*tablep);
234         *tablep = NULL;
235 }
236
237 static int min_load_idx = 0;
238 static int max_load_idx = CPU_LOAD_IDX_MAX-1;
239
240 static void
241 set_table_entry(struct ctl_table *entry,
242                 const char *procname, void *data, int maxlen,
243                 umode_t mode, proc_handler *proc_handler,
244                 bool load_idx)
245 {
246         entry->procname = procname;
247         entry->data = data;
248         entry->maxlen = maxlen;
249         entry->mode = mode;
250         entry->proc_handler = proc_handler;
251
252         if (load_idx) {
253                 entry->extra1 = &min_load_idx;
254                 entry->extra2 = &max_load_idx;
255         }
256 }
257
258 static struct ctl_table *
259 sd_alloc_ctl_domain_table(struct sched_domain *sd)
260 {
261         struct ctl_table *table = sd_alloc_ctl_entry(14);
262
263         if (table == NULL)
264                 return NULL;
265
266         set_table_entry(&table[0] , "min_interval",        &sd->min_interval,        sizeof(long), 0644, proc_doulongvec_minmax, false);
267         set_table_entry(&table[1] , "max_interval",        &sd->max_interval,        sizeof(long), 0644, proc_doulongvec_minmax, false);
268         set_table_entry(&table[2] , "busy_idx",            &sd->busy_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
269         set_table_entry(&table[3] , "idle_idx",            &sd->idle_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
270         set_table_entry(&table[4] , "newidle_idx",         &sd->newidle_idx,         sizeof(int) , 0644, proc_dointvec_minmax,   true );
271         set_table_entry(&table[5] , "wake_idx",            &sd->wake_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
272         set_table_entry(&table[6] , "forkexec_idx",        &sd->forkexec_idx,        sizeof(int) , 0644, proc_dointvec_minmax,   true );
273         set_table_entry(&table[7] , "busy_factor",         &sd->busy_factor,         sizeof(int) , 0644, proc_dointvec_minmax,   false);
274         set_table_entry(&table[8] , "imbalance_pct",       &sd->imbalance_pct,       sizeof(int) , 0644, proc_dointvec_minmax,   false);
275         set_table_entry(&table[9] , "cache_nice_tries",    &sd->cache_nice_tries,    sizeof(int) , 0644, proc_dointvec_minmax,   false);
276         set_table_entry(&table[10], "flags",               &sd->flags,               sizeof(int) , 0644, proc_dointvec_minmax,   false);
277         set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
278         set_table_entry(&table[12], "name",                sd->name,            CORENAME_MAX_SIZE, 0444, proc_dostring,          false);
279         /* &table[13] is terminator */
280
281         return table;
282 }
283
284 static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
285 {
286         struct ctl_table *entry, *table;
287         struct sched_domain *sd;
288         int domain_num = 0, i;
289         char buf[32];
290
291         for_each_domain(cpu, sd)
292                 domain_num++;
293         entry = table = sd_alloc_ctl_entry(domain_num + 1);
294         if (table == NULL)
295                 return NULL;
296
297         i = 0;
298         for_each_domain(cpu, sd) {
299                 snprintf(buf, 32, "domain%d", i);
300                 entry->procname = kstrdup(buf, GFP_KERNEL);
301                 entry->mode = 0555;
302                 entry->child = sd_alloc_ctl_domain_table(sd);
303                 entry++;
304                 i++;
305         }
306         return table;
307 }
308
309 static cpumask_var_t            sd_sysctl_cpus;
310 static struct ctl_table_header  *sd_sysctl_header;
311
312 void register_sched_domain_sysctl(void)
313 {
314         static struct ctl_table *cpu_entries;
315         static struct ctl_table **cpu_idx;
316         static bool init_done = false;
317         char buf[32];
318         int i;
319
320         if (!cpu_entries) {
321                 cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
322                 if (!cpu_entries)
323                         return;
324
325                 WARN_ON(sd_ctl_dir[0].child);
326                 sd_ctl_dir[0].child = cpu_entries;
327         }
328
329         if (!cpu_idx) {
330                 struct ctl_table *e = cpu_entries;
331
332                 cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
333                 if (!cpu_idx)
334                         return;
335
336                 /* deal with sparse possible map */
337                 for_each_possible_cpu(i) {
338                         cpu_idx[i] = e;
339                         e++;
340                 }
341         }
342
343         if (!cpumask_available(sd_sysctl_cpus)) {
344                 if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
345                         return;
346         }
347
348         if (!init_done) {
349                 init_done = true;
350                 /* init to possible to not have holes in @cpu_entries */
351                 cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
352         }
353
354         for_each_cpu(i, sd_sysctl_cpus) {
355                 struct ctl_table *e = cpu_idx[i];
356
357                 if (e->child)
358                         sd_free_ctl_entry(&e->child);
359
360                 if (!e->procname) {
361                         snprintf(buf, 32, "cpu%d", i);
362                         e->procname = kstrdup(buf, GFP_KERNEL);
363                 }
364                 e->mode = 0555;
365                 e->child = sd_alloc_ctl_cpu_table(i);
366
367                 __cpumask_clear_cpu(i, sd_sysctl_cpus);
368         }
369
370         WARN_ON(sd_sysctl_header);
371         sd_sysctl_header = register_sysctl_table(sd_ctl_root);
372 }
373
374 void dirty_sched_domain_sysctl(int cpu)
375 {
376         if (cpumask_available(sd_sysctl_cpus))
377                 __cpumask_set_cpu(cpu, sd_sysctl_cpus);
378 }
379
380 /* may be called multiple times per register */
381 void unregister_sched_domain_sysctl(void)
382 {
383         unregister_sysctl_table(sd_sysctl_header);
384         sd_sysctl_header = NULL;
385 }
386 #endif /* CONFIG_SYSCTL */
387 #endif /* CONFIG_SMP */
388
389 #ifdef CONFIG_FAIR_GROUP_SCHED
390 static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
391 {
392         struct sched_entity *se = tg->se[cpu];
393
394 #define P(F)            SEQ_printf(m, "  .%-30s: %lld\n",       #F, (long long)F)
395 #define P_SCHEDSTAT(F)  SEQ_printf(m, "  .%-30s: %lld\n",       #F, (long long)schedstat_val(F))
396 #define PN(F)           SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
397 #define PN_SCHEDSTAT(F) SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
398
399         if (!se)
400                 return;
401
402         PN(se->exec_start);
403         PN(se->vruntime);
404         PN(se->sum_exec_runtime);
405
406         if (schedstat_enabled()) {
407                 PN_SCHEDSTAT(se->statistics.wait_start);
408                 PN_SCHEDSTAT(se->statistics.sleep_start);
409                 PN_SCHEDSTAT(se->statistics.block_start);
410                 PN_SCHEDSTAT(se->statistics.sleep_max);
411                 PN_SCHEDSTAT(se->statistics.block_max);
412                 PN_SCHEDSTAT(se->statistics.exec_max);
413                 PN_SCHEDSTAT(se->statistics.slice_max);
414                 PN_SCHEDSTAT(se->statistics.wait_max);
415                 PN_SCHEDSTAT(se->statistics.wait_sum);
416                 P_SCHEDSTAT(se->statistics.wait_count);
417         }
418
419         P(se->load.weight);
420         P(se->runnable_weight);
421 #ifdef CONFIG_SMP
422         P(se->avg.load_avg);
423         P(se->avg.util_avg);
424         P(se->avg.runnable_load_avg);
425 #endif
426
427 #undef PN_SCHEDSTAT
428 #undef PN
429 #undef P_SCHEDSTAT
430 #undef P
431 }
432 #endif
433
434 #ifdef CONFIG_CGROUP_SCHED
435 static DEFINE_SPINLOCK(sched_debug_lock);
436 static char group_path[PATH_MAX];
437
438 static void task_group_path(struct task_group *tg, char *path, int plen)
439 {
440         if (autogroup_path(tg, path, plen))
441                 return;
442
443         cgroup_path(tg->css.cgroup, path, plen);
444 }
445
/*
 * Print @tg's path using @fmt, whose last conversion must consume the
 * path string.
 *
 * Only one caller at a time can use the full-length group_path[] buffer;
 * it is claimed with spin_trylock().  Callers that lose the race fall
 * back to a short on-stack buffer.  A "..." suffix is written at the end
 * of that stack buffer so that it shows up when the path fills the
 * buffer, hinting at possible truncation.
 *
 * The macro expands to a bare brace block (not do { } while (0)).
 */
#define SEQ_printf_task_group_path(m, tg, fmt...)			\
{									\
	if (!spin_trylock(&sched_debug_lock)) {				\
		char buf[128];						\
		char *bufend = buf + sizeof(buf) - 3;			\
		task_group_path(tg, buf, bufend - buf);			\
		strcpy(bufend - 1, "...");				\
		SEQ_printf(m, fmt, buf);				\
	} else {							\
		task_group_path(tg, group_path, sizeof(group_path));	\
		SEQ_printf(m, fmt, group_path);				\
		spin_unlock(&sched_debug_lock);				\
	}								\
}
467 #endif
468
469 static void
470 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
471 {
472         if (rq->curr == p)
473                 SEQ_printf(m, ">R");
474         else
475                 SEQ_printf(m, " %c", task_state_to_char(p));
476
477         SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
478                 p->comm, task_pid_nr(p),
479                 SPLIT_NS(p->se.vruntime),
480                 (long long)(p->nvcsw + p->nivcsw),
481                 p->prio);
482
483         SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
484                 SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
485                 SPLIT_NS(p->se.sum_exec_runtime),
486                 SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
487
488 #ifdef CONFIG_NUMA_BALANCING
489         SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
490 #endif
491 #ifdef CONFIG_CGROUP_SCHED
492         SEQ_printf_task_group_path(m, task_group(p), " %s")
493 #endif
494
495         SEQ_printf(m, "\n");
496 }
497
498 static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
499 {
500         struct task_struct *g, *p;
501
502         SEQ_printf(m, "\n");
503         SEQ_printf(m, "runnable tasks:\n");
504         SEQ_printf(m, " S           task   PID         tree-key  switches  prio"
505                    "     wait-time             sum-exec        sum-sleep\n");
506         SEQ_printf(m, "-------------------------------------------------------"
507                    "----------------------------------------------------\n");
508
509         rcu_read_lock();
510         for_each_process_thread(g, p) {
511                 if (task_cpu(p) != rq_cpu)
512                         continue;
513
514                 print_task(m, rq, p);
515         }
516         rcu_read_unlock();
517 }
518
519 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
520 {
521         s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
522                 spread, rq0_min_vruntime, spread0;
523         struct rq *rq = cpu_rq(cpu);
524         struct sched_entity *last;
525         unsigned long flags;
526
527 #ifdef CONFIG_FAIR_GROUP_SCHED
528         SEQ_printf(m, "\n");
529         SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
530 #else
531         SEQ_printf(m, "\n");
532         SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
533 #endif
534         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
535                         SPLIT_NS(cfs_rq->exec_clock));
536
537         raw_spin_lock_irqsave(&rq->lock, flags);
538         if (rb_first_cached(&cfs_rq->tasks_timeline))
539                 MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
540         last = __pick_last_entity(cfs_rq);
541         if (last)
542                 max_vruntime = last->vruntime;
543         min_vruntime = cfs_rq->min_vruntime;
544         rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
545         raw_spin_unlock_irqrestore(&rq->lock, flags);
546         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
547                         SPLIT_NS(MIN_vruntime));
548         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
549                         SPLIT_NS(min_vruntime));
550         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
551                         SPLIT_NS(max_vruntime));
552         spread = max_vruntime - MIN_vruntime;
553         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
554                         SPLIT_NS(spread));
555         spread0 = min_vruntime - rq0_min_vruntime;
556         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
557                         SPLIT_NS(spread0));
558         SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
559                         cfs_rq->nr_spread_over);
560         SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
561         SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
562 #ifdef CONFIG_SMP
563         SEQ_printf(m, "  .%-30s: %ld\n", "runnable_weight", cfs_rq->runnable_weight);
564         SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
565                         cfs_rq->avg.load_avg);
566         SEQ_printf(m, "  .%-30s: %lu\n", "runnable_load_avg",
567                         cfs_rq->avg.runnable_load_avg);
568         SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
569                         cfs_rq->avg.util_avg);
570         SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued",
571                         cfs_rq->avg.util_est.enqueued);
572         SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
573                         cfs_rq->removed.load_avg);
574         SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
575                         cfs_rq->removed.util_avg);
576         SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_sum",
577                         cfs_rq->removed.runnable_sum);
578 #ifdef CONFIG_FAIR_GROUP_SCHED
579         SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
580                         cfs_rq->tg_load_avg_contrib);
581         SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
582                         atomic_long_read(&cfs_rq->tg->load_avg));
583 #endif
584 #endif
585 #ifdef CONFIG_CFS_BANDWIDTH
586         SEQ_printf(m, "  .%-30s: %d\n", "throttled",
587                         cfs_rq->throttled);
588         SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
589                         cfs_rq->throttle_count);
590 #endif
591
592 #ifdef CONFIG_FAIR_GROUP_SCHED
593         print_cfs_group_stats(m, cpu, cfs_rq->tg);
594 #endif
595 }
596
597 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
598 {
599 #ifdef CONFIG_RT_GROUP_SCHED
600         SEQ_printf(m, "\n");
601         SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
602 #else
603         SEQ_printf(m, "\n");
604         SEQ_printf(m, "rt_rq[%d]:\n", cpu);
605 #endif
606
607 #define P(x) \
608         SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
609 #define PU(x) \
610         SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
611 #define PN(x) \
612         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
613
614         PU(rt_nr_running);
615 #ifdef CONFIG_SMP
616         PU(rt_nr_migratory);
617 #endif
618         P(rt_throttled);
619         PN(rt_time);
620         PN(rt_runtime);
621
622 #undef PN
623 #undef PU
624 #undef P
625 }
626
627 void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
628 {
629         struct dl_bw *dl_bw;
630
631         SEQ_printf(m, "\n");
632         SEQ_printf(m, "dl_rq[%d]:\n", cpu);
633
634 #define PU(x) \
635         SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
636
637         PU(dl_nr_running);
638 #ifdef CONFIG_SMP
639         PU(dl_nr_migratory);
640         dl_bw = &cpu_rq(cpu)->rd->dl_bw;
641 #else
642         dl_bw = &dl_rq->dl_bw;
643 #endif
644         SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
645         SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
646
647 #undef PU
648 }
649
650 static void print_cpu(struct seq_file *m, int cpu)
651 {
652         struct rq *rq = cpu_rq(cpu);
653
654 #ifdef CONFIG_X86
655         {
656                 unsigned int freq = cpu_khz ? : 1;
657
658                 SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
659                            cpu, freq / 1000, (freq % 1000));
660         }
661 #else
662         SEQ_printf(m, "cpu#%d\n", cpu);
663 #endif
664
665 #define P(x)                                                            \
666 do {                                                                    \
667         if (sizeof(rq->x) == 4)                                         \
668                 SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));    \
669         else                                                            \
670                 SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
671 } while (0)
672
673 #define PN(x) \
674         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
675
676         P(nr_running);
677         SEQ_printf(m, "  .%-30s: %lu\n", "load",
678                    rq->load.weight);
679         P(nr_switches);
680         P(nr_load_updates);
681         P(nr_uninterruptible);
682         PN(next_balance);
683         SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
684         PN(clock);
685         PN(clock_task);
686         P(cpu_load[0]);
687         P(cpu_load[1]);
688         P(cpu_load[2]);
689         P(cpu_load[3]);
690         P(cpu_load[4]);
691 #undef P
692 #undef PN
693
694 #ifdef CONFIG_SMP
695 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
696         P64(avg_idle);
697         P64(max_idle_balance_cost);
698 #undef P64
699 #endif
700
701 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
702         if (schedstat_enabled()) {
703                 P(yld_count);
704                 P(sched_count);
705                 P(sched_goidle);
706                 P(ttwu_count);
707                 P(ttwu_local);
708         }
709 #undef P
710
711         print_cfs_stats(m, cpu);
712         print_rt_stats(m, cpu);
713         print_dl_stats(m, cpu);
714
715         print_rq(m, rq, cpu);
716         SEQ_printf(m, "\n");
717 }
718
/*
 * Human-readable names printed next to sysctl_sched_tunable_scaling,
 * indexed by that value.  The table is never modified, so both the
 * strings and the pointers are const.
 */
static const char * const sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};
724
725 static void sched_debug_header(struct seq_file *m)
726 {
727         u64 ktime, sched_clk, cpu_clk;
728         unsigned long flags;
729
730         local_irq_save(flags);
731         ktime = ktime_to_ns(ktime_get());
732         sched_clk = sched_clock();
733         cpu_clk = local_clock();
734         local_irq_restore(flags);
735
736         SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
737                 init_utsname()->release,
738                 (int)strcspn(init_utsname()->version, " "),
739                 init_utsname()->version);
740
741 #define P(x) \
742         SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
743 #define PN(x) \
744         SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
745         PN(ktime);
746         PN(sched_clk);
747         PN(cpu_clk);
748         P(jiffies);
749 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
750         P(sched_clock_stable());
751 #endif
752 #undef PN
753 #undef P
754
755         SEQ_printf(m, "\n");
756         SEQ_printf(m, "sysctl_sched\n");
757
758 #define P(x) \
759         SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
760 #define PN(x) \
761         SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
762         PN(sysctl_sched_latency);
763         PN(sysctl_sched_min_granularity);
764         PN(sysctl_sched_wakeup_granularity);
765         P(sysctl_sched_child_runs_first);
766         P(sysctl_sched_features);
767 #undef PN
768 #undef P
769
770         SEQ_printf(m, "  .%-40s: %d (%s)\n",
771                 "sysctl_sched_tunable_scaling",
772                 sysctl_sched_tunable_scaling,
773                 sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
774         SEQ_printf(m, "\n");
775 }
776
/*
 * seq_file show callback.  @v is the iterator token produced by
 * sched_debug_start(): 1 selects the header, cpu + 2 selects a CPU.
 */
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)v - 2;

	if (cpu == -1)
		sched_debug_header(m);
	else
		print_cpu(m, cpu);

	return 0;
}
788
789 void sysrq_sched_debug_show(void)
790 {
791         int cpu;
792
793         sched_debug_header(NULL);
794         for_each_online_cpu(cpu)
795                 print_cpu(NULL, cpu);
796
797 }
798
799 /*
800  * This itererator needs some explanation.
801  * It returns 1 for the header position.
802  * This means 2 is CPU 0.
803  * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
804  * to use cpumask_* to iterate over the CPUs.
805  */
806 static void *sched_debug_start(struct seq_file *file, loff_t *offset)
807 {
808         unsigned long n = *offset;
809
810         if (n == 0)
811                 return (void *) 1;
812
813         n--;
814
815         if (n > 0)
816                 n = cpumask_next(n - 1, cpu_online_mask);
817         else
818                 n = cpumask_first(cpu_online_mask);
819
820         *offset = n + 1;
821
822         if (n < nr_cpu_ids)
823                 return (void *)(unsigned long)(n + 2);
824
825         return NULL;
826 }
827
828 static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
829 {
830         (*offset)++;
831         return sched_debug_start(file, offset);
832 }
833
/* seq_file stop callback: the iterator holds no resources to release. */
static void sched_debug_stop(struct seq_file *file, void *data)
{
	/* nothing to do */
}
837
838 static const struct seq_operations sched_debug_sops = {
839         .start          = sched_debug_start,
840         .next           = sched_debug_next,
841         .stop           = sched_debug_stop,
842         .show           = sched_debug_show,
843 };
844
845 static int __init init_sched_debug_procfs(void)
846 {
847         if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops))
848                 return -ENOMEM;
849         return 0;
850 }
851
852 __initcall(init_sched_debug_procfs);
853
/*
 * Print helpers for the per-task report.  Each one expects a seq_file
 * named "m" in scope; the variants without leading underscores also
 * expect a task pointer named "p" and print the field p->F.
 *   __P/P:   value as a 21-wide signed 64-bit integer
 *   __PN/PN: nsec value split into two parts by SPLIT_NS()
 * The label is always the stringified argument.
 *
 * NOTE: proc_sched_show_task() defines these macros again with the same
 * replacement text; keep both copies token-identical.
 */
#define __P(F)  SEQ_printf(m, "%-45s:%21Ld\n",       #F, (long long)F)
#define   P(F)  SEQ_printf(m, "%-45s:%21Ld\n",       #F, (long long)p->F)
#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define   PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
858
859
860 #ifdef CONFIG_NUMA_BALANCING
/*
 * Print one "numa_faults" line for @node.  The parameters arrive in the
 * order task-shared, task-private, group-shared, group-private, while
 * the line lists the private counter before the shared one.
 */
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
		unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
	/* node id */
	SEQ_printf(m, "numa_faults node=%d ", node);
	/* per-task counters */
	SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
	/* per-group counters, ends the line */
	SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
}
868 #endif
869
870
/*
 * Print the NUMA balancing state of task @p; compiles to an empty
 * function without CONFIG_NUMA_BALANCING.  Uses the file-level P()
 * helper, so the parameters must keep the names "p" and "m".
 */
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	/* The scan sequence lives in the mm, which the task may not have. */
	if (p->mm) {
		P(mm->numa_scan_seq);
	}

	P(numa_pages_migrated);
	P(numa_preferred_nid);
	P(total_numa_faults);
	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
		   task_node(p), task_numa_group_id(p));
	show_numa_stats(p, m);
#endif
}
885
886 void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
887                                                   struct seq_file *m)
888 {
889         unsigned long nr_switches;
890
891         SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
892                                                 get_nr_threads(p));
893         SEQ_printf(m,
894                 "---------------------------------------------------------"
895                 "----------\n");
896 #define __P(F) \
897         SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
898 #define P(F) \
899         SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
900 #define P_SCHEDSTAT(F) \
901         SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
902 #define __PN(F) \
903         SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
904 #define PN(F) \
905         SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
906 #define PN_SCHEDSTAT(F) \
907         SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
908
909         PN(se.exec_start);
910         PN(se.vruntime);
911         PN(se.sum_exec_runtime);
912
913         nr_switches = p->nvcsw + p->nivcsw;
914
915         P(se.nr_migrations);
916
917         if (schedstat_enabled()) {
918                 u64 avg_atom, avg_per_cpu;
919
920                 PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
921                 PN_SCHEDSTAT(se.statistics.wait_start);
922                 PN_SCHEDSTAT(se.statistics.sleep_start);
923                 PN_SCHEDSTAT(se.statistics.block_start);
924                 PN_SCHEDSTAT(se.statistics.sleep_max);
925                 PN_SCHEDSTAT(se.statistics.block_max);
926                 PN_SCHEDSTAT(se.statistics.exec_max);
927                 PN_SCHEDSTAT(se.statistics.slice_max);
928                 PN_SCHEDSTAT(se.statistics.wait_max);
929                 PN_SCHEDSTAT(se.statistics.wait_sum);
930                 P_SCHEDSTAT(se.statistics.wait_count);
931                 PN_SCHEDSTAT(se.statistics.iowait_sum);
932                 P_SCHEDSTAT(se.statistics.iowait_count);
933                 P_SCHEDSTAT(se.statistics.nr_migrations_cold);
934                 P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
935                 P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
936                 P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
937                 P_SCHEDSTAT(se.statistics.nr_forced_migrations);
938                 P_SCHEDSTAT(se.statistics.nr_wakeups);
939                 P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
940                 P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
941                 P_SCHEDSTAT(se.statistics.nr_wakeups_local);
942                 P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
943                 P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
944                 P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
945                 P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
946                 P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
947
948                 avg_atom = p->se.sum_exec_runtime;
949                 if (nr_switches)
950                         avg_atom = div64_ul(avg_atom, nr_switches);
951                 else
952                         avg_atom = -1LL;
953
954                 avg_per_cpu = p->se.sum_exec_runtime;
955                 if (p->se.nr_migrations) {
956                         avg_per_cpu = div64_u64(avg_per_cpu,
957                                                 p->se.nr_migrations);
958                 } else {
959                         avg_per_cpu = -1LL;
960                 }
961
962                 __PN(avg_atom);
963                 __PN(avg_per_cpu);
964         }
965
966         __P(nr_switches);
967         SEQ_printf(m, "%-45s:%21Ld\n",
968                    "nr_voluntary_switches", (long long)p->nvcsw);
969         SEQ_printf(m, "%-45s:%21Ld\n",
970                    "nr_involuntary_switches", (long long)p->nivcsw);
971
972         P(se.load.weight);
973         P(se.runnable_weight);
974 #ifdef CONFIG_SMP
975         P(se.avg.load_sum);
976         P(se.avg.runnable_load_sum);
977         P(se.avg.util_sum);
978         P(se.avg.load_avg);
979         P(se.avg.runnable_load_avg);
980         P(se.avg.util_avg);
981         P(se.avg.last_update_time);
982         P(se.avg.util_est.ewma);
983         P(se.avg.util_est.enqueued);
984 #endif
985         P(policy);
986         P(prio);
987         if (p->policy == SCHED_DEADLINE) {
988                 P(dl.runtime);
989                 P(dl.deadline);
990         }
991 #undef PN_SCHEDSTAT
992 #undef PN
993 #undef __PN
994 #undef P_SCHEDSTAT
995 #undef P
996 #undef __P
997
998         {
999                 unsigned int this_cpu = raw_smp_processor_id();
1000                 u64 t0, t1;
1001
1002                 t0 = cpu_clock(this_cpu);
1003                 t1 = cpu_clock(this_cpu);
1004                 SEQ_printf(m, "%-45s:%21Ld\n",
1005                            "clock-delta", (long long)(t1-t0));
1006         }
1007
1008         sched_show_numa(p, m);
1009 }
1010
/*
 * Zero task @p's p->se.statistics block.
 *
 * The memset is compiled in only with CONFIG_SCHEDSTATS; without it
 * this function does nothing.
 */
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	/* Wipe the whole structure in one go. */
	memset(&p->se.statistics, 0,
	       sizeof(p->se.statistics));
#endif
}