GNU Linux-libre 5.17.9-gnu
[releases.git] / kernel / sched / cpuacct.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * CPU accounting code for task groups.
4  *
5  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
6  * (balbir@in.ibm.com).
7  */
8 #include <asm/irq_regs.h>
9 #include "sched.h"
10
/*
 * Statistic selectors: time spent by the tasks of the CPU accounting
 * group executing in ...
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	/*
	 * Number of real statistics above; keep it last.
	 * cpuacct_cpuusage_read() also accepts this value to select the
	 * per-CPU cpuusage counter (the sum of usages).
	 */
	CPUACCT_STAT_NSTATS,
};
18
19 static const char * const cpuacct_stat_desc[] = {
20         [CPUACCT_STAT_USER] = "user",
21         [CPUACCT_STAT_SYSTEM] = "system",
22 };
23
24 /* track CPU usage of a group of tasks and its child groups */
25 struct cpuacct {
26         struct cgroup_subsys_state      css;
27         /* cpuusage holds pointer to a u64-type object on every CPU */
28         u64 __percpu    *cpuusage;
29         struct kernel_cpustat __percpu  *cpustat;
30 };
31
32 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
33 {
34         return css ? container_of(css, struct cpuacct, css) : NULL;
35 }
36
37 /* Return CPU accounting group to which this task belongs */
38 static inline struct cpuacct *task_ca(struct task_struct *tsk)
39 {
40         return css_ca(task_css(tsk, cpuacct_cgrp_id));
41 }
42
43 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
44 {
45         return css_ca(ca->css.parent);
46 }
47
48 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
49 static struct cpuacct root_cpuacct = {
50         .cpustat        = &kernel_cpustat,
51         .cpuusage       = &root_cpuacct_cpuusage,
52 };
53
54 /* Create a new CPU accounting group */
55 static struct cgroup_subsys_state *
56 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
57 {
58         struct cpuacct *ca;
59
60         if (!parent_css)
61                 return &root_cpuacct.css;
62
63         ca = kzalloc(sizeof(*ca), GFP_KERNEL);
64         if (!ca)
65                 goto out;
66
67         ca->cpuusage = alloc_percpu(u64);
68         if (!ca->cpuusage)
69                 goto out_free_ca;
70
71         ca->cpustat = alloc_percpu(struct kernel_cpustat);
72         if (!ca->cpustat)
73                 goto out_free_cpuusage;
74
75         return &ca->css;
76
77 out_free_cpuusage:
78         free_percpu(ca->cpuusage);
79 out_free_ca:
80         kfree(ca);
81 out:
82         return ERR_PTR(-ENOMEM);
83 }
84
85 /* Destroy an existing CPU accounting group */
86 static void cpuacct_css_free(struct cgroup_subsys_state *css)
87 {
88         struct cpuacct *ca = css_ca(css);
89
90         free_percpu(ca->cpustat);
91         free_percpu(ca->cpuusage);
92         kfree(ca);
93 }
94
95 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
96                                  enum cpuacct_stat_index index)
97 {
98         u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
99         u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
100         u64 data;
101
102         /*
103          * We allow index == CPUACCT_STAT_NSTATS here to read
104          * the sum of usages.
105          */
106         if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
107                 return 0;
108
109 #ifndef CONFIG_64BIT
110         /*
111          * Take rq->lock to make 64-bit read safe on 32-bit platforms.
112          */
113         raw_spin_rq_lock_irq(cpu_rq(cpu));
114 #endif
115
116         switch (index) {
117         case CPUACCT_STAT_USER:
118                 data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
119                 break;
120         case CPUACCT_STAT_SYSTEM:
121                 data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
122                         cpustat[CPUTIME_SOFTIRQ];
123                 break;
124         case CPUACCT_STAT_NSTATS:
125                 data = *cpuusage;
126                 break;
127         }
128
129 #ifndef CONFIG_64BIT
130         raw_spin_rq_unlock_irq(cpu_rq(cpu));
131 #endif
132
133         return data;
134 }
135
136 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
137 {
138         u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
139         u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
140
141         /* Don't allow to reset global kernel_cpustat */
142         if (ca == &root_cpuacct)
143                 return;
144
145 #ifndef CONFIG_64BIT
146         /*
147          * Take rq->lock to make 64-bit write safe on 32-bit platforms.
148          */
149         raw_spin_rq_lock_irq(cpu_rq(cpu));
150 #endif
151         *cpuusage = 0;
152         cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
153         cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
154         cpustat[CPUTIME_SOFTIRQ] = 0;
155
156 #ifndef CONFIG_64BIT
157         raw_spin_rq_unlock_irq(cpu_rq(cpu));
158 #endif
159 }
160
161 /* Return total CPU usage (in nanoseconds) of a group */
162 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
163                            enum cpuacct_stat_index index)
164 {
165         struct cpuacct *ca = css_ca(css);
166         u64 totalcpuusage = 0;
167         int i;
168
169         for_each_possible_cpu(i)
170                 totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
171
172         return totalcpuusage;
173 }
174
175 static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
176                               struct cftype *cft)
177 {
178         return __cpuusage_read(css, CPUACCT_STAT_USER);
179 }
180
181 static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
182                              struct cftype *cft)
183 {
184         return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
185 }
186
187 static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
188 {
189         return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
190 }
191
192 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
193                           u64 val)
194 {
195         struct cpuacct *ca = css_ca(css);
196         int cpu;
197
198         /*
199          * Only allow '0' here to do a reset.
200          */
201         if (val)
202                 return -EINVAL;
203
204         for_each_possible_cpu(cpu)
205                 cpuacct_cpuusage_write(ca, cpu);
206
207         return 0;
208 }
209
210 static int __cpuacct_percpu_seq_show(struct seq_file *m,
211                                      enum cpuacct_stat_index index)
212 {
213         struct cpuacct *ca = css_ca(seq_css(m));
214         u64 percpu;
215         int i;
216
217         for_each_possible_cpu(i) {
218                 percpu = cpuacct_cpuusage_read(ca, i, index);
219                 seq_printf(m, "%llu ", (unsigned long long) percpu);
220         }
221         seq_printf(m, "\n");
222         return 0;
223 }
224
225 static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
226 {
227         return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
228 }
229
230 static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
231 {
232         return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
233 }
234
235 static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
236 {
237         return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
238 }
239
240 static int cpuacct_all_seq_show(struct seq_file *m, void *V)
241 {
242         struct cpuacct *ca = css_ca(seq_css(m));
243         int index;
244         int cpu;
245
246         seq_puts(m, "cpu");
247         for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
248                 seq_printf(m, " %s", cpuacct_stat_desc[index]);
249         seq_puts(m, "\n");
250
251         for_each_possible_cpu(cpu) {
252                 seq_printf(m, "%d", cpu);
253                 for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
254                         seq_printf(m, " %llu",
255                                    cpuacct_cpuusage_read(ca, cpu, index));
256                 seq_puts(m, "\n");
257         }
258         return 0;
259 }
260
261 static int cpuacct_stats_show(struct seq_file *sf, void *v)
262 {
263         struct cpuacct *ca = css_ca(seq_css(sf));
264         struct task_cputime cputime;
265         u64 val[CPUACCT_STAT_NSTATS];
266         int cpu;
267         int stat;
268
269         memset(&cputime, 0, sizeof(cputime));
270         for_each_possible_cpu(cpu) {
271                 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
272
273                 cputime.utime += cpustat[CPUTIME_USER];
274                 cputime.utime += cpustat[CPUTIME_NICE];
275                 cputime.stime += cpustat[CPUTIME_SYSTEM];
276                 cputime.stime += cpustat[CPUTIME_IRQ];
277                 cputime.stime += cpustat[CPUTIME_SOFTIRQ];
278
279                 cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
280         }
281
282         cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime,
283                 &val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]);
284
285         for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
286                 seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat],
287                         nsec_to_clock_t(val[stat]));
288         }
289
290         return 0;
291 }
292
293 static struct cftype files[] = {
294         {
295                 .name = "usage",
296                 .read_u64 = cpuusage_read,
297                 .write_u64 = cpuusage_write,
298         },
299         {
300                 .name = "usage_user",
301                 .read_u64 = cpuusage_user_read,
302         },
303         {
304                 .name = "usage_sys",
305                 .read_u64 = cpuusage_sys_read,
306         },
307         {
308                 .name = "usage_percpu",
309                 .seq_show = cpuacct_percpu_seq_show,
310         },
311         {
312                 .name = "usage_percpu_user",
313                 .seq_show = cpuacct_percpu_user_seq_show,
314         },
315         {
316                 .name = "usage_percpu_sys",
317                 .seq_show = cpuacct_percpu_sys_seq_show,
318         },
319         {
320                 .name = "usage_all",
321                 .seq_show = cpuacct_all_seq_show,
322         },
323         {
324                 .name = "stat",
325                 .seq_show = cpuacct_stats_show,
326         },
327         { }     /* terminate */
328 };
329
330 /*
331  * charge this task's execution time to its accounting group.
332  *
333  * called with rq->lock held.
334  */
335 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
336 {
337         unsigned int cpu = task_cpu(tsk);
338         struct cpuacct *ca;
339
340         rcu_read_lock();
341
342         for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
343                 *per_cpu_ptr(ca->cpuusage, cpu) += cputime;
344
345         rcu_read_unlock();
346 }
347
348 /*
349  * Add user/system time to cpuacct.
350  *
351  * Note: it's the caller that updates the account of the root cgroup.
352  */
353 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
354 {
355         struct cpuacct *ca;
356
357         rcu_read_lock();
358         for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
359                 __this_cpu_add(ca->cpustat->cpustat[index], val);
360         rcu_read_unlock();
361 }
362
363 struct cgroup_subsys cpuacct_cgrp_subsys = {
364         .css_alloc      = cpuacct_css_alloc,
365         .css_free       = cpuacct_css_free,
366         .legacy_cftypes = files,
367         .early_init     = true,
368 };