GNU Linux-libre 4.19.245-gnu1
/*
 * arch/arm64/kernel/topology.c
 *
 * Copyright (C) 2011,2013,2014 Linaro Limited.
 *
 * Based on the arm32 version written by Vincent Guittot in turn based on
 * arch/sh/kernel/topology.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/node.h>
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/string.h>

#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>

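/*
 * Return the logical CPU referenced by a cpu-map node's "cpu" phandle,
 * parsing that CPU's capacity on the way. Returns a negative value if the
 * phandle is missing or the CPU node cannot be matched to a logical CPU.
 */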
static int __init get_cpu_for_node(struct device_node *node)
{
        struct device_node *cpu_node;
        int cpu;

        cpu_node = of_parse_phandle(node, "cpu", 0);
        if (!cpu_node)
                return -1;

        cpu = of_cpu_node_to_id(cpu_node);
        if (cpu >= 0)
                topology_parse_cpu_capacity(cpu_node, cpu);
        else
                pr_crit("Unable to find CPU node for %pOF\n", cpu_node);

        of_node_put(cpu_node);
        return cpu;
}

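/*
 * Parse a coreN node of the cpu-map: record package, core and (for SMT
 * cores) thread IDs for every CPU described beneath it. A core may contain
 * either threadN subnodes or a direct "cpu" phandle, but not both.
 */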
static int __init parse_core(struct device_node *core, int package_id,
                             int core_id)
{
        char name[10];
        bool leaf = true;
        int i = 0;
        int cpu;
        struct device_node *t;

        do {
                snprintf(name, sizeof(name), "thread%d", i);
                t = of_get_child_by_name(core, name);
                if (t) {
                        leaf = false;
                        cpu = get_cpu_for_node(t);
                        if (cpu >= 0) {
                                cpu_topology[cpu].package_id = package_id;
                                cpu_topology[cpu].core_id = core_id;
                                cpu_topology[cpu].thread_id = i;
                        } else {
                                pr_err("%pOF: Can't get CPU for thread\n",
                                       t);
                                of_node_put(t);
                                return -EINVAL;
                        }
                        of_node_put(t);
                }
                i++;
        } while (t);

        cpu = get_cpu_for_node(core);
        if (cpu >= 0) {
                if (!leaf) {
                        pr_err("%pOF: Core has both threads and CPU\n",
                               core);
                        return -EINVAL;
                }

                cpu_topology[cpu].package_id = package_id;
                cpu_topology[cpu].core_id = core_id;
        } else if (leaf) {
                pr_err("%pOF: Can't get CPU for leaf core\n", core);
                return -EINVAL;
        }

        return 0;
}

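/*
 * Parse a clusterN node, recursing into any nested clusters before handling
 * the coreN children. Nesting is flattened: each leaf cluster gets its own
 * package ID, regardless of depth.
 */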
static int __init parse_cluster(struct device_node *cluster, int depth)
{
        char name[10];
        bool leaf = true;
        bool has_cores = false;
        struct device_node *c;
        static int package_id __initdata;
        int core_id = 0;
        int i, ret;

        /*
         * First check for child clusters; we currently ignore any
         * information about the nesting of clusters and present the
         * scheduler with a flat list of them.
         */
        i = 0;
        do {
                snprintf(name, sizeof(name), "cluster%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        leaf = false;
                        ret = parse_cluster(c, depth + 1);
                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        /* Now check for cores */
        i = 0;
        do {
                snprintf(name, sizeof(name), "core%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        has_cores = true;

                        if (depth == 0) {
                                pr_err("%pOF: cpu-map children should be clusters\n",
                                       c);
                                of_node_put(c);
                                return -EINVAL;
                        }

                        if (leaf) {
                                ret = parse_core(c, package_id, core_id++);
                        } else {
                                pr_err("%pOF: Non-leaf cluster with core %s\n",
                                       cluster, name);
                                ret = -EINVAL;
                        }

                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        if (leaf && !has_cores)
                pr_warn("%pOF: empty cluster\n", cluster);

        if (leaf)
                package_id++;

        return 0;
}

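/*
 * Build the CPU topology from the devicetree's /cpus/cpu-map node, e.g.
 * a map of the form:
 *
 *      cpu-map {
 *              cluster0 {
 *                      core0 { cpu = <&cpu0>; };
 *                      core1 { cpu = <&cpu1>; };
 *              };
 *              cluster1 {
 *                      core0 { cpu = <&cpu2>; };
 *                      core1 { cpu = <&cpu3>; };
 *              };
 *      };
 *
 * Returns 0 when no cpu-map is present, or an error if the map is malformed
 * or does not cover every possible CPU.
 */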
static int __init parse_dt_topology(void)
{
        struct device_node *cn, *map;
        int ret = 0;
        int cpu;

        cn = of_find_node_by_path("/cpus");
        if (!cn) {
                pr_err("No CPU information found in DT\n");
                return 0;
        }

        /*
         * When topology is provided cpu-map is essentially a root
         * cluster with restricted subnodes.
         */
        map = of_get_child_by_name(cn, "cpu-map");
        if (!map)
                goto out;

        ret = parse_cluster(map, 0);
        if (ret != 0)
                goto out_map;

        topology_normalize_cpu_scale();

        /*
         * Check that all cores are in the topology; the SMP code will
         * only mark cores described in the DT as possible.
         */
        for_each_possible_cpu(cpu)
                if (cpu_topology[cpu].package_id == -1)
                        ret = -EINVAL;

out_map:
        of_node_put(map);
out:
        of_node_put(cn);
        return ret;
}

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

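/*
 * Scheduler core-group mask for @cpu: the smallest of its NUMA node,
 * package (core_sibling) and last-level-cache sibling masks.
 */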
const struct cpumask *cpu_coregroup_mask(int cpu)
{
        const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

        /* Find the smallest of NUMA, core or LLC siblings */
        if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
                /* not NUMA in package, let's use the package siblings */
                core_mask = &cpu_topology[cpu].core_sibling;
        }
        if (cpu_topology[cpu].llc_id != -1) {
                if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
                        core_mask = &cpu_topology[cpu].llc_sibling;
        }

        return core_mask;
}

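/*
 * Cross-link @cpuid into the llc, core and thread sibling masks of every
 * online CPU it shares an LLC, package or core with, and vice versa.
 */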
static void update_siblings_masks(unsigned int cpuid)
{
        struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
        int cpu;

        /* update core and thread sibling masks */
        for_each_online_cpu(cpu) {
                cpu_topo = &cpu_topology[cpu];

                if (cpuid_topo->llc_id == cpu_topo->llc_id) {
                        cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
                        cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
                }

                if (cpuid_topo->package_id != cpu_topo->package_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

                if (cpuid_topo->core_id != cpu_topo->core_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
        }
}

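/*
 * Record the topology of the calling CPU. If firmware (DT or ACPI) already
 * populated the entry, only the sibling masks are refreshed; otherwise fall
 * back to a flat per-CPU default, deliberately ignoring MPIDR.
 */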
void store_cpu_topology(unsigned int cpuid)
{
        struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
        u64 mpidr;

        if (cpuid_topo->package_id != -1)
                goto topology_populated;

        mpidr = read_cpuid_mpidr();

        /* Uniprocessor systems can rely on default topology values */
        if (mpidr & MPIDR_UP_BITMASK)
                return;

        /*
         * This would be the place to create cpu topology based on MPIDR.
         *
         * However, it cannot be trusted to depict the actual topology; some
         * pieces of the architecture enforce an artificial cap on Aff0 values
         * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an
         * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up
         * having absolutely no relationship to the actual underlying system
         * topology, and cannot be reasonably used as core / package ID.
         *
         * If the MT bit is set, Aff0 *could* be used to define a thread ID, but
         * we still wouldn't be able to obtain a sane core ID. This means we
         * need to entirely ignore MPIDR for any topology deduction.
         */
        cpuid_topo->thread_id  = -1;
        cpuid_topo->core_id    = cpuid;
        cpuid_topo->package_id = cpu_to_node(cpuid);

        pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
                 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
                 cpuid_topo->thread_id, mpidr);

topology_populated:
        update_siblings_masks(cpuid);
}

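/* Reset a CPU's sibling masks so that each one contains only the CPU itself. */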
static void clear_cpu_topology(int cpu)
{
        struct cpu_topology *cpu_topo = &cpu_topology[cpu];

        cpumask_clear(&cpu_topo->llc_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

        cpumask_clear(&cpu_topo->core_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
        cpumask_clear(&cpu_topo->thread_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

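/* Return every possible CPU to the default "unknown" topology. */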
static void __init reset_cpu_topology(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_topology *cpu_topo = &cpu_topology[cpu];

                cpu_topo->thread_id = -1;
                cpu_topo->core_id = 0;
                cpu_topo->package_id = -1;
                cpu_topo->llc_id = -1;

                clear_cpu_topology(cpu);
        }
}

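/*
 * Remove a hot-unplugged CPU from its siblings' masks and reset its own.
 */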
void remove_cpu_topology(unsigned int cpu)
{
        int sibling;

        for_each_cpu(sibling, topology_core_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
        for_each_cpu(sibling, topology_sibling_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
        for_each_cpu(sibling, topology_llc_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

        clear_cpu_topology(cpu);
}

#ifdef CONFIG_ACPI
static bool __init acpi_cpu_is_threaded(int cpu)
{
        int is_threaded = acpi_pptt_cpu_is_thread(cpu);

        /*
         * if the PPTT doesn't have thread information, assume a homogeneous
         * machine and return the current CPU's thread state.
         */
        if (is_threaded < 0)
                is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;

        return !!is_threaded;
}

/*
 * Propagate the topology information of the processor_topology_node tree to the
 * cpu_topology array.
 */
static int __init parse_acpi_topology(void)
{
        int cpu, topology_id;

        for_each_possible_cpu(cpu) {
                int i, cache_id;

                topology_id = find_acpi_cpu_topology(cpu, 0);
                if (topology_id < 0)
                        return topology_id;

                if (acpi_cpu_is_threaded(cpu)) {
                        cpu_topology[cpu].thread_id = topology_id;
                        topology_id = find_acpi_cpu_topology(cpu, 1);
                        cpu_topology[cpu].core_id   = topology_id;
                } else {
                        cpu_topology[cpu].thread_id  = -1;
                        cpu_topology[cpu].core_id    = topology_id;
                }
                topology_id = find_acpi_cpu_topology_package(cpu);
                cpu_topology[cpu].package_id = topology_id;

                i = acpi_find_last_cache_level(cpu);

                if (i > 0) {
                        /*
                         * this is the only part of cpu_topology that has
                         * a direct relationship with the cache topology
                         */
                        cache_id = find_acpi_cpu_cache_topology(cpu, i);
                        if (cache_id > 0)
                                cpu_topology[cpu].llc_id = cache_id;
                }
        }

        return 0;
}

#else
static inline int __init parse_acpi_topology(void)
{
        return -EINVAL;
}
#endif

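/*
 * Early boot: populate cpu_topology from ACPI (PPTT) if available, else from
 * the devicetree, falling back to the reset defaults if parsing fails.
 */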
void __init init_cpu_topology(void)
{
        reset_cpu_topology();

        /*
         * Discard anything that was parsed if we hit an error so we
         * don't use partial information.
         */
        if (!acpi_disabled && parse_acpi_topology())
                reset_cpu_topology();
        else if (of_have_populated_dt() && parse_dt_topology())
                reset_cpu_topology();
}