arch_topology.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Arch specific cpu topology information
   4  *
   5  * Copyright (C) 2016, ARM Ltd.
   6  * Written by: Juri Lelli, ARM Ltd.
   7  */
   8
   9 #include <linux/acpi.h>
  10 #include <linux/cpu.h>
  11 #include <linux/cpufreq.h>
  12 #include <linux/device.h>
  13 #include <linux/of.h>
  14 #include <linux/slab.h>
  15 #include <linux/string.h>
  16 #include <linux/sched/topology.h>
  17 #include <linux/cpuset.h>
  18 #include <linux/cpumask.h>
  19 #include <linux/init.h>
  20 #include <linux/percpu.h>
  21 #include <linux/sched.h>
  22 #include <linux/smp.h>
  23
  24 bool topology_scale_freq_invariant(void)
  25 {
  26         return cpufreq_supports_freq_invariance() ||
  27                arch_freq_counters_available(cpu_online_mask);
  28 }
  29
  30 __weak bool arch_freq_counters_available(const struct cpumask *cpus)
  31 {
  32         return false;
  33 }
  34 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
  35
  36 void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
  37                              unsigned long max_freq)
  38 {
  39         unsigned long scale;
  40         int i;
  41
  42         if (WARN_ON_ONCE(!cur_freq || !max_freq))
  43                 return;
  44
  45         /*
  46          * If the use of counters for FIE is enabled, just return as we don't
  47          * want to update the scale factor with information from CPUFREQ.
  48          * Instead the scale factor will be updated from arch_scale_freq_tick.
  49          */
  50         if (arch_freq_counters_available(cpus))
  51                 return;
  52
  53         scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
  54
  55         for_each_cpu(i, cpus)
  56                 per_cpu(freq_scale, i) = scale;
  57 }
  58
  59 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
  60
  61 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
  62 {
  63         per_cpu(cpu_scale, cpu) = capacity;
  64 }
  65
  66 DEFINE_PER_CPU(unsigned long, thermal_pressure);
  67
  68 void topology_set_thermal_pressure(const struct cpumask *cpus,
  69                                unsigned long th_pressure)
  70 {
  71         int cpu;
  72
  73         for_each_cpu(cpu, cpus)
  74                 WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
  75 }
  76
  77 static ssize_t cpu_capacity_show(struct device *dev,
  78                                  struct device_attribute *attr,
  79                                  char *buf)
  80 {
  81         struct cpu *cpu = container_of(dev, struct cpu, dev);
  82
  83         return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
  84 }
  85
  86 static void update_topology_flags_workfn(struct work_struct *work);
  87 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
  88
  89 static DEVICE_ATTR_RO(cpu_capacity);
  90
  91 static int register_cpu_capacity_sysctl(void)
  92 {
  93         int i;
  94         struct device *cpu;
  95
  96         for_each_possible_cpu(i) {
  97                 cpu = get_cpu_device(i);
  98                 if (!cpu) {
  99                         pr_err("%s: too early to get CPU%d device!\n",
 100                                __func__, i);
 101                         continue;
 102                 }
 103                 device_create_file(cpu, &dev_attr_cpu_capacity);
 104         }
 105
 106         return 0;
 107 }
 108 subsys_initcall(register_cpu_capacity_sysctl);
 109
 110 static int update_topology;
 111
 112 int topology_update_cpu_topology(void)
 113 {
 114         return update_topology;
 115 }
 116
 117 /*
 118  * Updating the sched_domains can't be done directly from cpufreq callbacks
 119  * due to locking, so queue the work for later.
 120  */
 121 static void update_topology_flags_workfn(struct work_struct *work)
 122 {
 123         update_topology = 1;
 124         rebuild_sched_domains();
 125         pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
 126         update_topology = 0;
 127 }
 128
 129 static DEFINE_PER_CPU(u32, freq_factor) = 1;
 130 static u32 *raw_capacity;
 131
 132 static int free_raw_capacity(void)
 133 {
 134         kfree(raw_capacity);
 135         raw_capacity = NULL;
 136
 137         return 0;
 138 }
 139
 140 void topology_normalize_cpu_scale(void)
 141 {
 142         u64 capacity;
 143         u64 capacity_scale;
 144         int cpu;
 145
 146         if (!raw_capacity)
 147                 return;
 148
 149         capacity_scale = 1;
 150         for_each_possible_cpu(cpu) {
 151                 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
 152                 capacity_scale = max(capacity, capacity_scale);
 153         }
 154
 155         pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
 156         for_each_possible_cpu(cpu) {
 157                 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
 158                 capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
 159                         capacity_scale);
 160                 topology_set_cpu_scale(cpu, capacity);
 161                 pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
 162                         cpu, topology_get_cpu_scale(cpu));
 163         }
 164 }
 165
 166 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 167 {
 168         struct clk *cpu_clk;
 169         static bool cap_parsing_failed;
 170         int ret;
 171         u32 cpu_capacity;
 172
 173         if (cap_parsing_failed)
 174                 return false;
 175
 176         ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
 177                                    &cpu_capacity);
 178         if (!ret) {
 179                 if (!raw_capacity) {
 180                         raw_capacity = kcalloc(num_possible_cpus(),
 181                                                sizeof(*raw_capacity),
 182                                                GFP_KERNEL);
 183                         if (!raw_capacity) {
 184                                 cap_parsing_failed = true;
 185                                 return false;
 186                         }
 187                 }
 188                 raw_capacity[cpu] = cpu_capacity;
 189                 pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
 190                         cpu_node, raw_capacity[cpu]);
 191
 192                 /*
 193                  * Update freq_factor for calculating early boot cpu capacities.
 194                  * For non-clk CPU DVFS mechanism, there's no way to get the
 195                  * frequency value now, assuming they are running at the same
 196                  * frequency (by keeping the initial freq_factor value).
 197                  */
 198                 cpu_clk = of_clk_get(cpu_node, 0);
 199                 if (!PTR_ERR_OR_ZERO(cpu_clk)) {
 200                         per_cpu(freq_factor, cpu) =
 201                                 clk_get_rate(cpu_clk) / 1000;
 202                         clk_put(cpu_clk);
 203                 }
 204         } else {
 205                 if (raw_capacity) {
 206                         pr_err("cpu_capacity: missing %pOF raw capacity\n",
 207                                 cpu_node);
 208                         pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
 209                 }
 210                 cap_parsing_failed = true;
 211                 free_raw_capacity();
 212         }
 213
 214         return !ret;
 215 }
 216
 217 #ifdef CONFIG_CPU_FREQ
 218 static cpumask_var_t cpus_to_visit;
 219 static void parsing_done_workfn(struct work_struct *work);
 220 static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
 221
 222 static int
 223 init_cpu_capacity_callback(struct notifier_block *nb,
 224                            unsigned long val,
 225                            void *data)
 226 {
 227         struct cpufreq_policy *policy = data;
 228         int cpu;
 229
 230         if (!raw_capacity)
 231                 return 0;
 232
 233         if (val != CPUFREQ_CREATE_POLICY)
 234                 return 0;
 235
 236         pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
 237                  cpumask_pr_args(policy->related_cpus),
 238                  cpumask_pr_args(cpus_to_visit));
 239
 240         cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
 241
 242         for_each_cpu(cpu, policy->related_cpus)
 243                 per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
 244
 245         if (cpumask_empty(cpus_to_visit)) {
 246                 topology_normalize_cpu_scale();
 247                 schedule_work(&update_topology_flags_work);
 248                 free_raw_capacity();
 249                 pr_debug("cpu_capacity: parsing done\n");
 250                 schedule_work(&parsing_done_work);
 251         }
 252
 253         return 0;
 254 }
 255
 256 static struct notifier_block init_cpu_capacity_notifier = {
 257         .notifier_call = init_cpu_capacity_callback,
 258 };
 259
 260 static int __init register_cpufreq_notifier(void)
 261 {
 262         int ret;
 263
 264         /*
 265          * on ACPI-based systems we need to use the default cpu capacity
 266          * until we have the necessary code to parse the cpu capacity, so
 267          * skip registering cpufreq notifier.
 268          */
 269         if (!acpi_disabled || !raw_capacity)
 270                 return -EINVAL;
 271
 272         if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
 273                 return -ENOMEM;
 274
 275         cpumask_copy(cpus_to_visit, cpu_possible_mask);
 276
 277         ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
 278                                         CPUFREQ_POLICY_NOTIFIER);
 279
 280         if (ret)
 281                 free_cpumask_var(cpus_to_visit);
 282
 283         return ret;
 284 }
 285 core_initcall(register_cpufreq_notifier);
 286
 287 static void parsing_done_workfn(struct work_struct *work)
 288 {
 289         cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
 290                                          CPUFREQ_POLICY_NOTIFIER);
 291         free_cpumask_var(cpus_to_visit);
 292 }
 293
 294 #else
 295 core_initcall(free_raw_capacity);
 296 #endif
 297
 298 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 299 /*
 300  * This function returns the logic cpu number of the node.
 301  * There are basically three kinds of return values:
 302  * (1) logic cpu number which is > 0.
 303  * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
 304  * there is no possible logical CPU in the kernel to match. This happens
 305  * when CONFIG_NR_CPUS is configure to be smaller than the number of
 306  * CPU nodes in DT. We need to just ignore this case.
 307  * (3) -1 if the node does not exist in the device tree
 308  */
 309 static int __init get_cpu_for_node(struct device_node *node)
 310 {
 311         struct device_node *cpu_node;
 312         int cpu;
 313
 314         cpu_node = of_parse_phandle(node, "cpu", 0);
 315         if (!cpu_node)
 316                 return -1;
 317
 318         cpu = of_cpu_node_to_id(cpu_node);
 319         if (cpu >= 0)
 320                 topology_parse_cpu_capacity(cpu_node, cpu);
 321         else
 322                 pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
 323                         cpu_node, cpumask_pr_args(cpu_possible_mask));
 324
 325         of_node_put(cpu_node);
 326         return cpu;
 327 }
 328
 329 static int __init parse_core(struct device_node *core, int package_id,
 330                              int core_id)
 331 {
 332         char name[20];
 333         bool leaf = true;
 334         int i = 0;
 335         int cpu;
 336         struct device_node *t;
 337
 338         do {
 339                 snprintf(name, sizeof(name), "thread%d", i);
 340                 t = of_get_child_by_name(core, name);
 341                 if (t) {
 342                         leaf = false;
 343                         cpu = get_cpu_for_node(t);
 344                         if (cpu >= 0) {
 345                                 cpu_topology[cpu].package_id = package_id;
 346                                 cpu_topology[cpu].core_id = core_id;
 347                                 cpu_topology[cpu].thread_id = i;
 348                         } else if (cpu != -ENODEV) {
 349                                 pr_err("%pOF: Can't get CPU for thread\n", t);
 350                                 of_node_put(t);
 351                                 return -EINVAL;
 352                         }
 353                         of_node_put(t);
 354                 }
 355                 i++;
 356         } while (t);
 357
 358         cpu = get_cpu_for_node(core);
 359         if (cpu >= 0) {
 360                 if (!leaf) {
 361                         pr_err("%pOF: Core has both threads and CPU\n",
 362                                core);
 363                         return -EINVAL;
 364                 }
 365
 366                 cpu_topology[cpu].package_id = package_id;
 367                 cpu_topology[cpu].core_id = core_id;
 368         } else if (leaf && cpu != -ENODEV) {
 369                 pr_err("%pOF: Can't get CPU for leaf core\n", core);
 370                 return -EINVAL;
 371         }
 372
 373         return 0;
 374 }
 375
 376 static int __init parse_cluster(struct device_node *cluster, int depth)
 377 {
 378         char name[20];
 379         bool leaf = true;
 380         bool has_cores = false;
 381         struct device_node *c;
 382         static int package_id __initdata;
 383         int core_id = 0;
 384         int i, ret;
 385
 386         /*
 387          * First check for child clusters; we currently ignore any
 388          * information about the nesting of clusters and present the
 389          * scheduler with a flat list of them.
 390          */
 391         i = 0;
 392         do {
 393                 snprintf(name, sizeof(name), "cluster%d", i);
 394                 c = of_get_child_by_name(cluster, name);
 395                 if (c) {
 396                         leaf = false;
 397                         ret = parse_cluster(c, depth + 1);
 398                         of_node_put(c);
 399                         if (ret != 0)
 400                                 return ret;
 401                 }
 402                 i++;
 403         } while (c);
 404
 405         /* Now check for cores */
 406         i = 0;
 407         do {
 408                 snprintf(name, sizeof(name), "core%d", i);
 409                 c = of_get_child_by_name(cluster, name);
 410                 if (c) {
 411                         has_cores = true;
 412
 413                         if (depth == 0) {
 414                                 pr_err("%pOF: cpu-map children should be clusters\n",
 415                                        c);
 416                                 of_node_put(c);
 417                                 return -EINVAL;
 418                         }
 419
 420                         if (leaf) {
 421                                 ret = parse_core(c, package_id, core_id++);
 422                         } else {
 423                                 pr_err("%pOF: Non-leaf cluster with core %s\n",
 424                                        cluster, name);
 425                                 ret = -EINVAL;
 426                         }
 427
 428                         of_node_put(c);
 429                         if (ret != 0)
 430                                 return ret;
 431                 }
 432                 i++;
 433         } while (c);
 434
 435         if (leaf && !has_cores)
 436                 pr_warn("%pOF: empty cluster\n", cluster);
 437
 438         if (leaf)
 439                 package_id++;
 440
 441         return 0;
 442 }
 443
 444 static int __init parse_dt_topology(void)
 445 {
 446         struct device_node *cn, *map;
 447         int ret = 0;
 448         int cpu;
 449
 450         cn = of_find_node_by_path("/cpus");
 451         if (!cn) {
 452                 pr_err("No CPU information found in DT\n");
 453                 return 0;
 454         }
 455
 456         /*
 457          * When topology is provided cpu-map is essentially a root
 458          * cluster with restricted subnodes.
 459          */
 460         map = of_get_child_by_name(cn, "cpu-map");
 461         if (!map)
 462                 goto out;
 463
 464         ret = parse_cluster(map, 0);
 465         if (ret != 0)
 466                 goto out_map;
 467
 468         topology_normalize_cpu_scale();
 469
 470         /*
 471          * Check that all cores are in the topology; the SMP code will
 472          * only mark cores described in the DT as possible.
 473          */
 474         for_each_possible_cpu(cpu)
 475                 if (cpu_topology[cpu].package_id == -1)
 476                         ret = -EINVAL;
 477
 478 out_map:
 479         of_node_put(map);
 480 out:
 481         of_node_put(cn);
 482         return ret;
 483 }
 484 #endif
 485
 486 /*
 487  * cpu topology table
 488  */
 489 struct cpu_topology cpu_topology[NR_CPUS];
 490 EXPORT_SYMBOL_GPL(cpu_topology);
 491
 492 const struct cpumask *cpu_coregroup_mask(int cpu)
 493 {
 494         const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
 495
 496         /* Find the smaller of NUMA, core or LLC siblings */
 497         if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
 498                 /* not numa in package, lets use the package siblings */
 499                 core_mask = &cpu_topology[cpu].core_sibling;
 500         }
 501         if (cpu_topology[cpu].llc_id != -1) {
 502                 if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
 503                         core_mask = &cpu_topology[cpu].llc_sibling;
 504         }
 505
 506         return core_mask;
 507 }
 508
 509 void update_siblings_masks(unsigned int cpuid)
 510 {
 511         struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
 512         int cpu;
 513
 514         /* update core and thread sibling masks */
 515         for_each_online_cpu(cpu) {
 516                 cpu_topo = &cpu_topology[cpu];
 517
 518                 if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
 519                         cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
 520                         cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
 521                 }
 522
 523                 if (cpuid_topo->package_id != cpu_topo->package_id)
 524                         continue;
 525
 526                 cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
 527                 cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
 528
 529                 if (cpuid_topo->core_id != cpu_topo->core_id)
 530                         continue;
 531
 532                 cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
 533                 cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
 534         }
 535 }
 536
 537 static void clear_cpu_topology(int cpu)
 538 {
 539         struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 540
 541         cpumask_clear(&cpu_topo->llc_sibling);
 542         cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
 543
 544         cpumask_clear(&cpu_topo->core_sibling);
 545         cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
 546         cpumask_clear(&cpu_topo->thread_sibling);
 547         cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
 548 }
 549
 550 void __init reset_cpu_topology(void)
 551 {
 552         unsigned int cpu;
 553
 554         for_each_possible_cpu(cpu) {
 555                 struct cpu_topology *cpu_topo = &cpu_topology[cpu];
 556
 557                 cpu_topo->thread_id = -1;
 558                 cpu_topo->core_id = -1;
 559                 cpu_topo->package_id = -1;
 560                 cpu_topo->llc_id = -1;
 561
 562                 clear_cpu_topology(cpu);
 563         }
 564 }
 565
 566 void remove_cpu_topology(unsigned int cpu)
 567 {
 568         int sibling;
 569
 570         for_each_cpu(sibling, topology_core_cpumask(cpu))
 571                 cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
 572         for_each_cpu(sibling, topology_sibling_cpumask(cpu))
 573                 cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
 574         for_each_cpu(sibling, topology_llc_cpumask(cpu))
 575                 cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
 576
 577         clear_cpu_topology(cpu);
 578 }
 579
 580 __weak int __init parse_acpi_topology(void)
 581 {
 582         return 0;
 583 }
 584
 585 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 586 void __init init_cpu_topology(void)
 587 {
 588         reset_cpu_topology();
 589
 590         /*
 591          * Discard anything that was parsed if we hit an error so we
 592          * don't use partial information.
 593          */
 594         if (parse_acpi_topology())
 595                 reset_cpu_topology();
 596         else if (of_have_populated_dt() && parse_dt_topology())
 597                 reset_cpu_topology();
 598 }
 599
 600 void store_cpu_topology(unsigned int cpuid)
 601 {
 602         struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
 603
 604         if (cpuid_topo->package_id != -1)
 605                 goto topology_populated;
 606
 607         cpuid_topo->thread_id = -1;
 608         cpuid_topo->core_id = cpuid;
 609         cpuid_topo->package_id = cpu_to_node(cpuid);
 610
 611         pr_debug("CPU%u: package %d core %d thread %d\n",
 612                  cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
 613                  cpuid_topo->thread_id);
 614
 615 topology_populated:
 616         update_siblings_masks(cpuid);
 617 }
 618 #endif