// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to identify caches on Intel CPU.
 *
 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)
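/*
 * Sizes in cache_table[] below are in KB; MB(x) converts a size given in
 * MB to KB.
 */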
/*
 * All the cache descriptor types we care about (no TLB or
 * trace cache entries)
 */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2, MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2, 512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};
union _cpuid4_leaf_eax {
	struct {
		enum _cache_type type:5;
		unsigned int level:3;
		unsigned int is_self_initializing:1;
		unsigned int is_fully_associative:1;
		unsigned int reserved:4;
		unsigned int num_threads_sharing:12;
		unsigned int num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size:12;
		unsigned int physical_line_partition:10;
		unsigned int ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;
/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the machine:
 * L2 not shared, no SMT etc., which is currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as fake type (they are in "dummy").
 */
	unsigned line_size:8;
	unsigned lines_per_tag:8;
	unsigned size_in_kb:8;

	unsigned line_size:8;
	unsigned lines_per_tag:4;
	unsigned size_in_kb:16;

	unsigned line_size:8;
	unsigned lines_per_tag:4;
	unsigned size_encoded:14;
static const unsigned short assocs[] = {
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };
static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};
static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
	   union _cpuid4_leaf_ebx *ebx,
	   union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;
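	/*
	 * CPUID 0x80000005 reports the L1 cache geometry, 0x80000006 the
	 * L2 and L3 geometry.
	 */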
	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
	assoc = assocs[l1->assoc];
	line_size = l1->line_size;
	lines_per_tag = l1->lines_per_tag;
	size_in_kb = l1->size_in_kb;

	assoc = assocs[l2.assoc];
	line_size = l2.line_size;
	lines_per_tag = l2.lines_per_tag;
	/* cpu_data has errata corrections for K7 applied */
	size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);

	assoc = assocs[l3.assoc];
	line_size = l3.line_size;
	lines_per_tag = l3.lines_per_tag;
	size_in_kb = l3.size_encoded * 512;
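	/*
	 * Multi-node (DCM) processors report the L3 of the whole package;
	 * each node owns half of it.
	 */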
	if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
		size_in_kb = size_in_kb >> 1;
	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
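	/*
	 * number_of_sets is reported as (sets - 1), where
	 * sets = size / (line size * ways); e.g. a 512 KB, 16-way cache with
	 * 64 byte lines has 512 sets.
	 */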
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}
#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);
	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
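	/* The number of usable L3 indices is 1024 times the largest subcache count. */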
	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)
static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache which indices we disable therefore a simple wbinvd()
		 * is not sufficient.
		 */
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
/*
 * disable an L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
				   unsigned slot, unsigned long index)
{
	int ret = 0;
	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);
static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		pr_warn("L3 slot %d in use/index already disabled!\n",
			slot);
#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)
static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}
static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}
static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};
static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}
const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}
static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = topology_die_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		cpuid_count(0x8000001d, index, &eax.full,
			    &ebx.full, &ecx.full, &edx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets + 1) *
			  (ebx.split.coherency_line_size + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity + 1);

	return 0;
}
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
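		/*
		 * EAX[25:14] of leaf 0x8000001d is the number of logical CPUs
		 * sharing this cache, minus one.
		 */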
		num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);
			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
		}
	}
}
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;
	/*
	 * LLC is at the core complex level.
	 * Core complex ID is ApicId[3] for these processors.
	 */
	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
}
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}
void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	num_cache_leaves = find_num_cache_leaves(c);
}
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
	unsigned int cpu = c->cpu_index;

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}
		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache.
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;
		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;
				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
	per_cpu(cpu_llc_id, cpu) = l2_id;
	per_cpu(cpu_l2c_id, cpu) = l2_id;

	per_cpu(cpu_llc_id, cpu) = l3_id;
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;
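		/* All CPUs whose APIC IDs fall in [first, last] share this cache. */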
		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;
			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}
	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);
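	/* CPUs whose APIC IDs agree above the low index_msb bits share this cache. */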
	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */

			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}
static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}
int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}
/*
 * The max shared threads number comes from CPUID.4:EAX[25:14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
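	/*
	 * Example: 8 threads sharing the cache gives index_msb = 3, so all
	 * CPUs with the same (apicid >> 3) get the same cache id.
	 */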
	id4_regs->id = c->apicid >> index_msb;
}
int populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret < 0)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}