/*
 * Machine check injection support.
 * Copyright 2008 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 * for testing different aspects of the RAS code. This driver should be
 * built as module so that it can be loaded on production kernels for
 * testing purposes.
 *
 * This file may be distributed under the terms of the GNU General Public
 * License version 2.
 *
 * Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de>
 *			  Advanced Micro Devices Inc.
 */
26 #include <linux/cpu.h>
27 #include <linux/debugfs.h>
28 #include <linux/kernel.h>
29 #include <linux/module.h>
30 #include <linux/notifier.h>
31 #include <linux/pci.h>
32 #include <linux/uaccess.h>
34 #include <asm/amd_nb.h>
36 #include <asm/irq_vectors.h>
41 #include "mce-internal.h"
/*
 * Collect all the MCi_XXX settings
 */
/* Staged injection record; filled in through the debugfs files below. */
static struct mce i_mce;
/* Root dentry of this injector's debugfs directory. */
static struct dentry *dfs_inj;

/* Big enough for the longest flags string ("sw"/"hw"/"df"/"th") + '\n' + NUL */
#define MAX_FLAG_OPT_SIZE	4
	SW_INJ = 0,	/* SW injection, simply decode the error */
	HW_INJ,		/* Trigger a #MC */
	DFR_INT_INJ,	/* Trigger Deferred error interrupt */
	THR_INT_INJ,	/* Trigger threshold interrupt */

/* String names for the injection types above; indexed by enum injection_type. */
static const char * const flags_options[] = {

/* Set default injection to SW_INJ */
static enum injection_type inj_type = SW_INJ;
/*
 * MCE_INJECT_SET(reg) generates inj_<reg>_set(): a debugfs setter which
 * stores @val into the matching field of the staged struct mce passed
 * via @data.
 */
#define MCE_INJECT_SET(reg) \
static int inj_##reg##_set(void *data, u64 val) \
	struct mce *m = (struct mce *)data; \

MCE_INJECT_SET(status);
/*
 * MCE_INJECT_GET(reg) generates inj_<reg>_get(): a debugfs getter which
 * returns the matching field of the staged struct mce through *val.
 */
#define MCE_INJECT_GET(reg) \
static int inj_##reg##_get(void *data, u64 *val) \
	struct mce *m = (struct mce *)data; \

MCE_INJECT_GET(status);

/* One debugfs attribute per injected MCi_* register value, in hex. */
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
/*
 * Zero the injection record and pre-fill the fields that identify the
 * current CPU (vendor, CPUID(1) signature, microcode revision) plus a
 * wall-clock timestamp, so each injection starts from a known state.
 */
static void setup_inj_struct(struct mce *m)
	memset(m, 0, sizeof(struct mce));

	m->cpuvendor = boot_cpu_data.x86_vendor;
	m->time = ktime_get_real_seconds();
	m->cpuid = cpuid_eax(1);
	m->microcode = boot_cpu_data.microcode;
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
	/* Per-CPU staging record consumed later by raise_local()/NMI path. */
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	/* NOTE(review): the i->finished clear and write barrier are elided in this excerpt — confirm ordering. */

	/* First set the fields after finished */
	i->extcpu = m->extcpu;

	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
/*
 * Poll-mode injection: run machine_check_poll() over all banks on the
 * current CPU with interrupts disabled.
 */
static void raise_poll(struct mce *m)
	memset(&b, 0xff, sizeof(mce_banks_t));	/* look at every bank */
	local_irq_save(flags);
	machine_check_poll(0, &b);
	local_irq_restore(flags);
145 static void raise_exception(struct mce *m, struct pt_regs *pregs)
151 memset(®s, 0, sizeof(struct pt_regs));
156 /* in mcheck exeception handler, irq will be disabled */
157 local_irq_save(flags);
158 do_machine_check(pregs, 0);
159 local_irq_restore(flags);
/* CPUs that still have a broadcast injection pending against them. */
static cpumask_var_t mce_inject_cpumask;
/* Serializes injections arriving through the injector notifier chain. */
static DEFINE_MUTEX(mce_inject_mutex);
/*
 * NMI callback for broadcast injection: if this CPU was targeted, drop
 * it from the pending mask and raise the staged exception with the
 * interrupted register state.
 */
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);
	/* Not a CPU we target: NOTE(review) — the early NMI_DONE return is elided in this excerpt. */
	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
	cpumask_clear_cpu(cpu, mce_inject_cpumask);
	if (m->inject_flags & MCJ_EXCEPTION)
		raise_exception(m, regs);
/*
 * IRQ-broadcast path, run via smp_call_function_many(): raise the staged
 * exception on this CPU if it is still marked pending.
 */
static void mce_irq_ipi(void *info)
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);

	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
	    m->inject_flags & MCJ_EXCEPTION) {
		cpumask_clear_cpu(cpu, mce_inject_cpumask);
		raise_exception(m, NULL);
/* Inject mce on current CPU */
static int raise_local(void)
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);

	if (m->inject_flags & MCJ_EXCEPTION) {
		pr_info("Triggering MCE exception on CPU %d\n", cpu);
		/* NOTE(review): a switch (context) header is elided in this excerpt. */
		/*
		 * Could do more to fake interrupts like
		 * calling irq_enter, but the necessary
		 * machinery isn't exported currently.
		 */
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			/* presumably under the switch default — confirm against full source */
			pr_info("Invalid MCE context\n");
		pr_info("MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		/* No exception requested but a status value staged: poll mode. */
		pr_info("Starting machine check poll CPU %d\n", cpu);
		pr_info("Machine check poll done on CPU %d\n", cpu);
/*
 * Top-level raise: for broadcast injections, first kick every other
 * online CPU whose staged record is ready, wait for them, then inject
 * locally.
 */
static void __maybe_unused raise_mce(struct mce *m)
	int context = MCJ_CTX(m->inject_flags);

	/* NOTE(review): the body of this random-context special case is elided here. */
	if (context == MCJ_CTX_RANDOM)

	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		/* Target all online CPUs except ourselves ... */
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);
			/* ... and only those with a finished, random-context record. */
			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * don't wait because mce_irq_ipi is necessary
				 * to be sync with following raise_local
				 */
				smp_call_function_many(mce_inject_cpumask,
						       mce_irq_ipi, NULL, 0);
			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
				apic->send_IPI_mask(mce_inject_cpumask,
		/* Spin until every targeted CPU has consumed the event, max ~2s. */
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2*HZ)) {
				pr_err("Timeout waiting for mce inject %lx\n",
				       *cpumask_bits(mce_inject_cpumask));
/*
 * Injector-chain callback: serialize against concurrent injections and
 * raise the passed-in MCE record.
 */
static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
	struct mce *m = (struct mce *)data;

	mutex_lock(&mce_inject_mutex);
	/* NOTE(review): the raise_mce(m) call under the mutex is elided in this excerpt. */
	mutex_unlock(&mce_inject_mutex);

/* Hooked into the MCE injector notifier chain in inject_init(). */
static struct notifier_block inject_nb = {
	.notifier_call = mce_inject_raise,
/*
 * Set or clear HWCR[18] (McStatusWrEn) on @cpu, which gates software
 * writes to the MCi_STATUS MSRs on AMD parts.
 *
 * Caller needs to make sure this cpu doesn't disappear
 * from under us, i.e.: get_cpu/put_cpu.
 */
static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
		pr_err("%s: error reading HWCR\n", __func__);

	/* HWCR bit 18 == McStatusWrEn: allow MCi_STATUS writes while set. */
	enable ? (l |= BIT(18)) : (l &= ~BIT(18));

	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
		pr_err("%s: error writing HWCR\n", __func__);
/*
 * Map a user-supplied flags string to an injection type: matches when
 * @buf begins with one of the known option names in flags_options[].
 */
static int __set_inj(const char *buf)
	for (i = 0; i < N_INJ_TYPES; i++) {
		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
/* debugfs read for "flags": report the currently selected injection type. */
static ssize_t flags_read(struct file *filp, char __user *ubuf,
			  size_t cnt, loff_t *ppos)
	char buf[MAX_FLAG_OPT_SIZE];

	n = sprintf(buf, "%s\n", flags_options[inj_type]);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
/*
 * debugfs write for "flags": parse the requested injection type string
 * and make it current.
 */
static ssize_t flags_write(struct file *filp, const char __user *ubuf,
			   size_t cnt, loff_t *ppos)
	char buf[MAX_FLAG_OPT_SIZE], *__buf;

	/* NOTE(review): -EINVAL return and buf NUL-termination are elided in this excerpt. */
	if (!cnt || cnt > MAX_FLAG_OPT_SIZE)

	if (copy_from_user(&buf, ubuf, cnt))

	/* strip whitespace */
	__buf = strstrip(buf);

	err = __set_inj(__buf);
		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
/* File operations for the "flags" debugfs entry. */
static const struct file_operations flags_fops = {
	.write = flags_write,
	.llseek = generic_file_llseek,
/*
 * On which CPU to inject?
 */
MCE_INJECT_GET(extcpu);

/* Setter for "cpu": reject out-of-range and offline CPU numbers. */
static int inj_extcpu_set(void *data, u64 val)
	struct mce *m = (struct mce *)data;

	if (val >= nr_cpu_ids || !cpu_online(val)) {
		pr_err("%s: Invalid CPU: %llu\n", __func__, val);

DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
/* Raise #MC (vector 18) on the calling CPU via a software interrupt. */
static void trigger_mce(void *info)
	asm volatile("int $18");

/* Raise the deferred-error APIC interrupt on the calling CPU. */
static void trigger_dfr_int(void *info)
	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));

/* Raise the MCA threshold APIC interrupt on the calling CPU. */
static void trigger_thr_int(void *info)
	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
/*
 * Return the CPU number of the first core ("node base core") of
 * @node_id: cores-per-node times the node index.
 */
static u32 get_nbc_for_node(int node_id)
	struct cpuinfo_x86 *c = &boot_cpu_data;

	cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();

	return cores_per_node * node_id;
/*
 * Ensure D18F3x44[NbMcaToMstCpuEn] is set on node @nid so northbridge
 * (bank 4) errors get reported on the node's master core. Logs and bails
 * on PCI config read/write failures.
 */
static void toggle_nb_mca_mst_cpu(u16 nid)
	struct amd_northbridge *nb;

	nb = node_to_amd_nb(nid);

	err = pci_read_config_dword(F3, NBCFG, &val);
		pr_err("%s: Error reading F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);

	/* BIOS was expected to have set this already; note that we do it now. */
	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",

	err = pci_write_config_dword(F3, NBCFG, val);
		pr_err("%s: Error writing F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);
/*
 * Runs on the target CPU: write the staged record into the real MCA MSRs
 * — SMCA register layout when the feature is present, legacy MCi_*
 * registers otherwise.
 */
static void prepare_msrs(void *info)
	struct mce m = *(struct mce *)info;

	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);

	if (boot_cpu_has(X86_FEATURE_SMCA)) {
		if (m.inject_flags == DFR_INT_INJ) {
			/* Deferred errors go to the DESTAT/DEADDR register pair. */
			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
			/* NOTE(review): an else branch separating these writes is elided here. */
			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);

		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
		/* Legacy (non-SMCA) MCA register layout. */
		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
/*
 * Perform the injection as configured via debugfs: decode-only for SW
 * injection; otherwise stage the MSR values on the target CPU and fire
 * the matching exception or APIC interrupt.
 */
static void do_inject(void)
	unsigned int cpu = i_mce.extcpu;

	/* Staged misc/synd values imply the STATUS valid bits (guarding ifs elided here). */
	i_mce.status |= MCI_STATUS_MISCV;

	i_mce.status |= MCI_STATUS_SYNDV;

	if (inj_type == SW_INJ) {
		/* Software injection: just log/decode the record, no HW pokes. */
		mce_inject_log(&i_mce);

	/* prep MCE global settings for the injection */
	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;

	/* Restart IP is valid unless the error is processor-context-corrupt. */
	if (!(i_mce.status & MCI_STATUS_PCC))
		mcg_status |= MCG_STATUS_RIPV;

	/*
	 * Ensure necessary status bits for deferred errors:
	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
	 */
	if (inj_type == DFR_INT_INJ) {
		i_mce.status |= MCI_STATUS_DEFERRED;
		i_mce.status &= ~MCI_STATUS_UC;

	/*
	 * For multi node CPUs, logging and reporting of bank 4 errors happens
	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
	 * Fam10h and later BKDGs.
	 */
	if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
	    boot_cpu_data.x86 < 0x17) {
		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
		cpu = get_nbc_for_node(amd_get_nb_id(cpu));

	if (!cpu_online(cpu))

	/* Open the McStatusWrEn window only around the MSR writes. */
	toggle_hw_mce_inject(cpu, true);

	i_mce.mcgstatus = mcg_status;
	i_mce.inject_flags = inj_type;
	smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);

	toggle_hw_mce_inject(cpu, false);

	/* NOTE(review): a switch (inj_type) choosing one trigger is elided here. */
	smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
	smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
	smp_call_function_single(cpu, trigger_mce, NULL, 0);
/*
 * This denotes into which bank we're injecting and triggers
 * the injection, at the same time.
 */
static int inj_bank_set(void *data, u64 val)
	struct mce *m = (struct mce *)data;

	/* Get bank count on target CPU so we can handle non-uniform values. */
	rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
	n_banks = cap & MCG_BANKCNT_MASK;

	if (val >= n_banks) {
		pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);

	/* NOTE(review): the m->bank assignment and do_inject() call are elided in this excerpt. */
	/* Reset injection struct */
	setup_inj_struct(&i_mce);

MCE_INJECT_GET(bank);

/* "bank" attribute: reading returns the bank, writing triggers injection. */
DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
/*
 * Usage description exposed through the "README" debugfs file.
 * Fixes the duplicated word in the "df" paragraph ("the feature is
 * / is present" -> "the feature is / present").
 */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1:	i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature is \n"
"\t present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n";
/* debugfs read for "README": hand the usage text back to userspace. */
inj_readme_read(struct file *filp, char __user *ubuf,
		size_t cnt, loff_t *ppos)
	return simple_read_from_buffer(ubuf, cnt, ppos,
				       readme_msg, strlen(readme_msg));

/* Read-only file operations for "README". */
static const struct file_operations readme_fops = {
	.read = inj_readme_read,
/*
 * Table of debugfs files to create: name, fops and permissions.
 * NOTE(review): other struct members (name, dentry, perm) and the
 * dfs_fls[] declaration line are elided in this excerpt.
 */
static struct dfs_node {
	const struct file_operations *fops;
	{ .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
/*
 * Create the "mce-inject" debugfs directory and every file described in
 * dfs_fls[]; unwind files already created (and the directory) on failure.
 */
static int __init debugfs_init(void)
	dfs_inj = debugfs_create_dir("mce-inject", NULL);

	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
		dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
	/* Error unwind: remove files created so far, then the directory. */
		debugfs_remove(dfs_fls[i].d);

	debugfs_remove(dfs_inj);
/*
 * Module init: allocate the broadcast cpumask, create the debugfs
 * interface and hook into the NMI handler and MCE injector chains.
 */
static int __init inject_init(void)
	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))

	err = debugfs_init();
		/* debugfs setup failed: release the cpumask again. */
		free_cpumask_var(mce_inject_cpumask);

	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
	mce_register_injector_chain(&inject_nb);

	setup_inj_struct(&i_mce);

	pr_info("Machine check injector initialized\n");
/*
 * Module exit: unhook both notifier registrations, remove the debugfs
 * tree and free the broadcast cpumask.
 */
static void __exit inject_exit(void)
	mce_unregister_injector_chain(&inject_nb);
	unregister_nmi_handler(NMI_LOCAL, "mce_notify");

	debugfs_remove_recursive(dfs_inj);

	memset(&dfs_fls, 0, sizeof(dfs_fls));	/* drop stale dentry pointers */

	free_cpumask_var(mce_inject_cpumask);
/* Standard module entry/exit plumbing. */
module_init(inject_init);
module_exit(inject_exit);
MODULE_LICENSE("GPL");