2 * UEFI Common Platform Error Record (CPER) support
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
7 * CPER is the format used to describe platform hardware errors in
8 * various tables, such as ERST, BERT and HEST.
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.4.
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
39 * CPER record ID need to be unique even after reboot, because record
40 * ID is used as index for ERST storage, while CPER records from
41 * multiple boot may co-exist in ERST.
43 u64 cper_next_record_id(void)
45 static atomic64_t seq;
47 if (!atomic64_read(&seq))
48 atomic64_set(&seq, ((u64)get_seconds()) << 32);
50 return atomic64_inc_return(&seq);
52 EXPORT_SYMBOL_GPL(cper_next_record_id);
/*
 * Severity names; cper_severity_str() indexes this table by severity value.
 * NOTE(review): the table entries are not visible in this view.
 */
54 static const char * const severity_strs[] = {
61 const char *cper_severity_str(unsigned int severity)
63 return severity < ARRAY_SIZE(severity_strs) ?
64 severity_strs[severity] : "unknown";
66 EXPORT_SYMBOL_GPL(cper_severity_str);
69 * cper_print_bits - print strings for set bits
70 * @pfx: prefix for each line, including log level and prefix string
72 * @strs: string array, indexed by bit position
73 * @strs_size: size of the string array: @strs
75 * For each set bit in @bits, print the corresponding string in @strs.
76 * If the output would be longer than 80 characters, multiple lines are
77 * printed, with @pfx printed at the beginning of each line.
/*
 * NOTE(review): several short lines of this function (local declarations,
 * loop-body statements, braces) are not visible in this view; the comments
 * below describe only the statements that are.
 */
79 void cper_print_bits(const char *pfx, unsigned int bits,
80 const char * const strs[], unsigned int strs_size)
/* Walk every bit position that has an entry in @strs. */
86 for (i = 0; i < strs_size; i++) {
/*
 * Test bit i of @bits.  The shift assumes @strs_size does not exceed the
 * bit width of unsigned int - TODO confirm for all callers.
 */
87 if (!(bits & (1U << i)))
/* Buffer is non-empty and adding ", " plus str would pass 80 characters. */
92 if (len && len + strlen(str) + 2 > 80) {
/* Begin a fresh line: prefix followed by the string. */
97 len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
/* Append to the current line, separated by ", ". */
99 len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
/*
 * Name tables used by cper_print_proc_generic().
 * NOTE(review): most table entries are not visible in this view.
 */
/* Indexed by the proc_type field. */
105 static const char * const proc_type_strs[] = {
/* Indexed by the proc_isa field. */
110 static const char * const proc_isa_strs[] = {
/* Indexed by bit position of proc_error_type (decoded via cper_print_bits()). */
116 static const char * const proc_error_type_strs[] = {
120 "micro-architectural error",
/* Indexed by the operation field. */
123 static const char * const proc_op_strs[] = {
124 "unknown or generic",
127 "instruction execution",
/* Indexed by bit position of flags (decoded via cper_print_bits()). */
130 static const char * const proc_flag_strs[] = {
137 static void cper_print_proc_generic(const char *pfx,
138 const struct cper_sec_proc_generic *proc)
140 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
141 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
142 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
143 proc_type_strs[proc->proc_type] : "unknown");
144 if (proc->validation_bits & CPER_PROC_VALID_ISA)
145 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
146 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
147 proc_isa_strs[proc->proc_isa] : "unknown");
148 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
149 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
150 cper_print_bits(pfx, proc->proc_error_type,
151 proc_error_type_strs,
152 ARRAY_SIZE(proc_error_type_strs));
154 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
155 printk("%s""operation: %d, %s\n", pfx, proc->operation,
156 proc->operation < ARRAY_SIZE(proc_op_strs) ?
157 proc_op_strs[proc->operation] : "unknown");
158 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
159 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
160 cper_print_bits(pfx, proc->flags, proc_flag_strs,
161 ARRAY_SIZE(proc_flag_strs));
163 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
164 printk("%s""level: %d\n", pfx, proc->level);
165 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
166 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
167 if (proc->validation_bits & CPER_PROC_VALID_ID)
168 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
169 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
170 printk("%s""target_address: 0x%016llx\n",
171 pfx, proc->target_addr);
172 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
173 printk("%s""requestor_id: 0x%016llx\n",
174 pfx, proc->requestor_id);
175 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
176 printk("%s""responder_id: 0x%016llx\n",
177 pfx, proc->responder_id);
178 if (proc->validation_bits & CPER_PROC_VALID_IP)
179 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
/*
 * Memory error type names; cper_mem_err_type_str() indexes this table by
 * error type.
 * NOTE(review): several table entries are not visible in this view.
 */
182 static const char * const mem_err_type_strs[] = {
187 "single-symbol chipkill ECC",
188 "multi-symbol chipkill ECC",
196 "scrub corrected error",
197 "scrub uncorrected error",
198 "physical memory map-out event",
201 const char *cper_mem_err_type_str(unsigned int etype)
203 return etype < ARRAY_SIZE(mem_err_type_strs) ?
204 mem_err_type_strs[etype] : "unknown";
206 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
/*
 * Format the memory-error location fields that @mem flags as valid into
 * @msg, as space-terminated "name: value " pairs appended one after another.
 * NOTE(review): the local declarations, the trailing argument lines of the
 * last four scnprintf() calls and the end of the function (including the
 * return statement) are not visible in this view.
 */
208 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
/* Cap on bytes written to @msg; visible callers pass a char[CPER_REC_LEN]. */
216 len = CPER_REC_LEN - 1;
/* Each valid field is appended at offset n, and n advances by what was written. */
217 if (mem->validation_bits & CPER_MEM_VALID_NODE)
218 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
219 if (mem->validation_bits & CPER_MEM_VALID_CARD)
220 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
221 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
222 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
223 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
224 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
225 if (mem->validation_bits & CPER_MEM_VALID_BANK)
226 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
227 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
228 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
229 if (mem->validation_bits & CPER_MEM_VALID_ROW)
230 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
231 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
232 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
233 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
234 n += scnprintf(msg + n, len - n, "bit_position: %d ",
236 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
237 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
239 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
240 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
242 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
/*
 * NOTE(review): unlike every field above, the result of this scnprintf()
 * is not added to n.  If n is used afterwards as the string length or the
 * return value, the target_id text would not be counted - confirm against
 * the lines not visible here.
 */
243 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
/*
 * Format a "DIMM location: ..." description for the memory device handle in
 * @mem into @msg.
 * NOTE(review): the remaining declarations, the branch conditions between
 * the two snprintf() calls and the return statements are not visible in
 * this view.
 */
250 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
253 const char *bank = NULL, *device = NULL;
/* Nothing to format without a buffer or without a valid module handle. */
255 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
/* Look up the bank and device strings for the handle via DMI. */
259 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
/* Form used with the bank/device strings from the lookup. */
261 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
/* Alternative form that reports the raw DMI handle instead. */
263 n = snprintf(msg, len,
264 "DIMM location: not present. DMI handle: 0x%.4x ",
265 mem->mem_dev_handle);
270 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
271 struct cper_mem_err_compact *cmem)
273 cmem->validation_bits = mem->validation_bits;
274 cmem->node = mem->node;
275 cmem->card = mem->card;
276 cmem->module = mem->module;
277 cmem->bank = mem->bank;
278 cmem->device = mem->device;
279 cmem->row = mem->row;
280 cmem->column = mem->column;
281 cmem->bit_pos = mem->bit_pos;
282 cmem->requestor_id = mem->requestor_id;
283 cmem->responder_id = mem->responder_id;
284 cmem->target_id = mem->target_id;
285 cmem->rank = mem->rank;
286 cmem->mem_array_handle = mem->mem_array_handle;
287 cmem->mem_dev_handle = mem->mem_dev_handle;
/*
 * Render the location information of @cmem into the trace sequence @p.
 * NOTE(review): the end of the function is not visible in this view;
 * presumably it returns ret - confirm.
 */
290 const char *cper_mem_err_unpack(struct trace_seq *p,
291 struct cper_mem_err_compact *cmem)
/* Remember the buffer position before anything is appended. */
293 const char *ret = trace_seq_buffer_ptr(p);
/* Scratch buffer shared by both location formatters below. */
294 char rcd_decode_str[CPER_REC_LEN];
/* Append each description only when its formatter returns non-zero. */
296 if (cper_mem_err_location(cmem, rcd_decode_str))
297 trace_seq_printf(p, "%s", rcd_decode_str);
298 if (cper_dimm_err_location(cmem, rcd_decode_str))
299 trace_seq_printf(p, "%s", rcd_decode_str);
/* Terminate the text appended to the sequence. */
300 trace_seq_putc(p, '\0');
/*
 * Print a memory error section: status, addresses, location, error type
 * and DIMM location, each only when flagged valid or when its formatter
 * produced output.
 * NOTE(review): the last parameter line of the signature and the statement
 * following the pr_err() below are not visible in this view; the body uses
 * a variable named len as the section length.
 */
305 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
308 struct cper_mem_err_compact cmem;
309 char rcd_decode_str[CPER_REC_LEN];
311 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
/*
 * The mask selects CPER_MEM_VALID_RANK_NUMBER and every higher bit
 * (assuming that constant is a single-bit flag - TODO confirm).  Those
 * bits must not be set when the section has the old structure's size.
 */
312 if (len == sizeof(struct cper_sec_mem_err_old) &&
313 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
314 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
317 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
318 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
319 if (mem->validation_bits & CPER_MEM_VALID_PA)
320 printk("%s""physical_address: 0x%016llx\n",
321 pfx, mem->physical_addr);
322 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
323 printk("%s""physical_address_mask: 0x%016llx\n",
324 pfx, mem->physical_addr_mask);
/* Pack into the compact form that the location formatters take. */
325 cper_mem_err_pack(mem, &cmem);
326 if (cper_mem_err_location(&cmem, rcd_decode_str))
327 printk("%s%s\n", pfx, rcd_decode_str);
328 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
329 u8 etype = mem->error_type;
330 printk("%s""error_type: %d, %s\n", pfx, etype,
331 cper_mem_err_type_str(etype));
/* rcd_decode_str is reused for the DIMM description. */
333 if (cper_dimm_err_location(&cmem, rcd_decode_str))
334 printk("%s%s\n", pfx, rcd_decode_str);
/*
 * PCIe port type names; cper_print_pcie() indexes this table by port_type.
 * NOTE(review): several table entries are not visible in this view.
 */
337 static const char * const pcie_port_type_strs[] = {
339 "legacy PCI end point",
343 "upstream switch port",
344 "downstream switch port",
345 "PCIe to PCI/PCI-X bridge",
346 "PCI/PCI-X to PCIe bridge",
347 "root complex integrated endpoint device",
348 "root complex event collector",
/*
 * Print a PCIe error section; each group of fields is printed only when its
 * validation bit is set in @pcie.  @gdata supplies the severity of the
 * enclosing section.
 * NOTE(review): the declaration of p, one line opening a printk() call and
 * the brace lines are not visible in this view.
 */
351 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
352 const struct acpi_hest_generic_data *gdata)
354 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
355 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
356 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
357 pcie_port_type_strs[pcie->port_type] : "unknown");
358 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
359 printk("%s""version: %d.%d\n", pfx,
360 pcie->version.major, pcie->version.minor);
361 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
362 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
363 pcie->command, pcie->status);
364 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
366 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
367 pcie->device_id.segment, pcie->device_id.bus,
368 pcie->device_id.device, pcie->device_id.function);
/* The slot number sits above CPER_PCIE_SLOT_SHIFT bits of the slot field. */
369 printk("%s""slot: %d\n", pfx,
370 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
371 printk("%s""secondary_bus: 0x%02x\n", pfx,
372 pcie->device_id.secondary_bus);
373 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
374 pcie->device_id.vendor_id, pcie->device_id.device_id);
/* The three class code bytes are printed highest index first. */
375 p = pcie->device_id.class_code;
376 printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
378 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
379 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
380 pcie->serial_number.lower, pcie->serial_number.upper);
381 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
383 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
384 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
386 /* Fatal errors call __ghes_panic() before AER handler prints this */
/*
 * NOTE(review): the severity is tested with a bitwise AND here, while
 * cper_estatus_print() compares severity with ==.  If the severity values
 * are an enumeration rather than bit flags, this test can also match
 * non-fatal severities sharing that bit - confirm against the
 * CPER_SEV_* definitions.
 */
387 if ((pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) &&
388 (gdata->error_severity & CPER_SEV_FATAL)) {
389 struct aer_capability_regs *aer;
/* aer_info is reinterpreted as the AER capability register layout. */
391 aer = (struct aer_capability_regs *)pcie->aer_info;
392 printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
393 pfx, aer->uncor_status, aer->uncor_mask);
394 printk("%saer_uncor_severity: 0x%08x\n",
395 pfx, aer->uncor_severity);
396 printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
397 aer->header_log.dw0, aer->header_log.dw1,
398 aer->header_log.dw2, aer->header_log.dw3);
/*
 * Print one generic error data entry: its severity and optional FRU
 * information, then the section payload decoded according to section_type.
 * The payload starts directly after the @gdata header.
 * NOTE(review): the declarations of severity and newpfx, the else lines and
 * the return before the error label are not visible in this view.
 */
402 static void cper_estatus_print_section(
403 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
405 uuid_le *sec_type = (uuid_le *)gdata->section_type;
409 severity = gdata->error_severity;
410 printk("%s""Error %d, type: %s\n", pfx, sec_no,
411 cper_severity_str(severity));
412 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
413 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
/* The precision limits fru_text output to 20 characters. */
414 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
415 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
/* Section contents are printed one indent level deeper than the header. */
417 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
418 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
419 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
420 printk("%s""section_type: general processor error\n", newpfx);
/* Decode only if the payload is large enough for the structure. */
421 if (gdata->error_data_length >= sizeof(*proc_err))
422 cper_print_proc_generic(newpfx, proc_err);
424 goto err_section_too_small;
425 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
426 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
427 printk("%s""section_type: memory error\n", newpfx);
/* The minimum accepted size is that of the old memory error layout. */
428 if (gdata->error_data_length >=
429 sizeof(struct cper_sec_mem_err_old))
430 cper_print_mem(newpfx, mem_err,
431 gdata->error_data_length);
433 goto err_section_too_small;
434 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
435 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
436 printk("%s""section_type: PCIe error\n", newpfx);
437 if (gdata->error_data_length >= sizeof(*pcie))
438 cper_print_pcie(newpfx, pcie, gdata);
440 goto err_section_too_small;
442 printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
446 err_section_too_small:
447 pr_err(FW_WARN "error section length is too small\n");
/*
 * Print a generic error status block: the overall severity, then every
 * generic error data entry that follows the @estatus header.
 * NOTE(review): the declarations of severity, newpfx and sec_no, and the
 * end of the loop body, are not visible in this view.
 */
450 void cper_estatus_print(const char *pfx,
451 const struct acpi_hest_generic_status *estatus)
453 struct acpi_hest_generic_data *gdata;
454 unsigned int data_len, gedata_len;
459 severity = estatus->error_severity;
460 if (severity == CPER_SEV_CORRECTED)
461 printk("%s%s\n", pfx,
462 "It has been corrected by h/w "
463 "and requires no further action");
464 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
465 data_len = estatus->data_length;
/* The first entry starts immediately after the status header. */
466 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
467 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
/* Continue while at least one full entry header remains. */
468 while (data_len >= sizeof(*gdata)) {
469 gedata_len = gdata->error_data_length;
470 cper_estatus_print_section(newpfx, gdata, sec_no);
/*
 * NOTE(review): data_len is unsigned and gedata_len is not checked against
 * the remaining length here, so an oversized error_data_length would wrap
 * data_len.  Presumably callers validate with cper_estatus_check() first -
 * confirm.
 */
471 data_len -= gedata_len + sizeof(*gdata);
/* Step over this entry's header and payload to the next entry. */
472 gdata = (void *)(gdata + 1) + gedata_len;
476 EXPORT_SYMBOL_GPL(cper_estatus_print);
/*
 * Sanity-check the length fields in the header of @estatus.
 * NOTE(review): the return statements are not visible in this view, so the
 * exact return values are not documented here.
 */
478 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
/* A non-zero data_length must be able to hold at least one entry header. */
480 if (estatus->data_length &&
481 estatus->data_length < sizeof(struct acpi_hest_generic_data))
/* When raw data is present, its offset must not fall before the end of the data area. */
483 if (estatus->raw_data_length &&
484 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
489 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
/*
 * Validate @estatus: run the header check, then walk the generic error data
 * entries and verify that each payload fits in the remaining data length.
 * NOTE(review): the declaration of rc, the handling of rc and the return
 * statements are not visible in this view.
 */
491 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
493 struct acpi_hest_generic_data *gdata;
494 unsigned int data_len, gedata_len;
497 rc = cper_estatus_check_header(estatus);
500 data_len = estatus->data_length;
/* The first entry starts immediately after the status header. */
501 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
/* The loop condition guarantees data_len - sizeof(*gdata) cannot wrap. */
502 while (data_len >= sizeof(*gdata)) {
503 gedata_len = gdata->error_data_length;
/* The payload must fit in what remains after this entry's header. */
504 if (gedata_len > data_len - sizeof(*gdata))
506 data_len -= gedata_len + sizeof(*gdata);
/* Step over this entry's header and payload to the next entry. */
507 gdata = (void *)(gdata + 1) + gedata_len;
514 EXPORT_SYMBOL_GPL(cper_estatus_check);