From 88f074f4871a8c212b212b725e4dcdcdb09613c1 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Fri, 18 Oct 2013 14:28:59 -0700 Subject: ACPI, CPER: Update cper info We have a lot of confusing names of functions and data structures in amongs the the error reporting code. In particular the "apei" prefix has been applied to many objects that are not part of APEI. Since we will be using these routines for extended error log reporting it will be clearer if we fix up the names first. Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Tony Luck --- include/acpi/actbl1.h | 14 +++++++------- include/acpi/ghes.h | 2 +- include/linux/cper.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 0bd750ebeb49..556c83ee6b42 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -596,7 +596,7 @@ struct acpi_hest_generic { /* Generic Error Status block */ -struct acpi_hest_generic_status { +struct acpi_generic_status { u32 block_status; u32 raw_data_offset; u32 raw_data_length; @@ -606,15 +606,15 @@ struct acpi_hest_generic_status { /* Values for block_status flags above */ -#define ACPI_HEST_UNCORRECTABLE (1) -#define ACPI_HEST_CORRECTABLE (1<<1) -#define ACPI_HEST_MULTIPLE_UNCORRECTABLE (1<<2) -#define ACPI_HEST_MULTIPLE_CORRECTABLE (1<<3) -#define ACPI_HEST_ERROR_ENTRY_COUNT (0xFF<<4) /* 8 bits, error count */ +#define ACPI_GEN_ERR_UC BIT(0) +#define ACPI_GEN_ERR_CE BIT(1) +#define ACPI_GEN_ERR_MULTI_UC BIT(2) +#define ACPI_GEN_ERR_MULTI_CE BIT(3) +#define ACPI_GEN_ERR_COUNT_SHIFT (0xFF<<4) /* 8 bits, error count */ /* Generic Error Data entry */ -struct acpi_hest_generic_data { +struct acpi_generic_data { u8 section_type[16]; u32 error_severity; u16 revision; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 720446cb243e..dfd60d0bfd27 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -14,7 +14,7 @@ struct ghes { struct acpi_hest_generic *generic; - struct acpi_hest_generic_status *estatus; + struct acpi_generic_status *estatus; u64 buffer_paddr; unsigned long flags; union { diff --git a/include/linux/cper.h b/include/linux/cper.h index c23049496531..09ebe2113641 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -389,6 +389,6 @@ struct cper_sec_pcie { u64 cper_next_record_id(void); void cper_print_bits(const char *prefix, unsigned int bits, - const char *strs[], unsigned int strs_size); + const char * const strs[], unsigned int strs_size); #endif -- cgit v1.2.3 From 10ef6b0dffe404bcc54e94cb2ca1a5b18445a66b Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Fri, 18 Oct 2013 14:29:07 -0700 Subject: bitops: Introduce a more generic BITMASK macro GENMASK is used to create a contiguous bitmask([hi:lo]). It is implemented twice in current kernel. One is in EDAC driver, the other is in SiS/XGI FB driver. Move it to a more generic place for other usage. Signed-off-by: Chen, Gong Cc: Borislav Petkov Cc: Thomas Winischhofer Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Acked-by: Borislav Petkov Acked-by: Mauro Carvalho Chehab Signed-off-by: Tony Luck --- include/linux/bitops.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a3b6b82108b9..bd0c4598d03b 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -10,6 +10,14 @@ #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #endif +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + */ +#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) +#define GENMASK_ULL(h, l) (((U64_C(1) << ((h) - (l) + 1)) - 1) << (l)) + extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); -- cgit v1.2.3 From 4b3db708b114fc35ff1e0cd28a2bfb1490dbb5d3 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Mon, 21 Oct 2013 14:29:25 -0700 Subject: ACPI, x86: Extended error log driver for x86 platform This H/W error log driver (a.k.a eMCA driver) is implemented based on http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html After errors are captured, more detailed platform specific information can be got via this new enhanced H/W error log driver. Most notably we can track memory errors back to the DIMM slot silk screen label. Signed-off-by: Chen, Gong Signed-off-by: Tony Luck --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a5db4aeefa36..c30bac8503bc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -311,6 +311,7 @@ struct acpi_osc_context { #define OSC_INVALID_REVISION_ERROR 8 #define OSC_CAPABILITIES_MASK_ERROR 16 +acpi_status acpi_str_to_uuid(char *str, u8 *uuid); acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* platform-wide _OSC bits */ -- cgit v1.2.3 From dd6dad4288cb93e79bd7abfa6c6a338c47454d1a Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Fri, 18 Oct 2013 14:29:25 -0700 Subject: DMI: Parse memory device (type 17) in SMBIOS This patch adds a new interface to decode memory device (type 17) to help error reporting on DIMMs. Original-author: Tony Luck Signed-off-by: Chen, Gong Acked-by: Naveen N. Rao Acked-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Tony Luck --- include/linux/dmi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index b6eb7a05d58e..f820f0a336c9 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -99,6 +99,7 @@ extern const char * dmi_get_system_info(int field); extern const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from); extern void dmi_scan_machine(void); +extern void dmi_memdev_walk(void); extern void dmi_set_dump_stack_arch_desc(void); extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp); extern int dmi_name_in_vendors(const char *str); @@ -107,6 +108,7 @@ extern int dmi_available; extern int dmi_walk(void (*decode)(const struct dmi_header *, void *), void *private_data); extern bool dmi_match(enum dmi_field f, const char *str); +extern void dmi_memdev_name(u16 handle, const char **bank, const char **device); #else @@ -115,6 +117,7 @@ static inline const char * dmi_get_system_info(int field) { return NULL; } static inline const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from) { return NULL; } static inline void dmi_scan_machine(void) { return; } +static inline void dmi_memdev_walk(void) { } static inline void dmi_set_dump_stack_arch_desc(void) { } static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp) { @@ -133,6 +136,8 @@ static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *), void *private_data) { return -1; } static inline bool dmi_match(enum dmi_field f, const char *str) { return false; } +static inline void dmi_memdev_name(u16 handle, const char **bank, + const char **device) { } static inline const struct dmi_system_id * dmi_first_match(const struct dmi_system_id *list) { return NULL; } -- cgit v1.2.3 From 147de14772ed897727dba7353916b02d1e0f17f4 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Fri, 18 Oct 2013 14:30:13 -0700 Subject: ACPI, APEI, CPER: Add UEFI 2.4 support for memory error In latest UEFI spec(by now it is 2.4) memory error definition for CPER (UEFI 2.4 Appendix N Common Platform Error Record) adds some new fields. These fields help people to locate memory error to an actual DIMM location. Original-author: Tony Luck Signed-off-by: Chen, Gong Reviewed-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Acked-by: Naveen N. Rao Signed-off-by: Tony Luck --- include/linux/cper.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cper.h b/include/linux/cper.h index 09ebe2113641..2fc0ec3d89cc 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -218,8 +218,8 @@ enum { #define CPER_PROC_VALID_IP 0x1000 #define CPER_MEM_VALID_ERROR_STATUS 0x0001 -#define CPER_MEM_VALID_PHYSICAL_ADDRESS 0x0002 -#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK 0x0004 +#define CPER_MEM_VALID_PA 0x0002 +#define CPER_MEM_VALID_PA_MASK 0x0004 #define CPER_MEM_VALID_NODE 0x0008 #define CPER_MEM_VALID_CARD 0x0010 #define CPER_MEM_VALID_MODULE 0x0020 @@ -232,6 +232,9 @@ enum { #define CPER_MEM_VALID_RESPONDER_ID 0x1000 #define CPER_MEM_VALID_TARGET_ID 0x2000 #define CPER_MEM_VALID_ERROR_TYPE 0x4000 +#define CPER_MEM_VALID_RANK_NUMBER 0x8000 +#define CPER_MEM_VALID_CARD_HANDLE 0x10000 +#define CPER_MEM_VALID_MODULE_HANDLE 0x20000 #define CPER_PCIE_VALID_PORT_TYPE 0x0001 #define CPER_PCIE_VALID_VERSION 0x0002 @@ -347,6 +350,10 @@ struct cper_sec_mem_err { __u64 responder_id; __u64 target_id; __u8 error_type; + __u8 reserved; + __u16 rank; + __u16 mem_array_handle; /* card handle in UEFI 2.4 */ + __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ }; struct cper_sec_pcie { -- cgit v1.2.3 From 56507694de3453076d73e0e9813349586ee67e59 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Fri, 18 Oct 2013 14:30:38 -0700 Subject: EDAC, GHES: Update ghes error record info In latest UEFI spec(by now it's 2.4) there are some new fields for memory error reporting. Add these new fields for ghes_edac interface. Signed-off-by: Chen, Gong Cc: Mauro Carvalho Chehab Signed-off-by: Tony Luck --- include/linux/edac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index 5c6d7fbaf89e..dbdffe8d4469 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -51,7 +51,7 @@ static inline void opstate_init(void) #define EDAC_MC_LABEL_LEN 31 /* Maximum size of the location string */ -#define LOCATION_SIZE 80 +#define LOCATION_SIZE 256 /* Defines the maximum number of labels that can be reported */ #define EDAC_MAX_LABELS 8 -- cgit v1.2.3