From c798c88f3962ddff89c7aa818986caeecd46ab4c Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Wed, 19 Sep 2018 01:59:00 +0000 Subject: EDAC, ghes: Use CPER module handles to locate DIMMs Use SMBIOS module handle type 17, on platforms which provide valid ones, to locate the corresponding DIMM and thus have per-DIMM error counter updates. Signed-off-by: Fan Wu [ Massage commit message. ] Signed-off-by: Borislav Petkov Reviewed-by: Tyler Baicar Reviewed-by: James Morse Tested-by: Toshi Kani Cc: Mauro Carvalho Chehab Cc: baicar.tyler@gmail.com Cc: john.garry@huawei.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac Cc: shiju.jose@huawei.com Cc: tanxiaofei@huawei.com Cc: wanghuiqiang@huawei.com Link: http://lkml.kernel.org/r/1537322340-1860-1-git-send-email-wufan@codeaurora.org --- include/linux/edac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index bffb97828ed6..a45ce1f84bfc 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -451,6 +451,8 @@ struct dimm_info { u32 nr_pages; /* number of pages on this dimm */ unsigned csrow, cschannel; /* Points to the old API data */ + + u16 smbios_handle; /* Handle for SMBIOS type 17 */ }; /** -- cgit v1.2.3 From 6b58859419554fb824e09cfdd73151a195473cbc Mon Sep 17 00:00:00 2001 From: Justin Ernst Date: Tue, 25 Sep 2018 09:34:49 -0500 Subject: EDAC: Raise the maximum number of memory controllers We observe an oops in the skx_edac module during boot: EDAC MC0: Giving out device to module skx_edac controller Skylake Socket#0 IMC#0 EDAC MC1: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1 EDAC MC2: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0 ... 
EDAC MC13: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1 EDAC MC14: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0 EDAC MC15: Giving out device to module skx_edac controller Skylake Socket#1 IMC#1 Too many memory controllers: 16 EDAC MC: Removed device 0 for skx_edac Skylake Socket#0 IMC#0 We observe there are two memory controllers per socket, with a limit of 16. Raise the maximum number of memory controllers from 16 to 2 * MAX_NUMNODES (1024). [ bp: This is just a band-aid fix until we've sorted out the whole issue with the bus_type association and handling in EDAC and can get rid of this arbitrary limit. ] Signed-off-by: Justin Ernst Signed-off-by: Borislav Petkov Acked-by: Russ Anderson Cc: Mauro Carvalho Chehab Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/20180925143449.284634-1-justin.ernst@hpe.com --- include/linux/edac.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index a45ce1f84bfc..1d0c9ea8825d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -17,6 +17,7 @@ #include <linux/completion.h> #include <linux/workqueue.h> #include <linux/debugfs.h> +#include <linux/numa.h> #define EDAC_DEVICE_NAME_LEN 31 @@ -672,6 +673,6 @@ struct mem_ctl_info { /* * Maximum number of memory controllers in the coherent fabric. */ -#define EDAC_MAX_MCS 16 +#define EDAC_MAX_MCS 2 * MAX_NUMNODES #endif -- cgit v1.2.3 From 4cf841e398503990df640f7a7c5b2ea56f11c08c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 15 Oct 2018 16:11:31 -0700 Subject: ACPI/ADXL: Add address translation interface using an ACPI DSM Some new Intel servers provide an interface so that the OS can ask the BIOS to translate a system physical address to a memory address (socket, memory controller, channel, rank, dimm, etc.). This is useful for EDAC drivers that want to take the address of an error reported in a machine check bank and let the user know which DIMM may need to be replaced. 
Specification for this interface is available at: https://cdrdv2.intel.com/v1/dl/getContent/603354 [ Based on earlier code by Qiuxu Zhuo. ] [ bp: Make the first pr_info() in adxl_init() pr_debug() so that it doesn't pollute every dmesg. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Rafael J. Wysocki Tested-by: Qiuxu Zhuo CC: Len Brown CC: linux-acpi@vger.kernel.org CC: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20181015202620.23610-1-tony.luck@intel.com --- include/linux/adxl.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/adxl.h (limited to 'include/linux') diff --git a/include/linux/adxl.h b/include/linux/adxl.h new file mode 100644 index 000000000000..2a629acb4c3f --- /dev/null +++ b/include/linux/adxl.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Address translation interface via ACPI DSM. + * Copyright (C) 2018 Intel Corporation + */ + +#ifndef _LINUX_ADXL_H +#define _LINUX_ADXL_H + +const char * const *adxl_get_component_names(void); +int adxl_decode(u64 addr, u64 component_values[]); + +#endif /* _LINUX_ADXL_H */ -- cgit v1.2.3