From c798c88f3962ddff89c7aa818986caeecd46ab4c Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Wed, 19 Sep 2018 01:59:00 +0000 Subject: EDAC, ghes: Use CPER module handles to locate DIMMs Use SMBIOS module handle type 17, on platforms which provide valid ones, to locate the corresponding DIMM and thus have per-DIMM error counter updates. Signed-off-by: Fan Wu [ Massage commit message. ] Signed-off-by: Borislav Petkov Reviewed-by: Tyler Baicar Reviewed-by: James Morse Tested-by: Toshi Kani Cc: Mauro Carvalho Chehab Cc: baicar.tyler@gmail.com Cc: john.garry@huawei.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac Cc: shiju.jose@huawei.com Cc: tanxiaofei@huawei.com Cc: wanghuiqiang@huawei.com Link: http://lkml.kernel.org/r/1537322340-1860-1-git-send-email-wufan@codeaurora.org --- include/linux/edac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/edac.h') diff --git a/include/linux/edac.h b/include/linux/edac.h index bffb97828ed6..a45ce1f84bfc 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -451,6 +451,8 @@ struct dimm_info { u32 nr_pages; /* number of pages on this dimm */ unsigned csrow, cschannel; /* Points to the old API data */ + + u16 smbios_handle; /* Handle for SMBIOS type 17 */ }; /** -- cgit v1.2.3 From 6b58859419554fb824e09cfdd73151a195473cbc Mon Sep 17 00:00:00 2001 From: Justin Ernst Date: Tue, 25 Sep 2018 09:34:49 -0500 Subject: EDAC: Raise the maximum number of memory controllers We observe an oops in the skx_edac module during boot: EDAC MC0: Giving out device to module skx_edac controller Skylake Socket#0 IMC#0 EDAC MC1: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1 EDAC MC2: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0 ... 
EDAC MC13: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1 EDAC MC14: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0 EDAC MC15: Giving out device to module skx_edac controller Skylake Socket#1 IMC#1 Too many memory controllers: 16 EDAC MC: Removed device 0 for skx_edac Skylake Socket#0 IMC#0 We observe there are two memory controllers per socket, with a limit of 16. Raise the maximum number of memory controllers from 16 to 2 * MAX_NUMNODES (1024). [ bp: This is just a band-aid fix until we've sorted out the whole issue with the bus_type association and handling in EDAC and can get rid of this arbitrary limit. ] Signed-off-by: Justin Ernst Signed-off-by: Borislav Petkov Acked-by: Russ Anderson Cc: Mauro Carvalho Chehab Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/20180925143449.284634-1-justin.ernst@hpe.com --- include/linux/edac.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/edac.h') diff --git a/include/linux/edac.h b/include/linux/edac.h index a45ce1f84bfc..1d0c9ea8825d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -17,6 +17,7 @@ #include <linux/completion.h> #include <linux/workqueue.h> #include <linux/debugfs.h> +#include <linux/numa.h> #define EDAC_DEVICE_NAME_LEN 31 @@ -672,6 +673,6 @@ struct mem_ctl_info { /* * Maximum number of memory controllers in the coherent fabric. */ -#define EDAC_MAX_MCS 16 +#define EDAC_MAX_MCS 2 * MAX_NUMNODES #endif -- cgit v1.2.3