summaryrefslogtreecommitdiff
path: root/drivers/edac/e752x_edac.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-29 18:32:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-29 18:32:37 -0700
commit87a5af24e54857e7b15c1f1b0468512ee65c94e3 (patch)
treeee346852a0fc27f172a5eb57b6e3c7bf111f2fad /drivers/edac/e752x_edac.c
parent7e5b2db77b05746613516599c916a8cc2e321077 (diff)
parent0bf09e829dd4b07227ed5a8bc4ac85752a044458 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
Pull EDAC internal API changes from Mauro Carvalho Chehab: "This changeset is the first part of a series of patches that fixes the EDAC sybsystem. On this set, it changes the Kernel EDAC API in order to properly represent the Intel i3/i5/i7, Xeon 3xxx/5xxx/7xxx, and Intel E5-xxxx memory controllers. The EDAC core used to assume that: - the DRAM chip select pin is directly accessed by the memory controller - when multiple channels are used, they're all filled with the same type of memory. None of the above premises is true on Intel memory controllers since 2002, when RAMBUS and FB-DIMMs were introduced, and Advanced Memory Buffer or by some similar technologies hides the direct access to the DRAM pins. So, the existing drivers for those chipsets had to lie to the EDAC core, in general telling that just one channel is filled. That produces some hard to understand error messages like: EDAC MC0: CE row 3, channel 0, label "DIMM1": 1 Unknown error(s): memory read error on FATAL area : cpu=0 Err=0008:00c2 (ch=2), addr = 0xad1f73480 => socket=0, Channel=0(mask=2), rank=1 The location information there (row3 channel 0) is completely bogus: it has no physical meaning, and are just some random values that the driver uses to talk with the EDAC core. The error actually happened at CPU socket 0, channel 0, slot 1, but this is not reported anywhere, as the EDAC core doesn't know anything about the memory layout. So, only advanced users that know how the EDAC driver works and that tests their systems to see how DIMMs are mapped can actually benefit for such error logs. This patch series fixes the error report logic, in order to allow the EDAC to expose the memory architecture used by them to the EDAC core. So, as the EDAC core now understands how the memory is organized, it can provide an useful report: EDAC MC0: CE memory read error on DIMM1 (channel:0 slot:1 page:0x364b1b offset:0x600 grain:32 syndrome:0x0 - count:1 area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:4) The location of the DIMM where the error happened is reported by "MC0" (cpu socket #0), at "channel:0 slot:1" location, and matches the physical location of the DIMM. There are two remaining issues not covered by this patch series: - The EDAC sysfs API will still report bogus values. So, userspace tools like edac-utils will still use the bogus data; - Add a new tracepoint-based way to get the binary information about the errors. Those are on a second series of patches (also at -next), but will probably miss the train for 3.5, due to the slow review process." Fix up trivial conflict (due to spelling correction of removed code) in drivers/edac/edac_device.c * git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (42 commits) i7core: fix ranks information at the per-channel struct i5000: Fix the fatal error handling i5100_edac: Fix a warning when compiled with 32 bits i82975x_edac: Test nr_pages earlier to save a few CPU cycles e752x_edac: provide more info about how DIMMS/ranks are mapped i5000_edac: Fix the logic that retrieves memory information i5400_edac: improve debug messages to better represent the filled memory edac: Cleanup the logs for i7core and sb edac drivers edac: Initialize the dimm label with the known information edac: Remove the legacy EDAC ABI x38_edac: convert driver to use the new edac ABI tile_edac: convert driver to use the new edac ABI sb_edac: convert driver to use the new edac ABI r82600_edac: convert driver to use the new edac ABI ppc4xx_edac: convert driver to use the new edac ABI pasemi_edac: convert driver to use the new edac ABI mv64x60_edac: convert driver to use the new edac ABI mpc85xx_edac: convert driver to use the new edac ABI i82975x_edac: convert driver to use the new edac ABI i82875p_edac: convert driver to use the new edac ABI ...
Diffstat (limited to 'drivers/edac/e752x_edac.c')
-rw-r--r--drivers/edac/e752x_edac.c116
1 files changed, 79 insertions, 37 deletions
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index 41223261ede9..3186512c9739 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -4,7 +4,11 @@
* This file may be distributed under the terms of the
* GNU General Public License.
*
- * See "enum e752x_chips" below for supported chipsets
+ * Implement support for the e7520, E7525, e7320 and i3100 memory controllers.
+ *
+ * Datasheets:
+ * http://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html
+ * ftp://download.intel.com/design/intarch/datashts/31345803.pdf
*
* Written by Tom Zimmerman
*
@@ -13,8 +17,6 @@
* Wang Zhenyu at intel.com
* Dave Jiang at mvista.com
*
- * $Id: edac_e752x.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $
- *
*/
#include <linux/module.h>
@@ -187,6 +189,25 @@ enum e752x_chips {
I3100 = 3
};
+/*
+ * Those chips Support single-rank and dual-rank memories only.
+ *
+ * On e752x chips, the odd rows are present only on dual-rank memories.
+ * Dividing the rank by two will provide the dimm#
+ *
+ * i3100 MC has a different mapping: it supports only 4 ranks.
+ *
+ * The mapping is (from 1 to n):
+ * slot single-ranked double-ranked
+ * dimm #1 -> rank #4 NA
+ * dimm #2 -> rank #3 NA
+ * dimm #3 -> rank #2 Ranks 2 and 3
+ * dimm #4 -> rank $1 Ranks 1 and 4
+ *
+ * FIXME: The current mapping for i3100 considers that it supports up to 8
+ * ranks/chanel, but datasheet says that the MC supports only 4 ranks.
+ */
+
struct e752x_pvt {
struct pci_dev *bridge_ck;
struct pci_dev *dev_d0f0;
@@ -350,8 +371,10 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
channel = !(error_one & 1);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ce(mci, page, offset_in_page(sec1_add << 4),
- sec1_syndrome, row, channel, "e752x CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+ page, offset_in_page(sec1_add << 4), sec1_syndrome,
+ row, channel, -1,
+ "e752x CE", "", NULL);
}
static inline void process_ce(struct mem_ctl_info *mci, u16 error_one,
@@ -385,9 +408,12 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ue(mci, block_page,
- offset_in_page(error_2b << 4),
- row, "e752x UE from Read");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ block_page,
+ offset_in_page(error_2b << 4), 0,
+ row, -1, -1,
+ "e752x UE from Read", "", NULL);
+
}
if (error_one & 0x0404) {
error_2b = scrb_add;
@@ -401,9 +427,11 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ue(mci, block_page,
- offset_in_page(error_2b << 4),
- row, "e752x UE from Scruber");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+ block_page,
+ offset_in_page(error_2b << 4), 0,
+ row, -1, -1,
+ "e752x UE from Scruber", "", NULL);
}
}
@@ -426,7 +454,9 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci,
return;
debugf3("%s()\n", __func__);
- edac_mc_handle_ue_no_info(mci, "e752x UE log memory write");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
+ -1, -1, -1,
+ "e752x UE log memory write", "", NULL);
}
static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error,
@@ -1044,7 +1074,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */
int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */
u8 value;
- u32 dra, drc, cumul_size;
+ u32 dra, drc, cumul_size, i, nr_pages;
dra = 0;
for (index = 0; index < 4; index++) {
@@ -1053,7 +1083,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
dra |= dra_reg << (index * 8);
}
pci_read_config_dword(pdev, E752X_DRC, &drc);
- drc_chan = dual_channel_active(ddrcsr);
+ drc_chan = dual_channel_active(ddrcsr) ? 1 : 0;
drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */
drc_ddim = (drc >> 20) & 0x3;
@@ -1078,26 +1108,33 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */
- csrow->mtype = MEM_RDDR; /* only one type supported */
- csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
-
- /*
- * if single channel or x8 devices then SECDED
- * if dual channel and x4 then S4ECD4ED
- */
- if (drc_ddim) {
- if (drc_chan && mem_dev) {
- csrow->edac_mode = EDAC_S4ECD4ED;
- mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
- } else {
- csrow->edac_mode = EDAC_SECDED;
- mci->edac_cap |= EDAC_FLAG_SECDED;
- }
- } else
- csrow->edac_mode = EDAC_NONE;
+
+ for (i = 0; i < csrow->nr_channels; i++) {
+ struct dimm_info *dimm = csrow->channels[i].dimm;
+
+ debugf3("Initializing rank at (%i,%i)\n", index, i);
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */
+ dimm->mtype = MEM_RDDR; /* only one type supported */
+ dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
+
+ /*
+ * if single channel or x8 devices then SECDED
+ * if dual channel and x4 then S4ECD4ED
+ */
+ if (drc_ddim) {
+ if (drc_chan && mem_dev) {
+ dimm->edac_mode = EDAC_S4ECD4ED;
+ mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
+ } else {
+ dimm->edac_mode = EDAC_SECDED;
+ mci->edac_cap |= EDAC_FLAG_SECDED;
+ }
+ } else
+ dimm->edac_mode = EDAC_NONE;
+ }
}
}
@@ -1226,6 +1263,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
u16 pci_data;
u8 stat8;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct e752x_pvt *pvt;
u16 ddrcsr;
int drc_chan; /* Number of channels 0=1chan,1=2chan */
@@ -1252,11 +1290,15 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
/* Dual channel = 1, Single channel = 0 */
drc_chan = dual_channel_active(ddrcsr);
- mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1, 0);
-
- if (mci == NULL) {
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = E752X_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = drc_chan + 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+ if (mci == NULL)
return -ENOMEM;
- }
debugf3("%s(): init mci\n", __func__);
mci->mtype_cap = MEM_FLAG_RDDR;