summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author Lukas Wunner <lukas@wunner.de> 2025-09-01 09:44:52 +0200
committer Bjorn Helgaas <bhelgaas@google.com> 2025-09-04 10:09:05 -0500
commit c8ab5e888bb6721e6e084881e6e24ef2678832c3 (patch)
tree ee9c34f98133b2d378979b455246cbf4de1cbf86 /drivers
parent 6633875250b38b18b8638cf01e695de031c71f02 (diff)
PCI/AER: Print TLP Log for errors introduced since PCIe r1.1
When reporting an error, the AER driver prints the TLP Header / Prefix Log only for errors enumerated in the AER_LOG_TLP_MASKS macro. The macro was never amended since its introduction in 2006 with commit 6c2b374d7485 ("PCI-Express AER implemetation: AER core and aerdriver"). At the time, PCIe r1.1 was the latest spec revision. Amend the macro with errors defined since then to avoid omitting the TLP Header / Prefix Log for newer errors. The order of the errors in AER_LOG_TLP_MASKS follows PCIe r1.1 sec 6.2.7 rather than 7.10.2, because only the former documents for which errors a TLP Header / Prefix is logged. Retain this order. The section number is still 6.2.7 in today's PCIe r7.0. For Completion Timeouts, the TLP Header / Prefix is only logged if the Completion Timeout Prefix / Header Log Capable bit is set in the AER Capabilities and Control register. Introduce a tlp_header_logged() helper to check whether the TLP Header / Prefix Log is populated and use it in the two places which currently match against AER_LOG_TLP_MASKS directly. For Uncorrectable Internal Errors, logging of the TLP Header / Prefix is optional per PCIe r7.0 sec 6.2.7. If needed, drivers could indicate through a flag whether devices are capable and tlp_header_logged() could then check that flag. 
pciutils introduced macros for newer errors with commit 144b0911cc0b ("ls-ecaps: extend decode support for more fields for AER CE and UE status"): https://git.kernel.org/pub/scm/utils/pciutils/pciutils.git/commit/?id=144b0911cc0b Unfortunately some of those macros are overly long: PCI_ERR_UNC_POISONED_TLP_EGRESS, PCI_ERR_UNC_DMWR_REQ_EGRESS_BLOCKED, PCI_ERR_UNC_IDE_CHECK, PCI_ERR_UNC_MISR_IDE_TLP, PCI_ERR_UNC_PCRC_CHECK, PCI_ERR_UNC_TLP_XLAT_EGRESS_BLOCKED. This seems unsuitable for <linux/pci_regs.h>, so shorten to: PCI_ERR_UNC_POISON_BLK, PCI_ERR_UNC_DMWR_BLK, PCI_ERR_UNC_IDE_CHECK, PCI_ERR_UNC_MISR_IDE, PCI_ERR_UNC_PCRC_CHECK, PCI_ERR_UNC_XLAT_BLK. Note that some of the existing macros in <linux/pci_regs.h> do not match exactly with pciutils (e.g. PCI_ERR_UNC_SDES versus PCI_ERR_UNC_SURPDN), so it does not seem mandatory for them to be identical. Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Link: https://patch.msgid.link/5f707caf1260bd8f15012bb032f7da9a9b898aba.1756712066.git.lukas@wunner.de
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/pci/pcie/aer.c 30
1 files changed, 27 insertions, 3 deletions
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 15ed541d2fbe..62c74b5f99ae 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -96,11 +96,21 @@ struct aer_info {
};
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
+ PCI_ERR_UNC_POISON_BLK | \
PCI_ERR_UNC_ECRC| \
PCI_ERR_UNC_UNSUP| \
PCI_ERR_UNC_COMP_ABORT| \
PCI_ERR_UNC_UNX_COMP| \
- PCI_ERR_UNC_MALF_TLP)
+ PCI_ERR_UNC_ACSV | \
+ PCI_ERR_UNC_MCBTLP | \
+ PCI_ERR_UNC_ATOMEG | \
+ PCI_ERR_UNC_DMWR_BLK | \
+ PCI_ERR_UNC_XLAT_BLK | \
+ PCI_ERR_UNC_TLPPRE | \
+ PCI_ERR_UNC_MALF_TLP | \
+ PCI_ERR_UNC_IDE_CHECK | \
+ PCI_ERR_UNC_MISR_IDE | \
+ PCI_ERR_UNC_PCRC_CHECK)
#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \
PCI_EXP_RTCTL_SENFEE| \
@@ -796,6 +806,20 @@ static int aer_ratelimit(struct pci_dev *dev, unsigned int severity)
}
}
+static bool tlp_header_logged(u32 status, u32 capctl)
+{
+ /* Errors for which a header is always logged (PCIe r7.0 sec 6.2.7) */
+ if (status & AER_LOG_TLP_MASKS)
+ return true;
+
+ /* Completion Timeout header is only logged on capable devices */
+ if (status & PCI_ERR_UNC_COMP_TIME &&
+ capctl & PCI_ERR_CAP_COMP_TIME_LOG)
+ return true;
+
+ return false;
+}
+
static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
const char **strings;
@@ -910,7 +934,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
status = aer->uncor_status;
mask = aer->uncor_mask;
info.level = KERN_ERR;
- tlp_header_valid = status & AER_LOG_TLP_MASKS;
+ tlp_header_valid = tlp_header_logged(status, aer->cap_control);
}
info.status = status;
@@ -1401,7 +1425,7 @@ int aer_get_device_error_info(struct aer_err_info *info, int i)
pci_read_config_dword(dev, aer + PCI_ERR_CAP, &aercc);
info->first_error = PCI_ERR_CAP_FEP(aercc);
- if (info->status & AER_LOG_TLP_MASKS) {
+ if (tlp_header_logged(info->status, aercc)) {
info->tlp_header_valid = 1;
pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG,
aer + PCI_ERR_PREFIX_LOG,