diff options
Diffstat (limited to 'drivers/cxl/core')
| -rw-r--r-- | drivers/cxl/core/Makefile | 1 | ||||
| -rw-r--r-- | drivers/cxl/core/core.h | 3 | ||||
| -rw-r--r-- | drivers/cxl/core/port.c | 7 | ||||
| -rw-r--r-- | drivers/cxl/core/ras.c | 82 |
4 files changed, 93 insertions, 0 deletions
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 9259bcc6773c..ba5f0916d379 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,5 +14,6 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o +cxl_core-y += ras.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..c87dbfcc9403 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -115,4 +115,7 @@ bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +int cxl_ras_init(void); +void cxl_ras_exit(void); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 78a5c2c25982..7ed6e8f374af 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2339,8 +2339,14 @@ static __init int cxl_core_init(void) if (rc) goto err_region; + rc = cxl_ras_init(); + if (rc) + goto err_ras; + return 0; +err_ras: + cxl_region_exit(); err_region: bus_unregister(&cxl_bus_type); err_bus: @@ -2352,6 +2358,7 @@ err_wq: static void cxl_core_exit(void) { + cxl_ras_exit(); cxl_region_exit(); bus_unregister(&cxl_bus_type); destroy_workqueue(cxl_bus_wq); diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c new file mode 100644 index 000000000000..91273af6ccbc --- /dev/null +++ b/drivers/cxl/core/ras.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include <linux/pci.h> +#include <linux/aer.h> +#include <cxl/event.h> +#include <cxlmem.h> +#include "trace.h" + +static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; + struct cxl_dev_state *cxlds; + + cxlds = pci_get_drvdata(pdev); + if (!cxlds) + return; + + trace_cxl_aer_correctable_error(cxlds->cxlmd, status); +} + +static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; + struct cxl_dev_state *cxlds; + u32 fe; + + cxlds = pci_get_drvdata(pdev); + if (!cxlds) + return; + + if (hweight32(status) > 1) + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + ras_cap.cap_control)); + else + fe = status; + + trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, + ras_cap.header_log); +} + +static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) +{ + unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, + data->prot_err.agent_addr.function); + struct pci_dev *pdev __free(pci_dev_put) = + pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, + data->prot_err.agent_addr.bus, + devfn); + + if (!pdev) + return; + + guard(device)(&pdev->dev); + + if (data->severity == AER_CORRECTABLE) + cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); + else + cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap); +} + +static void cxl_cper_prot_err_work_fn(struct work_struct *work) +{ + struct cxl_cper_prot_err_work_data wd; + + while (cxl_cper_prot_err_kfifo_get(&wd)) + cxl_cper_handle_prot_err(&wd); +} +static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); + +int cxl_ras_init(void) +{ + return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work); +} + +void cxl_ras_exit(void) +{ + cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); + cancel_work_sync(&cxl_cper_prot_err_work); +} |
