From 958c3a6706863f9f77b21c90d2428473441cd8a1 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 23 Jan 2025 08:44:17 +0000 Subject: efi/cper, cxl: Make definitions and structures global In preparation to add tracepoint support, move protocol error UUID definition to a common location, Also, make struct CXL RAS capability, cxl_cper_sec_prot_err and CPER validation flags global for use across different modules. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Gregory Price Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250123084421.127697-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- include/cxl/event.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/cper.h | 4 +++ 2 files changed, 84 insertions(+) (limited to 'include') diff --git a/include/cxl/event.h b/include/cxl/event.h index 04edd44bd26f..7a6c14c05215 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -164,6 +164,86 @@ struct cxl_cper_work_data { struct cxl_cper_event_rec rec; }; +#define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0) +#define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1) +#define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2) +#define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3) +#define PROT_ERR_VALID_CAPABILITY BIT_ULL(4) +#define PROT_ERR_VALID_DVSEC BIT_ULL(5) +#define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6) + +/* + * The layout of the enumeration and the values matches CXL Agent Type + * field in the UEFI 2.10 Section N.2.13, + */ +enum { + RCD, /* Restricted CXL Device */ + RCH_DP, /* Restricted CXL Host Downstream Port */ + DEVICE, /* CXL Device */ + LD, /* CXL Logical Device */ + FMLD, /* CXL Fabric Manager managed Logical Device */ + RP, /* CXL Root Port */ + DSP, /* CXL Downstream Switch Port */ + USP, /* CXL Upstream Switch Port */ +}; + +#pragma pack(1) + +/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ +struct cxl_cper_sec_prot_err { + u64 valid_bits; + u8 agent_type; + u8 reserved[7]; + + /* + * Except for RCH Downstream Port, all the remaining CXL Agent + * types are uniquely identified by the PCIe compatible SBDF number. + */ + union { + u64 rcrb_base_addr; + struct { + u8 function; + u8 device; + u8 bus; + u16 segment; + u8 reserved_1[3]; + }; + } agent_addr; + + struct { + u16 vendor_id; + u16 device_id; + u16 subsystem_vendor_id; + u16 subsystem_id; + u8 class_code[2]; + u16 slot; + u8 reserved_1[4]; + } device_id; + + struct { + u32 lower_dw; + u32 upper_dw; + } dev_serial_num; + + u8 capability[60]; + u16 dvsec_len; + u16 err_len; + u8 reserved_2[4]; +}; + +#pragma pack() + +/* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */ +struct cxl_ras_capability_regs { + u32 uncor_status; + u32 uncor_mask; + u32 uncor_severity; + u32 cor_status; + u32 cor_mask; + u32 cap_control; + u32 header_log[16]; +}; + #ifdef CONFIG_ACPI_APEI_GHES int cxl_cper_register_work(struct work_struct *work); int cxl_cper_unregister_work(struct work_struct *work); diff --git a/include/linux/cper.h b/include/linux/cper.h index 265b0f8fc0b3..5c6d4d5b9975 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -89,6 +89,10 @@ enum { #define CPER_NOTIFY_DMAR \ GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Protocol Error Section */ +#define CPER_SEC_CXL_PROT_ERR \ + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ + 0x4B, 0x77, 0x10, 0x48) /* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ /* -- cgit v1.2.3 From 61eac5f7f6439e8fe99b5fb29406acb0fd7b83c6 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 23 Jan 2025 08:44:18 +0000 Subject: efi/cper, cxl: Remove cper_cxl.h Move the declaration of cxl_cper_print_prot_err() to include/linux/cper.h to avoid maintaining a separate header file just for this function declaration. Remove drivers/firmware/efi/cper_cxl.h as its contents have been reorganized. No functional changes. Signed-off-by: Smita Koralahalli Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Gregory Price Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250123084421.127697-4-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- include/linux/cper.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cper.h b/include/linux/cper.h index 5c6d4d5b9975..0ed60a91eca9 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -605,4 +605,8 @@ void cper_estatus_print(const char *pfx, int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); int cper_estatus_check(const struct acpi_hest_generic_status *estatus); +struct cxl_cper_sec_prot_err; +void cxl_cper_print_prot_err(const char *pfx, + const struct cxl_cper_sec_prot_err *prot_err); + #endif -- cgit v1.2.3 From 315c2f0b53ba2645062627443a12cea73f3dad9c Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 23 Jan 2025 08:44:19 +0000 Subject: acpi/ghes, cper: Recognize and cache CXL Protocol errors Add support in GHES to detect and process CXL CPER Protocol errors, as defined in UEFI v2.10, section N.2.13. Define struct cxl_cper_prot_err_work_data to cache CXL protocol error information, including RAS capabilities and severity, for further handling. These cached CXL CPER records will later be processed by workqueues within the CXL subsystem. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Tony Luck Reviewed-by: Gregory Price Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250123084421.127697-5-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- include/cxl/event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/cxl/event.h b/include/cxl/event.h index 7a6c14c05215..8381a07052d0 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -244,6 +244,12 @@ struct cxl_ras_capability_regs { u32 header_log[16]; }; +struct cxl_cper_prot_err_work_data { + struct cxl_cper_sec_prot_err prot_err; + struct cxl_ras_capability_regs ras_cap; + int severity; +}; + #ifdef CONFIG_ACPI_APEI_GHES int cxl_cper_register_work(struct work_struct *work); int cxl_cper_unregister_work(struct work_struct *work); -- cgit v1.2.3 From 84b25926fa7abb5b634d4af9544f47ab0aabc399 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 26 Feb 2025 09:21:18 -0700 Subject: acpi: numa: Add support to enumerate and store extended linear address mode Store the address mode as part of the cache attriutes. Export the mode attribute to sysfs as all other cache attributes. Link: https://lore.kernel.org/linux-cxl/668333b17e4b2_5639294fd@dwillia2-xfh.jf.intel.com.notmuch/ Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250226162224.3633792-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/node.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 9a881c2208b3..2b7517892230 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -57,6 +57,11 @@ enum cache_write_policy { NODE_CACHE_WRITE_OTHER, }; +enum cache_mode { + NODE_CACHE_ADDR_MODE_RESERVED, + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR, +}; + /** * struct node_cache_attrs - system memory caching attributes * @@ -65,6 +70,7 @@ enum cache_write_policy { * @size: Total size of cache in bytes * @line_size: Number of bytes fetched on a cache miss * @level: The cache hierarchy level + * @address_mode: The address mode */ struct node_cache_attrs { enum cache_indexing indexing; @@ -72,6 +78,7 @@ struct node_cache_attrs { u64 size; u16 line_size; u8 level; + u16 address_mode; }; #ifdef CONFIG_HMEM_REPORTING -- cgit v1.2.3 From 0ec9849b63338da7883440ed3f52757cd8c847b1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 26 Feb 2025 09:21:19 -0700 Subject: acpi/hmat / cxl: Add extended linear cache support for CXL The current cxl region size only indicates the size of the CXL memory region without accounting for the extended linear cache size. Retrieve the cache size from HMAT and append that to the cxl region size for the cxl region range that matches the SRAT range that has extended linear cache enabled. The SRAT defines the whole memory range that includes the extended linear cache and the CXL memory region. The new HMAT ECN/ECR to the Memory Side Cache Information Structure defines the size of the extended linear cache size and matches to the SRAT Memory Affinity Structure by the memory proxmity domain. Add a helper to match the cxl range to the SRAT memory range in order to retrieve the cache size. There are several places that checks the cxl region range against the decoder range. Use new helper to check between the two ranges and address the new cache size. Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250226162224.3633792-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/acpi.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4e495b29c640..cbd933504dbf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1095,6 +1095,17 @@ static inline acpi_handle acpi_get_processor_handle(int cpu) #endif /* !CONFIG_ACPI */ +#ifdef CONFIG_ACPI_HMAT +int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, + resource_size_t *size); +#else +static inline int hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size) +{ + return -EOPNOTSUPP; +} +#endif + extern void arch_post_acpi_subsys_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -- cgit v1.2.3 From 36f257e3b0ba904f5a4e7fa8dafaa60e88cdd28c Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Mon, 10 Mar 2025 22:38:38 +0000 Subject: acpi/ghes, cxl/pci: Process CXL CPER Protocol Errors When PCIe AER is in FW-First, OS should process CXL Protocol errors from CPER records. Introduce support for handling and logging CXL Protocol errors. The defined trace events cxl_aer_uncorrectable_error and cxl_aer_correctable_error trace native CXL AER endpoint errors. Reuse them to trace FW-First Protocol errors. Since the CXL code is required to be called from process context and GHES is in interrupt context, use workqueues for processing. Similar to CXL CPER event handling, use kfifo to handle errors as it simplifies queue processing by providing lock free fifo operations. Add the ability for the CXL sub-system to register a workqueue to process CXL CPER protocol errors. [DJ: return cxl_cper_register_prot_err_work() directly in cxl_ras_init()] Signed-off-by: Smita Koralahalli Reviewed-by: Li Ming Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Reviewed-by: Tony Luck Link: https://patch.msgid.link/20250310223839.31342-2-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- include/cxl/event.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/cxl/event.h b/include/cxl/event.h index 8381a07052d0..f9ae1796da85 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -254,6 +254,9 @@ struct cxl_cper_prot_err_work_data { int cxl_cper_register_work(struct work_struct *work); int cxl_cper_unregister_work(struct work_struct *work); int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); +int cxl_cper_register_prot_err_work(struct work_struct *work); +int cxl_cper_unregister_prot_err_work(struct work_struct *work); +int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd); #else static inline int cxl_cper_register_work(struct work_struct *work) { @@ -268,6 +271,18 @@ static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) { return 0; } +static inline int cxl_cper_register_prot_err_work(struct work_struct *work) +{ + return 0; +} +static inline int cxl_cper_unregister_prot_err_work(struct work_struct *work) +{ + return 0; +} +static inline int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) +{ + return 0; +} #endif #endif /* _LINUX_CXL_EVENT_H */ -- cgit v1.2.3