From 26a1a86dd093a10d0653429bf013dae6e95dccbf Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:29 -0800 Subject: cxl/events: Promote CXL event structures to a core header UEFI code can process CXL events through CPER records. Those records use almost the same format as the CXL events. Lift the CXL event structures to a core header to be shared in later patches. [jic123: drop "CXL rev 3.0" mention] Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-2-1bb8a4ca2c7a@intel.com [djbw: add F: entry to maintainers for include/linux/cxl-event.h] Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- include/linux/cxl-event.h | 95 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 include/linux/cxl-event.h (limited to 'include/linux') diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h new file mode 100644 index 000000000000..0fc068123f8e --- /dev/null +++ b/include/linux/cxl-event.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Intel Corporation. */ +#ifndef _LINUX_CXL_EVENT_H +#define _LINUX_CXL_EVENT_H + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + uuid_t id; + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_record_raw { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + +#endif /* _LINUX_CXL_EVENT_H */ -- cgit v1.2.3 From 6eade110754c085cee9e46f4d87d2c3ea4e59e8c Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:32 -0800 Subject: cxl/events: Separate UUID from event structures The UEFI CXL CPER structure does not include the UUID. Now that the UUID is passed separately to the trace event there is no need to have the UUID in those structures. Move UUID from the event record header to the raw structures. Adjust cxl-test to Create dummy structures for creating test records. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-5-1bb8a4ca2c7a@intel.com Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- include/linux/cxl-event.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 0fc068123f8e..3d9b5954d0c1 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -8,7 +8,6 @@ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 */ struct cxl_event_record_hdr { - uuid_t id; u8 length; u8 flags[3]; __le16 handle; @@ -18,8 +17,13 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 struct cxl_event_record_raw { + uuid_t id; struct cxl_event_record_hdr hdr; u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; } __packed; -- cgit v1.2.3 From f9c683386f5bc0364615138ce2b14be50848dbcf Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:33 -0800 Subject: cxl/events: Create a CXL event union The CXL CPER and event log records share everything but a UUID/GUID in their structures. Define a cxl_event union without the UUID/GUID to be shared between the CPER and event log record formats. Adjust the code to use this union. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-6-1bb8a4ca2c7a@intel.com Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- include/linux/cxl-event.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 3d9b5954d0c1..4d6c05f535f8 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -17,13 +17,8 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; -/* - * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 -struct cxl_event_record_raw { - uuid_t id; +struct cxl_event_generic { struct cxl_event_record_hdr hdr; u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; } __packed; @@ -96,4 +91,20 @@ struct cxl_event_mem_module { u8 reserved[0x3d]; } __packed; +union cxl_event { + struct cxl_event_generic generic; + struct cxl_event_gen_media gen_media; + struct cxl_event_dram dram; + struct cxl_event_mem_module mem_module; +} __packed; + +/* + * Common Event Record Format; in event logs + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_raw { + uuid_t id; + union cxl_event event; +} __packed; + #endif /* _LINUX_CXL_EVENT_H */ -- cgit v1.2.3 From 671a794c33c6e048ca5cedd5ad6af44d52d5d7e5 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:34 -0800 Subject: acpi/ghes: Process CXL Component Events BIOS can configure memory devices as firmware first. This will send CXL events to the firmware instead of the OS. The firmware can then send these events to the OS via UEFI. UEFI v2.10 section N.2.14 defines a Common Platform Error Record (CPER) format for CXL Component Events. The format is mostly the same as the CXL Common Event Record Format. The difference is the use of a GUID in the Section Type rather than a UUID as part of the event itself. Add GHES support to detect CXL CPER records and call a registered callback with the event. A notifier chain was considered for the callback but the complexity did not justify the use case as only the CXL subsystem requires this event. Enforce that only one callback can be registered at any time. Cc: Ard Biesheuvel Cc: Rafael J. Wysocki Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-7-1bb8a4ca2c7a@intel.com [djbw: fixup checkpatch errors] Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- include/linux/cxl-event.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 4d6c05f535f8..95841750a383 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -107,4 +107,54 @@ struct cxl_event_record_raw { union cxl_event event; } __packed; +enum cxl_event_type { + CXL_CPER_EVENT_GEN_MEDIA, + CXL_CPER_EVENT_DRAM, + CXL_CPER_EVENT_MEM_MODULE, +}; + +#define CPER_CXL_DEVICE_ID_VALID BIT(0) +#define CPER_CXL_DEVICE_SN_VALID BIT(1) +#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) +struct cxl_cper_event_rec { + struct { + u32 length; + u64 validation_bits; + struct cper_cxl_event_devid { + u16 vendor_id; + u16 device_id; + u8 func_num; + u8 device_num; + u8 bus_num; + u16 segment_num; + u16 slot_num; /* bits 2:0 reserved */ + u8 reserved; + } __packed device_id; + struct cper_cxl_event_sn { + u32 lower_dw; + u32 upper_dw; + } __packed dev_serial_num; + } __packed hdr; + + union cxl_event event; +} __packed; + +typedef void (*cxl_cper_callback)(enum cxl_event_type type, + struct cxl_cper_event_rec *rec); + +#ifdef CONFIG_ACPI_APEI_GHES +int cxl_cper_register_callback(cxl_cper_callback callback); +int cxl_cper_unregister_callback(cxl_cper_callback callback); +#else +static inline int cxl_cper_register_callback(cxl_cper_callback callback) +{ + return 0; +} + +static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) +{ + return 0; +} +#endif + #endif /* _LINUX_CXL_EVENT_H */ -- cgit v1.2.3 From ced085ef369af7a2b6da962ec2fbd01339f60693 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:35 -0800 Subject: PCI: Introduce cleanup helpers for device reference counts and locks The "goto error" pattern is notorious for introducing subtle resource leaks. Use the new cleanup.h helpers for PCI device reference counts and locks. Similar to the new put_device() and device_lock() cleanup helpers, __free(put_device) and guard(device), define the same for PCI devices, __free(pci_dev_put) and guard(pci_dev). These helpers eliminate the need for "goto free;" and "goto unlock;" patterns. For example, A 'struct pci_dev *' instance declared as: struct pci_dev *pdev __free(pci_dev_put) = NULL; ...will automatically call pci_dev_put() if @pdev is non-NULL when @pdev goes out of scope (automatic variable scope). If a function wants to invoke pci_dev_put() on error, but return @pdev on success, it can do: return no_free_ptr(pdev); ...or: return_ptr(pdev); For potential cleanup opportunity there are 587 open-coded calls to pci_dev_put() in the kernel with 65 instances within 10 lines of a goto statement with the CXL driver threatening to add another one. The guard() helper holds the associated lock for the remainder of the current scope in which it was invoked. So, for example: func(...) { if (...) { ... guard(pci_dev); /* pci_dev_lock() invoked here */ ... } /* <- implied pci_dev_unlock() triggered here */ } There are 15 invocations of pci_dev_unlock() in the kernel with 5 instances within 10 lines of a goto statement. Again, the CXL driver is threatening to add another. Introduce these helpers to preclude the addition of new more error prone goto put; / goto unlock; sequences. For now, these helpers are used in drivers/cxl/pci.c to allow ACPI error reports to be fed back into the CXL driver associated with the PCI device identified in the report. Cc: Bjorn Helgaas Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-8-1bb8a4ca2c7a@intel.com [djbw: rewrite changelog] Acked-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index dea043bc1e38..0d23d2e0eb1a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1170,6 +1170,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); struct pci_dev *pci_dev_get(struct pci_dev *dev); void pci_dev_put(struct pci_dev *dev); +DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) void pci_remove_bus(struct pci_bus *b); void pci_stop_and_remove_bus_device(struct pci_dev *dev); void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev); @@ -1874,6 +1875,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev); void pci_dev_lock(struct pci_dev *dev); int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); +DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) /* * PCI domain support. Sometimes called PCI segment (eg by ACPI), -- cgit v1.2.3 From dc97f6344f205b0dfa144e1b3e16d6dc05383d57 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:36 -0800 Subject: cxl/pci: Register for and process CPER events If the firmware has configured CXL event support to be firmware first the OS can process those events through CPER records. The CXL layer has unique DPA to HPA knowledge and standard event trace parsing in place. CPER records contain Bus, Device, Function information which can be used to identify the PCI device which is sending the event. Change the PCI driver registration to include registration of a CXL CPER callback to process events through the trace subsystem. Use new scoped based management to simplify the handling of the PCI device object. Tested-by: Smita-Koralahalli Reviewed-by: Smita-Koralahalli Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-9-1bb8a4ca2c7a@intel.com Signed-off-by: Ira Weiny [djbw: use new pci_dev guard, flip init order] Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- include/linux/cxl-event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 95841750a383..91125eca4c8a 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -108,6 +108,7 @@ struct cxl_event_record_raw { } __packed; enum cxl_event_type { + CXL_CPER_EVENT_GENERIC, CXL_CPER_EVENT_GEN_MEDIA, CXL_CPER_EVENT_DRAM, CXL_CPER_EVENT_MEM_MODULE, -- cgit v1.2.3