From 0a4b61d9c2e496b5f0a10e29e355a1465c8738bb Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 28 Oct 2025 21:35:42 +0000 Subject: x86/amd_node: Fix AMD root device caching Recent AMD node rework removed the "search and count" method of caching AMD root devices. This depended on the value from a Data Fabric register that was expected to hold the PCI bus of one of the root devices attached to that fabric. However, this expectation is incorrect. The register, when read from PCI config space, returns the bitwise-OR of the buses of all attached root devices. This behavior is benign on AMD reference design boards, since the bus numbers are aligned. This results in a bitwise-OR value matching one of the buses. For example, 0x00 | 0x40 | 0xA0 | 0xE0 = 0xE0. This behavior breaks on boards where the bus numbers are not exactly aligned. For example, 0x00 | 0x07 | 0xE0 | 0x15 = 0x1F. The examples above are for AMD node 0. The first root device on other nodes will not be 0x00. The first root device for other nodes will depend on the total number of root devices, the system topology, and the specific PCI bus number assignment. For example, a system with 2 AMD nodes could have this: Node 0 : 0x00 0x07 0x0e 0x15 Node 1 : 0x1c 0x23 0x2a 0x31 The bus numbering style in the reference boards is not a requirement. The numbering found in other boards is not incorrect. Therefore, the root device caching method needs to be adjusted. Go back to the "search and count" method used before the recent rework. Search for root devices using PCI class code rather than fixed PCI IDs. This keeps the goal of the rework (remove dependency on PCI IDs) while being able to support various board designs. Merge helper functions to reduce code duplication. [ bp: Reflow comment. ] Fixes: 40a5f6ffdfc8 ("x86/amd_nb: Simplify root device search") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/all/20251028-fix-amd-root-v2-1-843e38f8be2c@amd.com --- arch/x86/kernel/amd_node.c | 150 +++++++++++++++------------------------------ 1 file changed, 51 insertions(+), 99 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c index a40176b62eb5..3d0a4768d603 100644 --- a/arch/x86/kernel/amd_node.c +++ b/arch/x86/kernel/amd_node.c @@ -34,62 +34,6 @@ struct pci_dev *amd_node_get_func(u16 node, u8 func) return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func)); } -#define DF_BLK_INST_CNT 0x040 -#define DF_CFG_ADDR_CNTL_LEGACY 0x084 -#define DF_CFG_ADDR_CNTL_DF4 0xC04 - -#define DF_MAJOR_REVISION GENMASK(27, 24) - -static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0) -{ - u32 reg; - - /* - * Revision fields added for DF4 and later. - * - * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. - */ - if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, ®)) - return 0; - - if (reg & DF_MAJOR_REVISION) - return DF_CFG_ADDR_CNTL_DF4; - - return DF_CFG_ADDR_CNTL_LEGACY; -} - -struct pci_dev *amd_node_get_root(u16 node) -{ - struct pci_dev *root; - u16 cntl_off; - u8 bus; - - if (!cpu_feature_enabled(X86_FEATURE_ZEN)) - return NULL; - - /* - * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) - * Bits [7:0] (SecBusNum) holds the bus number of the root device for - * this Data Fabric instance. The segment, device, and function will be 0. - */ - struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); - if (!df_f0) - return NULL; - - cntl_off = get_cfg_addr_cntl_offset(df_f0); - if (!cntl_off) - return NULL; - - if (pci_read_config_byte(df_f0, cntl_off, &bus)) - return NULL; - - /* Grab the pointer for the actual root device instance. */ - root = pci_get_domain_bus_and_slot(0, bus, 0); - - pci_dbg(root, "is root for AMD node %u\n", node); - return root; -} - static struct pci_dev **amd_roots; /* Protect the PCI config register pairs used for SMN. */ @@ -274,51 +218,21 @@ DEFINE_SHOW_STORE_ATTRIBUTE(smn_node); DEFINE_SHOW_STORE_ATTRIBUTE(smn_address); DEFINE_SHOW_STORE_ATTRIBUTE(smn_value); -static int amd_cache_roots(void) -{ - u16 node, num_nodes = amd_num_nodes(); - - amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); - if (!amd_roots) - return -ENOMEM; - - for (node = 0; node < num_nodes; node++) - amd_roots[node] = amd_node_get_root(node); - - return 0; -} - -static int reserve_root_config_spaces(void) +static struct pci_dev *get_next_root(struct pci_dev *root) { - struct pci_dev *root = NULL; - struct pci_bus *bus = NULL; - - while ((bus = pci_find_next_bus(bus))) { - /* Root device is Device 0 Function 0 on each Primary Bus. */ - root = pci_get_slot(bus, 0); - if (!root) + while ((root = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, root))) { + /* Root device is Device 0 Function 0. */ + if (root->devfn) continue; if (root->vendor != PCI_VENDOR_ID_AMD && root->vendor != PCI_VENDOR_ID_HYGON) continue; - pci_dbg(root, "Reserving PCI config space\n"); - - /* - * There are a few SMN index/data pairs and other registers - * that shouldn't be accessed by user space. - * So reserve the entire PCI config space for simplicity rather - * than covering specific registers piecemeal. - */ - if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { - pci_err(root, "Failed to reserve config space\n"); - return -EEXIST; - } + break; } - smn_exclusive = true; - return 0; + return root; } static bool enable_dfs; @@ -332,7 +246,8 @@ __setup("amd_smn_debugfs_enable", amd_smn_enable_dfs); static int __init amd_smn_init(void) { - int err; + u16 count, num_roots, roots_per_node, node, num_nodes; + struct pci_dev *root; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) return 0; @@ -342,13 +257,48 @@ static int __init amd_smn_init(void) if (amd_roots) return 0; - err = amd_cache_roots(); - if (err) - return err; + num_roots = 0; + root = NULL; + while ((root = get_next_root(root))) { + pci_dbg(root, "Reserving PCI config space\n"); - err = reserve_root_config_spaces(); - if (err) - return err; + /* + * There are a few SMN index/data pairs and other registers + * that shouldn't be accessed by user space. So reserve the + * entire PCI config space for simplicity rather than covering + * specific registers piecemeal. + */ + if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { + pci_err(root, "Failed to reserve config space\n"); + return -EEXIST; + } + + num_roots++; + } + + pr_debug("Found %d AMD root devices\n", num_roots); + + if (!num_roots) + return -ENODEV; + + num_nodes = amd_num_nodes(); + amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); + if (!amd_roots) + return -ENOMEM; + + roots_per_node = num_roots / num_nodes; + + count = 0; + node = 0; + root = NULL; + while (node < num_nodes && (root = get_next_root(root))) { + /* Use one root for each node and skip the rest. */ + if (count++ % roots_per_node) + continue; + + pci_dbg(root, "is root for AMD node %u\n", node); + amd_roots[node++] = root; + } if (enable_dfs) { debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir); @@ -358,6 +308,8 @@ static int __init amd_smn_init(void) debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops); } + smn_exclusive = true; + return 0; } -- cgit v1.2.3 From f1fdffe0afea02ba783acfe815b6a60e7180df40 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 4 Nov 2025 10:10:06 -0600 Subject: x86/CPU/AMD: Add missing terminator for zen5_rdseed_microcode Running x86_match_min_microcode_rev() on a Zen5 CPU trips up KASAN for an out of bounds access. Fixes: 607b9fb2ce248 ("x86/CPU/AMD: Add RDSEED fix for Zen5") Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251104161007.269885-1-mario.limonciello@amd.com --- arch/x86/kernel/cpu/amd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8e36964a7721..2ba9f2d42d8c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1038,6 +1038,7 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) static const struct x86_cpu_id zen5_rdseed_microcode[] = { ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + {}, }; static void init_amd_zen5(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 284922f4c563aa3a8558a00f2a05722133237fe8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Nov 2025 15:25:20 +0900 Subject: x86: uaccess: don't use runtime-const rewriting in modules The runtime-const infrastructure was never designed to handle the modular case, because the constant fixup is only done at boot time for core kernel code. But by the time I used it for the x86-64 user space limit handling in commit 86e6b1547b3d ("x86: fix user address masking non-canonical speculation issue"), I had completely repressed that fact. And it all happens to work because the only code that currently actually gets inlined by modules is for the access_ok() limit check, where the default constant value works even when not fixed up. Because at least I had intentionally made it be something that is in the non-canonical address space region. But it's technically very wrong, and it does mean that at least in theory, the use of 'access_ok()' + '__get_user()' can trigger the same speculation issue with non-canonical addresses that the original commit was all about. The pattern is unusual enough that this probably doesn't matter in practice, but very wrong is still very wrong. Also, let's fix it before the nice optimized scoped user accessor helpers that Thomas Gleixner is working on cause this pseudo-constant to then be more widely used. This all came up due to an unrelated discussion with Mateusz Guzik about using the runtime const infrastructure for names_cachep accesses too. There the modular case was much more obviously broken, and Mateusz noted it in his 'v2' of the patch series. That then made me notice how broken 'access_ok()' had been in modules all along. Mea culpa, mea maxima culpa. Fix it by simply not using the runtime-const code in modules, and just using the USER_PTR_MAX variable value instead. This is not performance-critical like the core user accessor functions (get_user() and friends) are. Also make sure this doesn't get forgotten the next time somebody wants to do runtime constant optimizations by having the x86 runtime-const.h header file error out if included by modules. Fixes: 86e6b1547b3d ("x86: fix user address masking non-canonical speculation issue") Acked-by: Borislav Petkov Acked-by: Sean Christopherson Cc: Thomas Gleixner Triggered-by: Mateusz Guzik Link: https://lore.kernel.org/all/20251030105242.801528-1-mjguzik@gmail.com/ Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7d3512914ca..02d97834a1d4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -78,6 +78,10 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Used for modules: built-in code uses runtime constants */ +unsigned long USER_PTR_MAX; +EXPORT_SYMBOL(USER_PTR_MAX); + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ @@ -2579,7 +2583,7 @@ void __init arch_cpu_finalize_init(void) alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { - unsigned long USER_PTR_MAX = TASK_SIZE_MAX; + USER_PTR_MAX = TASK_SIZE_MAX; /* * Enable this when LAM is gated on LASS support -- cgit v1.2.3 From 20a0bc10272fa17a44fc857c31574a8306f60d20 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:03 +0100 Subject: x86/fgraph,bpf: Fix stack ORC unwind from kprobe_multi return probe Currently we don't get stack trace via ORC unwinder on top of fgraph exit handler. We can see that when generating stacktrace from kretprobe_multi bpf program which is based on fprobe/fgraph. The reason is that the ORC unwind code won't get pass the return_to_handler callback installed by fgraph return probe machinery. Solving this by creating stack frame in return_to_handler expected by ftrace_graph_ret_addr function to recover original return address and continue with the unwind. Also updating the pt_regs data with cs/flags/rsp which are needed for successful stack retrieval from ebpf bpf_get_stackid helper. - in get_perf_callchain we check user_mode(regs) so CS has to be set - in perf_callchain_kernel we call perf_hw_regs(regs), so EFLAGS/FIXED has to be unset Acked-by: Masami Hiramatsu (Google) Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- arch/x86/kernel/ftrace_64.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 367da3638167..823dbdd0eb41 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -354,12 +354,17 @@ SYM_CODE_START(return_to_handler) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR + /* Restore return_to_handler value that got eaten by previous ret instruction. */ + subq $8, %rsp + UNWIND_HINT_FUNC + /* Save ftrace_regs for function exit context */ subq $(FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rdx, RDX(%rsp) movq %rbp, RBP(%rsp) + movq %rsp, RSP(%rsp) movq %rsp, %rdi call ftrace_return_to_handler @@ -368,7 +373,8 @@ SYM_CODE_START(return_to_handler) movq RDX(%rsp), %rdx movq RAX(%rsp), %rax - addq $(FRAME_SIZE), %rsp + addq $(FRAME_SIZE) + 8, %rsp + /* * Jump back to the old return address. This cannot be JMP_NOSPEC rdi * since IBT would demand that contain ENDBR, which simply isn't so for -- cgit v1.2.3 From d23550efc6800841b4d1639784afaebdea946ae0 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 6 Nov 2025 12:28:54 -0600 Subject: x86/microcode/AMD: Add more known models to entry sign checking Two Zen5 systems are missing from need_sha_check(). Add them. Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://patch.msgid.link/20251106182904.4143757-1-superm1@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b7c797dc94f4..dc82153009da 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -220,10 +220,12 @@ static bool need_sha_check(u32 cur_rev) case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; case 0xb0021: return cur_rev <= 0xb002146; break; + case 0xb0081: return cur_rev <= 0xb008111; break; case 0xb1010: return cur_rev <= 0xb101046; break; case 0xb2040: return cur_rev <= 0xb204031; break; case 0xb4040: return cur_rev <= 0xb404031; break; case 0xb6000: return cur_rev <= 0xb600031; break; + case 0xb6080: return cur_rev <= 0xb608031; break; case 0xb7000: return cur_rev <= 0xb700031; break; default: break; } -- cgit v1.2.3 From 4fe5934db4a7187d358f1af1b3ef9b6dd59bce58 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 7 Nov 2025 13:11:41 +0530 Subject: ACPI: CPPC: Detect preferred core availability on online CPUs Commit 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") introduced the ability to detect the preferred core on AMD platforms by checking if there at least two distinct highest_perf values. However, it uses for_each_present_cpu() to iterate through all the CPUs in the platform, which is problematic when the kernel is booted with "nosmt=force" commandline option. Hence limit the search to only the online CPUs. Fixes: 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") Reported-by: Christopher Harris Closes: https://lore.kernel.org/lkml/CAM+eXpdDT7KjLV0AxEwOLkSJ2QtrsvGvjA2cCHvt1d0k2_C4Cw@mail.gmail.com/ Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" Tested-by: Chrisopher Harris Signed-off-by: Gautham R. Shenoy Link: https://patch.msgid.link/20251107074145.2340-2-gautham.shenoy@amd.com Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/cppc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 7047124490f6..d7c8ef1e354d 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -196,7 +196,7 @@ int amd_detect_prefcore(bool *detected) break; } - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { u32 tmp; int ret; -- cgit v1.2.3 From e1a97a627cd01d73fac5dd054d8f3de601ef2781 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 13 Nov 2025 16:35:50 -0600 Subject: x86/CPU/AMD: Add additional fixed RDSEED microcode revisions Microcode that resolves the RDSEED failure (SB-7055 [1]) has been released for additional Zen5 models to linux-firmware [2]. Update the zen5_rdseed_microcode array to cover these new models. Fixes: 607b9fb2ce24 ("x86/CPU/AMD: Add RDSEED fix for Zen5") Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-7055.html [1] Link: https://gitlab.com/kernel-firmware/linux-firmware/-/commit/6167e5566900cf236f7a69704e8f4c441bc7212a [2] Link: https://patch.msgid.link/20251113223608.1495655-1-mario.limonciello@amd.com --- arch/x86/kernel/cpu/amd.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2ba9f2d42d8c..5d46709c58d0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1037,7 +1037,14 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) static const struct x86_cpu_id zen5_rdseed_microcode[] = { ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), + ZEN_MODEL_STEP_UCODE(0x1a, 0x08, 0x1, 0x0b008121), ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + ZEN_MODEL_STEP_UCODE(0x1a, 0x24, 0x0, 0x0b204037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x0, 0x0b404035), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x1, 0x0b404108), + ZEN_MODEL_STEP_UCODE(0x1a, 0x60, 0x0, 0x0b600037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x68, 0x0, 0x0b608038), + ZEN_MODEL_STEP_UCODE(0x1a, 0x70, 0x0, 0x0b700037), {}, }; -- cgit v1.2.3 From dd14022a7ce96963aa923e35cf4bcc8c32f95840 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 14 Nov 2025 14:01:14 +0100 Subject: x86/microcode/AMD: Add Zen5 model 0x44, stepping 0x1 minrev Add the minimum Entrysign revision for that model+stepping to the list of minimum revisions. Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Reported-by: Andrew Cooper Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/e94dd76b-4911-482f-8500-5c848a3df026@citrix.com --- arch/x86/kernel/cpu/microcode/amd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index dc82153009da..a881bf4c2011 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -224,6 +224,7 @@ static bool need_sha_check(u32 cur_rev) case 0xb1010: return cur_rev <= 0xb101046; break; case 0xb2040: return cur_rev <= 0xb204031; break; case 0xb4040: return cur_rev <= 0xb404031; break; + case 0xb4041: return cur_rev <= 0xb404101; break; case 0xb6000: return cur_rev <= 0xb600031; break; case 0xb6080: return cur_rev <= 0xb608031; break; case 0xb7000: return cur_rev <= 0xb700031; break; -- cgit v1.2.3