diff options
author | Ingo Molnar <mingo@kernel.org> | 2025-05-13 10:37:01 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2025-05-13 10:37:01 +0200 |
commit | 2fb8414e644b133a9c3250f408232af99da256a4 (patch) | |
tree | 62212fe1b6dec3949a47ac6d92f3070842ab9ae2 /arch/x86 | |
parent | 821f82125c47217390099532b09bd09cee9adaac (diff) | |
parent | c1ab4ce3cb759f69fb9085a060e568b73e8f5cd8 (diff) |
Merge branch 'x86/cpu' into x86/core, to resolve conflicts
Conflicts:
arch/x86/kernel/cpu/bugs.c
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
32 files changed, 1026 insertions, 893 deletions
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 0252b7ea8bca..172619932fe3 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -26,7 +26,7 @@ static u32 ibs_caps; #include <linux/hardirq.h> #include <asm/nmi.h> -#include <asm/amd-ibs.h> +#include <asm/amd/ibs.h> /* attr.config2 */ #define IBS_SW_FILTER_MASK 1 diff --git a/arch/x86/include/asm/amd_hsmp.h b/arch/x86/include/asm/amd/hsmp.h index 03c2ce3edaf5..2137f62853ed 100644 --- a/arch/x86/include/asm/amd_hsmp.h +++ b/arch/x86/include/asm/amd/hsmp.h @@ -1,5 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - #ifndef _ASM_X86_AMD_HSMP_H_ #define _ASM_X86_AMD_HSMP_H_ @@ -13,4 +12,5 @@ static inline int hsmp_send_message(struct hsmp_message *msg) return -ENODEV; } #endif + #endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd/ibs.h index 77f3a589a99a..3ee5903982c2 100644 --- a/arch/x86/include/asm/amd-ibs.h +++ b/arch/x86/include/asm/amd/ibs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + /* * From PPR Vol 1 for AMD Family 19h Model 01h B1 * 55898 Rev 0.35 - Feb 5, 2021 @@ -151,3 +154,5 @@ struct perf_ibs_data { }; u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; }; + +#endif /* _ASM_X86_AMD_IBS_H */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd/nb.h index adfa0854cf2d..ddb5108cf46c 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd/nb.h @@ -4,7 +4,7 @@ #include <linux/ioport.h> #include <linux/pci.h> -#include <asm/amd_node.h> +#include <asm/amd/node.h> struct amd_nb_bus_dev_range { u8 bus; diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd/node.h index 23fe617898a8..23fe617898a8 100644 --- a/arch/x86/include/asm/amd_node.h +++ b/arch/x86/include/asm/amd/node.h diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6c2c152d8a67..bc81b9d1aeca 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -476,11 +476,11 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ -#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ -#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ -#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ +#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -519,7 +519,7 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ @@ -527,10 +527,10 @@ #define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ -#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ -#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index d5749b25fa10..585819331dc6 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -4,5 +4,6 @@ #define _ASM_X86_CPUID_H #include <asm/cpuid/api.h> +#include <asm/cpuid/leaf_0x2_api.h> #endif /* _ASM_X86_CPUID_H */ diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h index 9c180c9cc58e..bf76a1706d02 100644 --- a/arch/x86/include/asm/cpuid/api.h +++ b/arch/x86/include/asm/cpuid/api.h @@ -36,9 +36,9 @@ static inline void native_cpuid(u32 *eax, u32 *ebx, } #define NATIVE_CPUID_REG(reg) \ -static inline u32 native_cpuid_##reg(u32 op) \ +static inline u32 native_cpuid_##reg(u32 op) \ { \ - u32 eax = op, ebx, ecx = 0, edx; \ + u32 eax = op, ebx, ecx = 0, edx; \ \ native_cpuid(&eax, &ebx, &ecx, &edx); \ \ @@ -207,4 +207,13 @@ static inline u32 hypervisor_cpuid_base(const char *sig, u32 leaves) return 0; } +/* + * CPUID(0x80000006) parsing helpers + */ + +static inline bool cpuid_amd_hygon_has_l3_cache(void) +{ + return cpuid_edx(0x80000006); +} + #endif /* _ASM_X86_CPUID_API_H */ diff --git a/arch/x86/include/asm/cpuid/leaf_0x2_api.h b/arch/x86/include/asm/cpuid/leaf_0x2_api.h new file mode 100644 index 000000000000..09fa3070b271 --- /dev/null +++ b/arch/x86/include/asm/cpuid/leaf_0x2_api.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_LEAF_0x2_API_H +#define _ASM_X86_CPUID_LEAF_0x2_API_H + +#include <asm/cpuid/api.h> +#include <asm/cpuid/types.h> + +/** + * cpuid_get_leaf_0x2_regs() - Return sanitized leaf 0x2 register output + * @regs: Output parameter + * + * Query CPUID leaf 0x2 and store its output in @regs. Force set any + * invalid 1-byte descriptor returned by the hardware to zero (the NULL + * cache/TLB descriptor) before returning it to the caller. + * + * Use for_each_leaf_0x2_entry() to iterate over the register output in + * parsed form. + */ +static inline void cpuid_get_leaf_0x2_regs(union leaf_0x2_regs *regs) +{ + cpuid_leaf(0x2, regs); + + /* + * All Intel CPUs must report an iteration count of 1. In case + * of bogus hardware, treat all returned descriptors as NULL. + */ + if (regs->desc[0] != 0x01) { + for (int i = 0; i < 4; i++) + regs->regv[i] = 0; + return; + } + + /* + * The most significant bit (MSB) of each register must be clear. + * If a register is invalid, replace its descriptors with NULL. + */ + for (int i = 0; i < 4; i++) { + if (regs->reg[i].invalid) + regs->regv[i] = 0; + } +} + +/** + * for_each_leaf_0x2_entry() - Iterator for parsed leaf 0x2 descriptors + * @regs: Leaf 0x2 register output, returned by cpuid_get_leaf_0x2_regs() + * @__ptr: u8 pointer, for macro internal use only + * @entry: Pointer to parsed descriptor information at each iteration + * + * Loop over the 1-byte descriptors in the passed leaf 0x2 output registers + * @regs. Provide the parsed information for each descriptor through @entry. + * + * To handle cache-specific descriptors, switch on @entry->c_type. For TLB + * descriptors, switch on @entry->t_type. + * + * Example usage for cache descriptors:: + * + * const struct leaf_0x2_table *entry; + * union leaf_0x2_regs regs; + * u8 *ptr; + * + * cpuid_get_leaf_0x2_regs(®s); + * for_each_leaf_0x2_entry(regs, ptr, entry) { + * switch (entry->c_type) { + * ... + * } + * } + */ +#define for_each_leaf_0x2_entry(regs, __ptr, entry) \ + for (__ptr = &(regs).desc[1]; \ + __ptr < &(regs).desc[16] && (entry = &cpuid_0x2_table[*__ptr]); \ + __ptr++) + +#endif /* _ASM_X86_CPUID_LEAF_0x2_API_H */ diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h index 8582e27e836d..c95fee66e148 100644 --- a/arch/x86/include/asm/cpuid/types.h +++ b/arch/x86/include/asm/cpuid/types.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_CPUID_TYPES_H #define _ASM_X86_CPUID_TYPES_H +#include <linux/build_bug.h> #include <linux/types.h> /* @@ -29,4 +30,99 @@ enum cpuid_regs_idx { #define CPUID_LEAF_FREQ 0x16 #define CPUID_LEAF_TILE 0x1d +/* + * Types for CPUID(0x2) parsing + * Check <asm/cpuid/leaf_0x2_api.h> + */ + +struct leaf_0x2_reg { + u32 : 31, + invalid : 1; +}; + +union leaf_0x2_regs { + struct leaf_0x2_reg reg[4]; + u32 regv[4]; + u8 desc[16]; +}; + +/* + * Leaf 0x2 1-byte descriptors' cache types + * To be used for their mappings at cpuid_0x2_table[] + * + * Start at 1 since type 0 is reserved for HW byte descriptors which are + * not recognized by the kernel; i.e., those without an explicit mapping. + */ +enum _cache_table_type { + CACHE_L1_INST = 1, + CACHE_L1_DATA, + CACHE_L2, + CACHE_L3 + /* Adjust __TLB_TABLE_TYPE_BEGIN before adding more types */ +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _cache_table_type) == 1); +#endif + +/* + * Ensure that leaf 0x2 cache and TLB type values do not intersect, + * since they share the same type field at struct cpuid_0x2_table. + */ +#define __TLB_TABLE_TYPE_BEGIN (CACHE_L3 + 1) + +/* + * Leaf 0x2 1-byte descriptors' TLB types + * To be used for their mappings at cpuid_0x2_table[] + */ +enum _tlb_table_type { + TLB_INST_4K = __TLB_TABLE_TYPE_BEGIN, + TLB_INST_4M, + TLB_INST_2M_4M, + TLB_INST_ALL, + + TLB_DATA_4K, + TLB_DATA_4M, + TLB_DATA_2M_4M, + TLB_DATA_4K_4M, + TLB_DATA_1G, + TLB_DATA_1G_2M_4M, + + TLB_DATA0_4K, + TLB_DATA0_4M, + TLB_DATA0_2M_4M, + + STLB_4K, + STLB_4K_2M, +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _tlb_table_type) == 1); +#endif + +/* + * Combined parsing table for leaf 0x2 cache and TLB descriptors. + */ + +struct leaf_0x2_table { + union { + enum _cache_table_type c_type; + enum _tlb_table_type t_type; + }; + union { + short c_size; + short entries; + }; +}; + +extern const struct leaf_0x2_table cpuid_0x2_table[256]; + +/* + * All of leaf 0x2's one-byte TLB descriptors implies the same number of entries + * for their respective TLB types. TLB descriptor 0x63 is an exception: it + * implies 4 dTLB entries for 1GB pages and 32 dTLB entries for 2MB or 4MB pages. + * + * Encode that descriptor's dTLB entry count for 2MB/4MB pages here, as the entry + * count for dTLB 1GB pages is already encoded at the cpuid_0x2_table[]'s mapping. + */ +#define TLB_0x63_2M_4M_ENTRIES 32 + #endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 81c4a13e4c0d..51a677fe9a8d 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -327,7 +327,7 @@ .endm .macro CLEAR_BRANCH_HISTORY_VMEXIT - ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT .endm #else #define CLEAR_BRANCH_HISTORY diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 77bfb846490c..62ca714aae77 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -272,7 +272,7 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) } /* detect if running on heterogeneous design */ - if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) { + if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES)) { switch (core_type) { case TOPO_CPU_TYPE_UNKNOWN: pr_warn("Undefined core type found for cpu %d\n", cpu); diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index c884deca839b..3485d419c2f5 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -39,7 +39,7 @@ #include <asm/gart.h> #include <asm/set_memory.h> #include <asm/dma.h> -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> #include <asm/x86_init.h> static unsigned long iommu_bus_base; /* GART remapping area (physical) */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 6d12a9b69432..ffaad175cee2 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -13,7 +13,9 @@ #include <linux/export.h> #include <linux/spinlock.h> #include <linux/pci_ids.h> -#include <asm/amd_nb.h> + +#include <asm/amd/nb.h> +#include <asm/cpuid.h> static u32 *flush_words; @@ -91,10 +93,7 @@ static int amd_cache_northbridges(void) if (amd_gart_present()) amd_northbridges.flags |= AMD_NB_GART; - /* - * Check for L3 cache presence. - */ - if (!cpuid_edx(0x80000006)) + if (!cpuid_amd_hygon_has_l3_cache()) return 0; /* diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c index b670fa85c61b..a40176b62eb5 100644 --- a/arch/x86/kernel/amd_node.c +++ b/arch/x86/kernel/amd_node.c @@ -9,7 +9,7 @@ */ #include <linux/debugfs.h> -#include <asm/amd_node.h> +#include <asm/amd/node.h> /* * AMD Nodes are a physical collection of I/O devices within an SoC. There can be one diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 89c0c8a3fc7e..769321185a08 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -29,7 +29,7 @@ #include <asm/gart.h> #include <asm/pci-direct.h> #include <asm/dma.h> -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> #include <asm/x86_init.h> #include <linux/crash_dump.h> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4efdf5c2efc8..1e26179ff18c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -24,7 +24,7 @@ obj-y += rdrand.o obj-y += match.o obj-y += bugs.o obj-y += aperfmperf.o -obj-y += cpuid-deps.o +obj-y += cpuid-deps.o cpuid_0x2_table.o obj-y += umwait.o obj-y += capflags.o powerflags.o @@ -38,6 +38,9 @@ obj-y += intel.o tsx.o obj-$(CONFIG_PM) += intel_epb.o endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o +ifeq ($(CONFIG_AMD_NB)$(CONFIG_SYSFS),yy) +obj-y += amd_cache_disable.o +endif obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o diff --git a/arch/x86/kernel/cpu/amd_cache_disable.c b/arch/x86/kernel/cpu/amd_cache_disable.c new file mode 100644 index 000000000000..8843b9557aea --- /dev/null +++ b/arch/x86/kernel/cpu/amd_cache_disable.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD L3 cache_disable_{0,1} sysfs handling + * Documentation/ABI/testing/sysfs-devices-system-cpu + */ + +#include <linux/cacheinfo.h> +#include <linux/capability.h> +#include <linux/pci.h> +#include <linux/sysfs.h> + +#include <asm/amd/nb.h> + +#include "cpu.h" + +/* + * L3 cache descriptors + */ +static void amd_calc_l3_indices(struct amd_northbridge *nb) +{ + struct amd_l3_cache *l3 = &nb->l3_cache; + unsigned int sc0, sc1, sc2, sc3; + u32 val = 0; + + pci_read_config_dword(nb->misc, 0x1C4, &val); + + /* calculate subcache sizes */ + l3->subcaches[0] = sc0 = !(val & BIT(0)); + l3->subcaches[1] = sc1 = !(val & BIT(4)); + + if (boot_cpu_data.x86 == 0x15) { + l3->subcaches[0] = sc0 += !(val & BIT(1)); + l3->subcaches[1] = sc1 += !(val & BIT(5)); + } + + l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); + l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); + + l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; +} + +/* + * check whether a slot used for disabling an L3 index is occupied. + * @l3: L3 cache descriptor + * @slot: slot number (0..1) + * + * @returns: the disabled index if used or negative value if slot free. + */ +static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned int slot) +{ + unsigned int reg = 0; + + pci_read_config_dword(nb->misc, 0x1BC + slot * 4, ®); + + /* check whether this slot is activated already */ + if (reg & (3UL << 30)) + return reg & 0xfff; + + return -1; +} + +static ssize_t show_cache_disable(struct cacheinfo *ci, char *buf, unsigned int slot) +{ + int index; + struct amd_northbridge *nb = ci->priv; + + index = amd_get_l3_disable_slot(nb, slot); + if (index >= 0) + return sysfs_emit(buf, "%d\n", index); + + return sysfs_emit(buf, "FREE\n"); +} + +#define SHOW_CACHE_DISABLE(slot) \ +static ssize_t \ +cache_disable_##slot##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct cacheinfo *ci = dev_get_drvdata(dev); \ + return show_cache_disable(ci, buf, slot); \ +} + +SHOW_CACHE_DISABLE(0) +SHOW_CACHE_DISABLE(1) + +static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, + unsigned int slot, unsigned long idx) +{ + int i; + + idx |= BIT(30); + + /* + * disable index in all 4 subcaches + */ + for (i = 0; i < 4; i++) { + u32 reg = idx | (i << 20); + + if (!nb->l3_cache.subcaches[i]) + continue; + + pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); + + /* + * We need to WBINVD on a core on the node containing the L3 + * cache which indices we disable therefore a simple wbinvd() + * is not sufficient. + */ + wbinvd_on_cpu(cpu); + + reg |= BIT(31); + pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); + } +} + +/* + * disable a L3 cache index by using a disable-slot + * + * @l3: L3 cache descriptor + * @cpu: A CPU on the node containing the L3 cache + * @slot: slot number (0..1) + * @index: index to disable + * + * @return: 0 on success, error status on failure + */ +static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, + unsigned int slot, unsigned long index) +{ + int ret = 0; + + /* check if @slot is already used or the index is already disabled */ + ret = amd_get_l3_disable_slot(nb, slot); + if (ret >= 0) + return -EEXIST; + + if (index > nb->l3_cache.indices) + return -EINVAL; + + /* check whether the other slot has disabled the same index already */ + if (index == amd_get_l3_disable_slot(nb, !slot)) + return -EEXIST; + + amd_l3_disable_index(nb, cpu, slot, index); + + return 0; +} + +static ssize_t store_cache_disable(struct cacheinfo *ci, const char *buf, + size_t count, unsigned int slot) +{ + struct amd_northbridge *nb = ci->priv; + unsigned long val = 0; + int cpu, err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + cpu = cpumask_first(&ci->shared_cpu_map); + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = amd_set_l3_disable_slot(nb, cpu, slot, val); + if (err) { + if (err == -EEXIST) + pr_warn("L3 slot %d in use/index already disabled!\n", + slot); + return err; + } + return count; +} + +#define STORE_CACHE_DISABLE(slot) \ +static ssize_t \ +cache_disable_##slot##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct cacheinfo *ci = dev_get_drvdata(dev); \ + return store_cache_disable(ci, buf, count, slot); \ +} + +STORE_CACHE_DISABLE(0) +STORE_CACHE_DISABLE(1) + +static ssize_t subcaches_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cacheinfo *ci = dev_get_drvdata(dev); + int cpu = cpumask_first(&ci->shared_cpu_map); + + return sysfs_emit(buf, "%x\n", amd_get_subcaches(cpu)); +} + +static ssize_t subcaches_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cacheinfo *ci = dev_get_drvdata(dev); + int cpu = cpumask_first(&ci->shared_cpu_map); + unsigned long val; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (kstrtoul(buf, 16, &val) < 0) + return -EINVAL; + + if (amd_set_subcaches(cpu, val)) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR_RW(cache_disable_0); +static DEVICE_ATTR_RW(cache_disable_1); +static DEVICE_ATTR_RW(subcaches); + +static umode_t cache_private_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct cacheinfo *ci = dev_get_drvdata(dev); + umode_t mode = attr->mode; + + if (!ci->priv) + return 0; + + if ((attr == &dev_attr_subcaches.attr) && + amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + return mode; + + if ((attr == &dev_attr_cache_disable_0.attr || + attr == &dev_attr_cache_disable_1.attr) && + amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + return mode; + + return 0; +} + +static struct attribute_group cache_private_group = { + .is_visible = cache_private_attrs_is_visible, +}; + +static void init_amd_l3_attrs(void) +{ + static struct attribute **amd_l3_attrs; + int n = 1; + + if (amd_l3_attrs) /* already initialized */ + return; + + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + n += 2; + if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + n += 1; + + amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL); + if (!amd_l3_attrs) + return; + + n = 0; + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { + amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr; + amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr; + } + if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + amd_l3_attrs[n++] = &dev_attr_subcaches.attr; + + cache_private_group.attrs = amd_l3_attrs; +} + +const struct attribute_group *cache_get_priv_group(struct cacheinfo *ci) +{ + struct amd_northbridge *nb = ci->priv; + + if (ci->level < 3 || !nb) + return NULL; + + if (nb && nb->l3_cache.indices) + init_amd_l3_attrs(); + + return &cache_private_group; +} + +struct amd_northbridge *amd_init_l3_cache(int index) +{ + struct amd_northbridge *nb; + int node; + + /* only for L3, and not in virtualized environments */ + if (index < 3) + return NULL; + + node = topology_amd_node_id(smp_processor_id()); + nb = node_to_amd_nb(node); + if (nb && !nb->l3_cache.indices) + amd_calc_l3_indices(nb); + + return nb; +} diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a4f3b1d03406..246a76ddbf46 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -551,7 +551,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || - boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { mmio_mitigation = MMIO_MITIGATION_OFF; return; @@ -1880,13 +1879,13 @@ static void __init bhi_apply_mitigation(void) if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) { pr_info("Spectre BHI mitigation: SW BHB clearing on VM exit only\n"); - setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_VMEXIT); return; } pr_info("Spectre BHI mitigation: SW BHB clearing on syscall and VM exit\n"); setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); - setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_VMEXIT); } static void __init spectre_v2_select_mitigation(void) @@ -2947,9 +2946,6 @@ static ssize_t tsx_async_abort_show_state(char *buf) static ssize_t mmio_stale_data_show_state(char *buf) { - if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) - return sysfs_emit(buf, "Unknown: No mitigations\n"); - if (mmio_mitigation == MMIO_MITIGATION_OFF) return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); @@ -3025,7 +3021,7 @@ static const char *spectre_bhi_state(void) !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && rrsba_disabled) return "; BHI: Retpoline"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_VMEXIT)) return "; BHI: Vulnerable, KVM: SW loop"; return "; BHI: Vulnerable"; @@ -3134,7 +3130,6 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return srbds_show_state(buf); case X86_BUG_MMIO_STALE_DATA: - case X86_BUG_MMIO_UNKNOWN: return mmio_stale_data_show_state(buf); case X86_BUG_RETBLEED: @@ -3203,10 +3198,7 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) { - if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) - return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); - else - return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index b3a520959b51..f866d94352fb 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -1,35 +1,28 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Routines to identify caches on Intel CPU. + * x86 CPU caches detection and configuration * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) - * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. - * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. + * Previous changes + * - Venkatesh Pallipadi: Cache identification through CPUID(0x4) + * - Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure + * - Andi Kleen / Andreas Herrmann: CPUID(0x4) emulation on AMD */ #include <linux/cacheinfo.h> -#include <linux/capability.h> #include <linux/cpu.h> #include <linux/cpuhotplug.h> -#include <linux/pci.h> #include <linux/stop_machine.h> -#include <linux/sysfs.h> -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> #include <asm/cacheinfo.h> #include <asm/cpufeature.h> +#include <asm/cpuid.h> #include <asm/mtrr.h> #include <asm/smp.h> #include <asm/tlbflush.h> #include "cpu.h" -#define LVL_1_INST 1 -#define LVL_1_DATA 2 -#define LVL_2 3 -#define LVL_3 4 - /* Shared last level cache maps */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); @@ -41,208 +34,127 @@ static cpumask_var_t cpu_cacheinfo_mask; /* Kernel controls MTRR and/or PAT MSRs. */ unsigned int memory_caching_control __ro_after_init; -struct _cache_table { - unsigned char descriptor; - char cache_type; - short size; -}; - -#define MB(x) ((x) * 1024) - -/* All the cache descriptor types we care about (no TLB or - trace cache entries) */ - -static const struct _cache_table cache_table[] = -{ - { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ - { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ - { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ - { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ - { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */ - { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ - { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ - { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */ - { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ - { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ - { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ - { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */ - { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ - { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ - { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ - { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */ - { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ - { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */ - { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */ - { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */ - { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ - { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */ - { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ - { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ - { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ - { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */ - { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */ - { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */ - { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */ - { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */ - { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */ - { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */ - { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */ - { 0x00, 0, 0} -}; - - enum _cache_type { - CTYPE_NULL = 0, - CTYPE_DATA = 1, - CTYPE_INST = 2, - CTYPE_UNIFIED = 3 + CTYPE_NULL = 0, + CTYPE_DATA = 1, + CTYPE_INST = 2, + CTYPE_UNIFIED = 3 }; union _cpuid4_leaf_eax { struct { - enum _cache_type type:5; - unsigned int level:3; - unsigned int is_self_initializing:1; - unsigned int is_fully_associative:1; - unsigned int reserved:4; - unsigned int num_threads_sharing:12; - unsigned int num_cores_on_die:6; + enum _cache_type type :5; + unsigned int level :3; + unsigned int is_self_initializing :1; + unsigned int is_fully_associative :1; + unsigned int reserved :4; + unsigned int num_threads_sharing :12; + unsigned int num_cores_on_die :6; } split; u32 full; }; union _cpuid4_leaf_ebx { struct { - unsigned int coherency_line_size:12; - unsigned int physical_line_partition:10; - unsigned int ways_of_associativity:10; + unsigned int coherency_line_size :12; + unsigned int physical_line_partition :10; + unsigned int ways_of_associativity :10; } split; u32 full; }; union _cpuid4_leaf_ecx { struct { - unsigned int number_of_sets:32; + unsigned int number_of_sets :32; } split; u32 full; }; -struct _cpuid4_info_regs { +struct _cpuid4_info { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned int id; unsigned long size; - struct amd_northbridge *nb; }; -/* AMD doesn't have CPUID4. Emulate it here to report the same - information to the user. This makes some assumptions about the machine: - L2 not shared, no SMT etc. that is currently true on AMD CPUs. +/* Map CPUID(0x4) EAX.cache_type to <linux/cacheinfo.h> types */ +static const enum cache_type cache_type_map[] = { + [CTYPE_NULL] = CACHE_TYPE_NOCACHE, + [CTYPE_DATA] = CACHE_TYPE_DATA, + [CTYPE_INST] = CACHE_TYPE_INST, + [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, +}; + +/* + * Fallback AMD CPUID(0x4) emulation + * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d) + * + * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should + * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006). + */ + +#define AMD_CPUID4_FULLY_ASSOCIATIVE 0xffff +#define AMD_L2_L3_INVALID_ASSOC 0x9 - In theory the TLBs could be reported as fake type (they are in "dummy"). - Maybe later */ union l1_cache { struct { - unsigned line_size:8; - unsigned lines_per_tag:8; - unsigned assoc:8; - unsigned size_in_kb:8; + unsigned line_size :8; + unsigned lines_per_tag :8; + unsigned assoc :8; + unsigned size_in_kb :8; }; - unsigned val; + unsigned int val; }; union l2_cache { struct { - unsigned line_size:8; - unsigned lines_per_tag:4; - unsigned assoc:4; - unsigned size_in_kb:16; + unsigned line_size :8; + unsigned lines_per_tag :4; + unsigned assoc :4; + unsigned size_in_kb :16; }; - unsigned val; + unsigned int val; }; union l3_cache { struct { - unsigned line_size:8; - unsigned lines_per_tag:4; - unsigned assoc:4; - unsigned res:2; - unsigned size_encoded:14; + unsigned line_size :8; + unsigned lines_per_tag :4; + unsigned assoc :4; + unsigned res :2; + unsigned size_encoded :14; }; - unsigned val; + unsigned int val; }; +/* L2/L3 associativity mapping */ static const unsigned short assocs[] = { - [1] = 1, - [2] = 2, - [4] = 4, - [6] = 8, - [8] = 16, - [0xa] = 32, - [0xb] = 48, - [0xc] = 64, - [0xd] = 96, - [0xe] = 128, - [0xf] = 0xffff /* fully associative - no way to show this currently */ + [1] = 1, + [2] = 2, + [3] = 3, + [4] = 4, + [5] = 6, + [6] = 8, + [8] = 16, + [0xa] = 32, + [0xb] = 48, + [0xc] = 64, + [0xd] = 96, + [0xe] = 128, + [0xf] = AMD_CPUID4_FULLY_ASSOCIATIVE }; static const unsigned char levels[] = { 1, 1, 2, 3 }; -static const unsigned char types[] = { 1, 2, 3, 3 }; +static const unsigned char types[] = { 1, 2, 3, 3 }; -static const enum cache_type cache_type_map[] = { - [CTYPE_NULL] = CACHE_TYPE_NOCACHE, - [CTYPE_DATA] = CACHE_TYPE_DATA, - [CTYPE_INST] = CACHE_TYPE_INST, - [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, -}; - -static void -amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx) { - unsigned dummy; - unsigned line_size, lines_per_tag, assoc, size_in_kb; - union l1_cache l1i, l1d; + unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb; + union l1_cache l1i, l1d, *l1; union l2_cache l2; union l3_cache l3; - union l1_cache *l1 = &l1d; eax->full = 0; ebx->full = 0; @@ -251,430 +163,155 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); - switch (leaf) { + l1 = &l1d; + switch (index) { case 1: l1 = &l1i; fallthrough; case 0: if (!l1->val) return; - assoc = assocs[l1->assoc]; - line_size = l1->line_size; - lines_per_tag = l1->lines_per_tag; - size_in_kb = l1->size_in_kb; + + assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; break; case 2: - if (!l2.val) + if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC) return; - assoc = assocs[l2.assoc]; - line_size = l2.line_size; - lines_per_tag = l2.lines_per_tag; - /* cpu_data has errata corrections for K7 applied */ - size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); + + /* Use x86_cache_size as it might have K7 errata fixes */ + assoc = assocs[l2.assoc]; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); break; case 3: - if (!l3.val) + if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC) return; - assoc = assocs[l3.assoc]; - line_size = l3.line_size; - lines_per_tag = l3.lines_per_tag; - size_in_kb = l3.size_encoded * 512; + + assoc = assocs[l3.assoc]; + line_size = l3.line_size; + lines_per_tag = l3.lines_per_tag; + size_in_kb = l3.size_encoded * 512; if (boot_cpu_has(X86_FEATURE_AMD_DCM)) { - size_in_kb = size_in_kb >> 1; - assoc = assoc >> 1; + size_in_kb = size_in_kb >> 1; + assoc = assoc >> 1; } break; default: return; } - eax->split.is_self_initializing = 1; - eax->split.type = types[leaf]; - eax->split.level = levels[leaf]; - eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = topology_num_cores_per_package(); + eax->split.is_self_initializing = 1; + eax->split.type = types[index]; + eax->split.level = levels[index]; + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = topology_num_cores_per_package(); - - if (assoc == 0xffff) + if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE) eax->split.is_fully_associative = 1; - ebx->split.coherency_line_size = line_size - 1; - ebx->split.ways_of_associativity = assoc - 1; - ebx->split.physical_line_partition = lines_per_tag - 1; - ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / - (ebx->split.ways_of_associativity + 1) - 1; -} - -#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) - -/* - * L3 cache descriptors - */ -static void amd_calc_l3_indices(struct amd_northbridge *nb) -{ - struct amd_l3_cache *l3 = &nb->l3_cache; - unsigned int sc0, sc1, sc2, sc3; - u32 val = 0; - - pci_read_config_dword(nb->misc, 0x1C4, &val); - - /* calculate subcache sizes */ - l3->subcaches[0] = sc0 = !(val & BIT(0)); - l3->subcaches[1] = sc1 = !(val & BIT(4)); - - if (boot_cpu_data.x86 == 0x15) { - l3->subcaches[0] = sc0 += !(val & BIT(1)); - l3->subcaches[1] = sc1 += !(val & BIT(5)); - } - - l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); - l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); - - l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; -} - -/* - * check whether a slot used for disabling an L3 index is occupied. - * @l3: L3 cache descriptor - * @slot: slot number (0..1) - * - * @returns: the disabled index if used or negative value if slot free. - */ -static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) -{ - unsigned int reg = 0; - - pci_read_config_dword(nb->misc, 0x1BC + slot * 4, ®); - - /* check whether this slot is activated already */ - if (reg & (3UL << 30)) - return reg & 0xfff; - - return -1; -} - -static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf, - unsigned int slot) -{ - int index; - struct amd_northbridge *nb = this_leaf->priv; - - index = amd_get_l3_disable_slot(nb, slot); - if (index >= 0) - return sprintf(buf, "%d\n", index); - - return sprintf(buf, "FREE\n"); -} - -#define SHOW_CACHE_DISABLE(slot) \ -static ssize_t \ -cache_disable_##slot##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ - return show_cache_disable(this_leaf, buf, slot); \ -} -SHOW_CACHE_DISABLE(0) -SHOW_CACHE_DISABLE(1) - -static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, - unsigned slot, unsigned long idx) -{ - int i; - idx |= BIT(30); - - /* - * disable index in all 4 subcaches - */ - for (i = 0; i < 4; i++) { - u32 reg = idx | (i << 20); - - if (!nb->l3_cache.subcaches[i]) - continue; - - pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); - - /* - * We need to WBINVD on a core on the node containing the L3 - * cache which indices we disable therefore a simple wbinvd() - * is not sufficient. - */ - wbinvd_on_cpu(cpu); - - reg |= BIT(31); - pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); - } -} - -/* - * disable a L3 cache index by using a disable-slot - * - * @l3: L3 cache descriptor - * @cpu: A CPU on the node containing the L3 cache - * @slot: slot number (0..1) - * @index: index to disable - * - * @return: 0 on success, error status on failure - */ -static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, - unsigned slot, unsigned long index) -{ - int ret = 0; - - /* check if @slot is already used or the index is already disabled */ - ret = amd_get_l3_disable_slot(nb, slot); - if (ret >= 0) - return -EEXIST; - - if (index > nb->l3_cache.indices) - return -EINVAL; - - /* check whether the other slot has disabled the same index already */ - if (index == amd_get_l3_disable_slot(nb, !slot)) - return -EEXIST; - - amd_l3_disable_index(nb, cpu, slot, index); - - return 0; -} - -static ssize_t store_cache_disable(struct cacheinfo *this_leaf, - const char *buf, size_t count, - unsigned int slot) -{ - unsigned long val = 0; - int cpu, err = 0; - struct amd_northbridge *nb = this_leaf->priv; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - cpu = cpumask_first(&this_leaf->shared_cpu_map); - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = amd_set_l3_disable_slot(nb, cpu, slot, val); - if (err) { - if (err == -EEXIST) - pr_warn("L3 slot %d in use/index already disabled!\n", - slot); - return err; - } - return count; -} - -#define STORE_CACHE_DISABLE(slot) \ -static ssize_t \ -cache_disable_##slot##_store(struct device *dev, \ - struct device_attribute *attr, \ - const char *buf, size_t count) \ -{ \ - struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ - return store_cache_disable(this_leaf, buf, count, slot); \ -} -STORE_CACHE_DISABLE(0) -STORE_CACHE_DISABLE(1) - -static ssize_t subcaches_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cacheinfo *this_leaf = dev_get_drvdata(dev); - int cpu = cpumask_first(&this_leaf->shared_cpu_map); - - return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); -} - -static ssize_t subcaches_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cacheinfo *this_leaf = dev_get_drvdata(dev); - int cpu = cpumask_first(&this_leaf->shared_cpu_map); - unsigned long val; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (kstrtoul(buf, 16, &val) < 0) - return -EINVAL; - - if (amd_set_subcaches(cpu, val)) - return -EINVAL; - - return count; + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assoc - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + (ebx->split.ways_of_associativity + 1) - 1; } -static DEVICE_ATTR_RW(cache_disable_0); -static DEVICE_ATTR_RW(cache_disable_1); -static DEVICE_ATTR_RW(subcaches); - -static umode_t -cache_private_attrs_is_visible(struct kobject *kobj, - struct attribute *attr, int unused) +static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax, + union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx) { - struct device *dev = kobj_to_dev(kobj); - struct cacheinfo *this_leaf = dev_get_drvdata(dev); - umode_t mode = attr->mode; - - if (!this_leaf->priv) - return 0; - - if ((attr == &dev_attr_subcaches.attr) && - amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - return mode; + if (eax.split.type == CTYPE_NULL) + return -EIO; - if ((attr == &dev_attr_cache_disable_0.attr || - attr == &dev_attr_cache_disable_1.attr) && - amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - return mode; + id4->eax = eax; + id4->ebx = ebx; + id4->ecx = ecx; + id4->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } -static struct attribute_group cache_private_group = { - .is_visible = cache_private_attrs_is_visible, -}; - -static void init_amd_l3_attrs(void) -{ - int n = 1; - static struct attribute **amd_l3_attrs; - - if (amd_l3_attrs) /* already initialized */ - return; - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - n += 2; - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - n += 1; - - amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL); - if (!amd_l3_attrs) - return; - - n = 0; - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { - amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr; - amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr; - } - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - amd_l3_attrs[n++] = &dev_attr_subcaches.attr; - - cache_private_group.attrs = amd_l3_attrs; -} - -const struct attribute_group * -cache_get_priv_group(struct cacheinfo *this_leaf) +static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4) { - struct amd_northbridge *nb = this_leaf->priv; - - if (this_leaf->level < 3 || !nb) - return NULL; + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + u32 ignored; - if (nb && nb->l3_cache.indices) - init_amd_l3_attrs(); + if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored); + else + legacy_amd_cpuid4(index, &eax, &ebx, &ecx); - return &cache_private_group; + return cpuid4_info_fill_done(id4, eax, ebx, ecx); } -static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) +static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4) { - int node; + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + u32 ignored; - /* only for L3, and not in virtualized environments */ - if (index < 3) - return; + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored); - node = topology_amd_node_id(smp_processor_id()); - this_leaf->nb = node_to_amd_nb(node); - if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) - amd_calc_l3_indices(this_leaf->nb); + return cpuid4_info_fill_done(id4, eax, ebx, ecx); } -#else -#define amd_init_l3_cache(x, y) -#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ -static int -cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) +static int fill_cpuid4_info(int index, struct _cpuid4_info *id4) { - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned edx; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) - cpuid_count(0x8000001d, index, &eax.full, - &ebx.full, &ecx.full, &edx); - else - amd_cpuid4(index, &eax, &ebx, &ecx); - amd_init_l3_cache(this_leaf, index); - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - cpuid_count(0x8000001d, index, &eax.full, - &ebx.full, &ecx.full, &edx); - amd_init_l3_cache(this_leaf, index); - } else { - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); - } + u8 cpu_vendor = boot_cpu_data.x86_vendor; - if (eax.split.type == CTYPE_NULL) - return -EIO; /* better error ? */ - - this_leaf->eax = eax; - this_leaf->ebx = ebx; - this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); - return 0; + return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ? + amd_fill_cpuid4_info(index, id4) : + intel_fill_cpuid4_info(index, id4); } static int find_num_cache_leaves(struct cpuinfo_x86 *c) { - unsigned int eax, ebx, ecx, edx, op; - union _cpuid4_leaf_eax cache_eax; - int i = -1; - - if (c->x86_vendor == X86_VENDOR_AMD || - c->x86_vendor == X86_VENDOR_HYGON) - op = 0x8000001d; - else - op = 4; + unsigned int eax, ebx, ecx, edx, op; + union _cpuid4_leaf_eax cache_eax; + int i = -1; + /* Do a CPUID(op) loop to calculate num_cache_leaves */ + op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4; do { ++i; - /* Do cpuid(op) loop to find out num_cache_leaves */ cpuid_count(op, i, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; } while (cache_eax.split.type != CTYPE_NULL); return i; } +/* + * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist. + */ + void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id) { - /* - * We may have multiple LLCs if L3 caches exist, so check if we - * have an L3 cache by looking at the L3 cache CPUID leaf. - */ - if (!cpuid_edx(0x80000006)) + if (!cpuid_amd_hygon_has_l3_cache()) return; if (c->x86 < 0x17) { - /* LLC is at the node level. */ + /* Pre-Zen: LLC is at the node level */ c->topo.llc_id = die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* - * LLC is at the core complex level. - * Core complex ID is ApicId[3] for these processors. + * Family 17h up to 1F models: LLC is at the core + * complex level. Core complex ID is ApicId[3]. */ c->topo.llc_id = c->topo.apicid >> 3; } else { /* - * LLC ID is calculated from the number of threads sharing the - * cache. - * */ + * Newer families: LLC ID is calculated from the number + * of threads sharing the L3 cache. + */ u32 eax, ebx, ecx, edx, num_sharing_cache = 0; u32 llc_index = find_num_cache_leaves(c) - 1; @@ -683,25 +320,21 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id) num_sharing_cache = ((eax >> 14) & 0xfff) + 1; if (num_sharing_cache) { - int bits = get_count_order(num_sharing_cache); + int index_msb = get_count_order(num_sharing_cache); - c->topo.llc_id = c->topo.apicid >> bits; + c->topo.llc_id = c->topo.apicid >> index_msb; } } } void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c) { - /* - * We may have multiple LLCs if L3 caches exist, so check if we - * have an L3 cache by looking at the L3 cache CPUID leaf. - */ - if (!cpuid_edx(0x80000006)) + if (!cpuid_amd_hygon_has_l3_cache()) return; /* - * LLC is at the core complex level. - * Core complex ID is ApicId[3] for these processors. + * Hygons are similar to AMD Family 17h up to 1F models: LLC is + * at the core complex level. Core complex ID is ApicId[3]. */ c->topo.llc_id = c->topo.apicid >> 3; } @@ -710,14 +343,10 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) ci->num_leaves = find_num_cache_leaves(c); - } else if (c->extended_cpuid_level >= 0x80000006) { - if (cpuid_edx(0x80000006) & 0xf000) - ci->num_leaves = 4; - else - ci->num_leaves = 3; - } + else if (c->extended_cpuid_level >= 0x80000006) + ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3; } void init_hygon_cacheinfo(struct cpuinfo_x86 *c) @@ -727,148 +356,131 @@ void init_hygon_cacheinfo(struct cpuinfo_x86 *c) ci->num_leaves = find_num_cache_leaves(c); } -void init_intel_cacheinfo(struct cpuinfo_x86 *c) +static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3, + unsigned int l2, unsigned int l1i, unsigned int l1d) { - /* Cache sizes */ - unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; - unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ - unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; - struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); + /* + * If llc_id is still unset, then cpuid_level < 4, which implies + * that the only possibility left is SMT. Since CPUID(0x2) doesn't + * specify any shared caches and SMT shares all caches, we can + * unconditionally set LLC ID to the package ID so that all + * threads share it. + */ + if (c->topo.llc_id == BAD_APICID) + c->topo.llc_id = c->topo.pkg_id; - if (c->cpuid_level > 3) { - /* - * There should be at least one leaf. A non-zero value means - * that the number of leaves has been initialized. - */ - if (!ci->num_leaves) - ci->num_leaves = find_num_cache_leaves(c); + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d); - /* - * Whenever possible use cpuid(4), deterministic cache - * parameters cpuid leaf to find the cache details - */ - for (i = 0; i < ci->num_leaves; i++) { - struct _cpuid4_info_regs this_leaf = {}; - int retval; + if (!l2) + cpu_detect_cache_sizes(c); +} - retval = cpuid4_cache_lookup_regs(i, &this_leaf); - if (retval < 0) - continue; +/* + * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available. + */ +static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c) +{ + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; + const struct leaf_0x2_table *entry; + union leaf_0x2_regs regs; + u8 *ptr; - switch (this_leaf.eax.split.level) { - case 1: - if (this_leaf.eax.split.type == CTYPE_DATA) - new_l1d = this_leaf.size/1024; - else if (this_leaf.eax.split.type == CTYPE_INST) - new_l1i = this_leaf.size/1024; - break; - case 2: - new_l2 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l2_id = c->topo.apicid & ~((1 << index_msb) - 1); - break; - case 3: - new_l3 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l3_id = c->topo.apicid & ~((1 << index_msb) - 1); - break; - default: - break; - } - } - } + if (c->cpuid_level < 2) + return; - /* Don't use CPUID(2) if CPUID(4) is supported. */ - if (!ci->num_leaves && c->cpuid_level > 1) { - /* supports eax=2 call */ - int j, n; - unsigned int regs[4]; - unsigned char *dp = (unsigned char *)regs; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for (i = 0 ; i < n ; i++) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 4 ; j++) - if (regs[j] & (1 << 31)) - regs[j] = 0; - - /* Byte 0 is level count, not a descriptor */ - for (j = 1 ; j < 16 ; j++) { - unsigned char des = dp[j]; - unsigned char k = 0; - - /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) { - if (cache_table[k].descriptor == des) { - switch (cache_table[k].cache_type) { - case LVL_1_INST: - l1i += cache_table[k].size; - break; - case LVL_1_DATA: - l1d += cache_table[k].size; - break; - case LVL_2: - l2 += cache_table[k].size; - break; - case LVL_3: - l3 += cache_table[k].size; - break; - } - - break; - } - - k++; - } - } + cpuid_get_leaf_0x2_regs(®s); + for_each_leaf_0x2_entry(regs, ptr, entry) { + switch (entry->c_type) { + case CACHE_L1_INST: l1i += entry->c_size; break; + case CACHE_L1_DATA: l1d += entry->c_size; break; + case CACHE_L2: l2 += entry->c_size; break; + case CACHE_L3: l3 += entry->c_size; break; } } - if (new_l1d) - l1d = new_l1d; + intel_cacheinfo_done(c, l3, l2, l1i, l1d); +} - if (new_l1i) - l1i = new_l1i; +static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4) +{ + unsigned int num_threads_sharing; + int index_msb; - if (new_l2) { - l2 = new_l2; - c->topo.llc_id = l2_id; - c->topo.l2c_id = l2_id; - } + num_threads_sharing = 1 + id4->eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + return c->topo.apicid & ~((1 << index_msb) - 1); +} - if (new_l3) { - l3 = new_l3; - c->topo.llc_id = l3_id; - } +static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c) +{ + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); + unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID; + unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0; + + if (c->cpuid_level < 4) + return false; /* - * If llc_id is not yet set, this means cpuid_level < 4 which in - * turns means that the only possibility is SMT (as indicated in - * cpuid1). Since cpuid2 doesn't specify shared caches, and we know - * that SMT shares all caches, we can unconditionally set cpu_llc_id to - * c->topo.pkg_id. + * There should be at least one leaf. A non-zero value means + * that the number of leaves has been previously initialized. */ - if (c->topo.llc_id == BAD_APICID) - c->topo.llc_id = c->topo.pkg_id; + if (!ci->num_leaves) + ci->num_leaves = find_num_cache_leaves(c); + + if (!ci->num_leaves) + return false; - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); + for (int i = 0; i < ci->num_leaves; i++) { + struct _cpuid4_info id4 = {}; + int ret; - if (!l2) - cpu_detect_cache_sizes(c); + ret = intel_fill_cpuid4_info(i, &id4); + if (ret < 0) + continue; + + switch (id4.eax.split.level) { + case 1: + if (id4.eax.split.type == CTYPE_DATA) + l1d = id4.size / 1024; + else if (id4.eax.split.type == CTYPE_INST) + l1i = id4.size / 1024; + break; + case 2: + l2 = id4.size / 1024; + l2_id = calc_cache_topo_id(c, &id4); + break; + case 3: + l3 = id4.size / 1024; + l3_id = calc_cache_topo_id(c, &id4); + break; + default: + break; + } + } + + c->topo.l2c_id = l2_id; + c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id; + intel_cacheinfo_done(c, l3, l2, l1i, l1d); + return true; +} + +void init_intel_cacheinfo(struct cpuinfo_x86 *c) +{ + /* Don't use CPUID(0x2) if CPUID(0x4) is supported. */ + if (intel_cacheinfo_0x4(c)) + return; + + intel_cacheinfo_0x2(c); } +/* + * <linux/cacheinfo.h> shared_cpu_map setup, AMD/Hygon + */ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, - struct _cpuid4_info_regs *base) + const struct _cpuid4_info *id4) { struct cpu_cacheinfo *this_cpu_ci; - struct cacheinfo *this_leaf; + struct cacheinfo *ci; int i, sibling; /* @@ -880,18 +492,18 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, this_cpu_ci = get_cpu_cacheinfo(i); if (!this_cpu_ci->info_list) continue; - this_leaf = this_cpu_ci->info_list + index; + + ci = this_cpu_ci->info_list + index; for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { if (!cpu_online(sibling)) continue; - cpumask_set_cpu(sibling, - &this_leaf->shared_cpu_map); + cpumask_set_cpu(sibling, &ci->shared_cpu_map); } } } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { unsigned int apicid, nshared, first, last; - nshared = base->eax.split.num_threads_sharing + 1; + nshared = id4->eax.split.num_threads_sharing + 1; apicid = cpu_data(cpu).topo.apicid; first = apicid - (apicid % nshared); last = first + nshared - 1; @@ -905,14 +517,13 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, if ((apicid < first) || (apicid > last)) continue; - this_leaf = this_cpu_ci->info_list + index; + ci = this_cpu_ci->info_list + index; for_each_online_cpu(sibling) { apicid = cpu_data(sibling).topo.apicid; if ((apicid < first) || (apicid > last)) continue; - cpumask_set_cpu(sibling, - &this_leaf->shared_cpu_map); + cpumask_set_cpu(sibling, &ci->shared_cpu_map); } } } else @@ -921,25 +532,27 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, return 1; } +/* + * <linux/cacheinfo.h> shared_cpu_map setup, Intel + fallback AMD/Hygon + */ static void __cache_cpumap_setup(unsigned int cpu, int index, - struct _cpuid4_info_regs *base) + const struct _cpuid4_info *id4) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf, *sibling_leaf; + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct cacheinfo *ci, *sibling_ci; unsigned long num_threads_sharing; int index_msb, i; - struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86_vendor == X86_VENDOR_AMD || - c->x86_vendor == X86_VENDOR_HYGON) { - if (__cache_amd_cpumap_setup(cpu, index, base)) + if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) { + if (__cache_amd_cpumap_setup(cpu, index, id4)) return; } - this_leaf = this_cpu_ci->info_list + index; - num_threads_sharing = 1 + base->eax.split.num_threads_sharing; + ci = this_cpu_ci->info_list + index; + num_threads_sharing = 1 + id4->eax.split.num_threads_sharing; - cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, &ci->shared_cpu_map); if (num_threads_sharing == 1) return; @@ -949,30 +562,29 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) { struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); + /* Skip if itself or no cacheinfo */ if (i == cpu || !sib_cpu_ci->info_list) - continue;/* skip if itself or no cacheinfo */ - sibling_leaf = sib_cpu_ci->info_list + index; - cpumask_set_cpu(i, &this_leaf->shared_cpu_map); - cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map); + continue; + + sibling_ci = sib_cpu_ci->info_list + index; + cpumask_set_cpu(i, &ci->shared_cpu_map); + cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map); } } -static void ci_leaf_init(struct cacheinfo *this_leaf, - struct _cpuid4_info_regs *base) +static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4, + struct amd_northbridge *nb) { - this_leaf->id = base->id; - this_leaf->attributes = CACHE_ID; - this_leaf->level = base->eax.split.level; - this_leaf->type = cache_type_map[base->eax.split.type]; - this_leaf->coherency_line_size = - base->ebx.split.coherency_line_size + 1; - this_leaf->ways_of_associativity = - base->ebx.split.ways_of_associativity + 1; - this_leaf->size = base->size; - this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1; - this_leaf->physical_line_partition = - base->ebx.split.physical_line_partition + 1; - this_leaf->priv = base->nb; + ci->id = id4->id; + ci->attributes = CACHE_ID; + ci->level = id4->eax.split.level; + ci->type = cache_type_map[id4->eax.split.type]; + ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1; + ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1; + ci->size = id4->size; + ci->number_of_sets = id4->ecx.split.number_of_sets + 1; + ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1; + ci->priv = nb; } int init_cache_level(unsigned int cpu) @@ -987,38 +599,45 @@ int init_cache_level(unsigned int cpu) } /* - * The max shared threads number comes from CPUID.4:EAX[25-14] with input + * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input * ECX as cache index. Then right shift apicid by the number's order to get * cache id for this cache node. */ -static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) +static void get_cache_id(int cpu, struct _cpuid4_info *id4) { struct cpuinfo_x86 *c = &cpu_data(cpu); unsigned long num_threads_sharing; int index_msb; - num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing; + num_threads_sharing = 1 + id4->eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); - id4_regs->id = c->topo.apicid >> index_msb; + id4->id = c->topo.apicid >> index_msb; } int populate_cache_leaves(unsigned int cpu) { - unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; - struct _cpuid4_info_regs id4_regs = {}; + struct cacheinfo *ci = this_cpu_ci->info_list; + u8 cpu_vendor = boot_cpu_data.x86_vendor; + struct amd_northbridge *nb = NULL; + struct _cpuid4_info id4 = {}; + int idx, ret; for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { - ret = cpuid4_cache_lookup_regs(idx, &id4_regs); + ret = fill_cpuid4_info(idx, &id4); if (ret) return ret; - get_cache_id(cpu, &id4_regs); - ci_leaf_init(this_leaf++, &id4_regs); - __cache_cpumap_setup(cpu, idx, &id4_regs); + + get_cache_id(cpu, &id4); + + if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) + nb = amd_init_l3_cache(idx); + + ci_info_init(ci++, &id4, nb); + __cache_cpumap_setup(cpu, idx, &id4); } - this_cpu_ci->cpu_map_populated = true; + this_cpu_ci->cpu_map_populated = true; return 0; } @@ -1034,31 +653,33 @@ int populate_cache_leaves(unsigned int cpu) static unsigned long saved_cr4; static DEFINE_RAW_SPINLOCK(cache_disable_lock); +/* + * Cache flushing is the most time-consuming step when programming the + * MTRRs. On many Intel CPUs without known erratas, it can be skipped + * if the CPU declares cache self-snooping support. + */ +static void maybe_flush_caches(void) +{ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); +} + void cache_disable(void) __acquires(cache_disable_lock) { unsigned long cr0; /* - * Note that this is not ideal - * since the cache is only flushed/disabled for this CPU while the - * MTRRs are changed, but changing this requires more invasive - * changes to the way the kernel boots + * This is not ideal since the cache is only flushed/disabled + * for this CPU while the MTRRs are changed, but changing this + * requires more invasive changes to the way the kernel boots. */ - raw_spin_lock(&cache_disable_lock); /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); - /* - * Cache flushing is the most time-consuming step when programming - * the MTRRs. Fortunately, as per the Intel Software Development - * Manual, we can skip it if the processor supports cache self- - * snooping. - */ - if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) - wbinvd(); + maybe_flush_caches(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (cpu_feature_enabled(X86_FEATURE_PGE)) { @@ -1073,9 +694,7 @@ void cache_disable(void) __acquires(cache_disable_lock) if (cpu_feature_enabled(X86_FEATURE_MTRR)) mtrr_disable(); - /* Again, only flush caches if we have to. */ - if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) - wbinvd(); + maybe_flush_caches(); } void cache_enable(void) __releases(cache_disable_lock) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f0f85482a73b..8a1f9a889eb4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1006,17 +1006,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_D_1_EAX] = eax; } - /* AMD-defined flags: level 0x80000001 */ + /* + * Check if extended CPUID leaves are implemented: Max extended + * CPUID leaf must be in the 0x80000001-0x8000ffff range. + */ eax = cpuid_eax(0x80000000); - c->extended_cpuid_level = eax; + c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0; - if ((eax & 0xffff0000) == 0x80000000) { - if (eax >= 0x80000001) { - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (c->extended_cpuid_level >= 0x80000001) { + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_8000_0001_ECX] = ecx; - c->x86_capability[CPUID_8000_0001_EDX] = edx; - } + c->x86_capability[CPUID_8000_0001_ECX] = ecx; + c->x86_capability[CPUID_8000_0001_EDX] = edx; } if (c->extended_cpuid_level >= 0x80000007) { @@ -1403,15 +1404,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Affected CPU list is generally enough to enumerate the vulnerability, * but for virtualization case check for ARCH_CAP MSR bits also, VMM may * not want the guest to enumerate the bug. - * - * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, - * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); - else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) - setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 51deb60a9d26..bc38b2d56f26 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -75,6 +75,15 @@ extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id); void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c); +#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) +struct amd_northbridge *amd_init_l3_cache(int index); +#else +static inline struct amd_northbridge *amd_init_l3_cache(int index) +{ + return NULL; +} +#endif + unsigned int aperfmperf_get_khz(int cpu); void cpu_select_mitigations(void); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index a2fbea0be535..94c062cddfa4 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -82,8 +82,12 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 }, { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD }, + { X86_FEATURE_AMX_FP16, X86_FEATURE_AMX_TILE }, + { X86_FEATURE_AMX_BF16, X86_FEATURE_AMX_TILE }, + { X86_FEATURE_AMX_INT8, X86_FEATURE_AMX_TILE }, { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES }, { X86_FEATURE_FRED, X86_FEATURE_LKGS }, + { X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL }, {} }; diff --git a/arch/x86/kernel/cpu/cpuid_0x2_table.c b/arch/x86/kernel/cpu/cpuid_0x2_table.c new file mode 100644 index 000000000000..89bc8db5e9c6 --- /dev/null +++ b/arch/x86/kernel/cpu/cpuid_0x2_table.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/sizes.h> + +#include <asm/cpuid/types.h> + +#include "cpu.h" + +#define CACHE_ENTRY(_desc, _type, _size) \ + [_desc] = { \ + .c_type = (_type), \ + .c_size = (_size) / SZ_1K, \ + } + +#define TLB_ENTRY(_desc, _type, _entries) \ + [_desc] = { \ + .t_type = (_type), \ + .entries = (_entries), \ + } + +const struct leaf_0x2_table cpuid_0x2_table[256] = { + CACHE_ENTRY(0x06, CACHE_L1_INST, SZ_8K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x08, CACHE_L1_INST, SZ_16K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x09, CACHE_L1_INST, SZ_32K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x0a, CACHE_L1_DATA, SZ_8K ), /* 2 way set assoc, 32 byte line size */ + CACHE_ENTRY(0x0c, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x0d, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x0e, CACHE_L1_DATA, SZ_24K ), /* 6-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x21, CACHE_L2, SZ_256K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x22, CACHE_L3, SZ_512K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x23, CACHE_L3, SZ_1M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x25, CACHE_L3, SZ_2M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x29, CACHE_L3, SZ_4M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x2c, CACHE_L1_DATA, SZ_32K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x30, CACHE_L1_INST, SZ_32K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x39, CACHE_L2, SZ_128K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3a, CACHE_L2, SZ_192K ), /* 6-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3b, CACHE_L2, SZ_128K ), /* 2-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3c, CACHE_L2, SZ_256K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3d, CACHE_L2, SZ_384K ), /* 6-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3e, CACHE_L2, SZ_512K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3f, CACHE_L2, SZ_256K ), /* 2-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x41, CACHE_L2, SZ_128K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x42, CACHE_L2, SZ_256K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x43, CACHE_L2, SZ_512K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x44, CACHE_L2, SZ_1M ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x45, CACHE_L2, SZ_2M ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x46, CACHE_L3, SZ_4M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x47, CACHE_L3, SZ_8M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x48, CACHE_L2, SZ_3M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x49, CACHE_L3, SZ_4M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4a, CACHE_L3, SZ_6M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4b, CACHE_L3, SZ_8M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4c, CACHE_L3, SZ_12M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4d, CACHE_L3, SZ_16M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4e, CACHE_L2, SZ_6M ), /* 24-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x60, CACHE_L1_DATA, SZ_16K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x66, CACHE_L1_DATA, SZ_8K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x67, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x68, CACHE_L1_DATA, SZ_32K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x78, CACHE_L2, SZ_1M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x79, CACHE_L2, SZ_128K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7a, CACHE_L2, SZ_256K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7b, CACHE_L2, SZ_512K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7c, CACHE_L2, SZ_1M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7d, CACHE_L2, SZ_2M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x7f, CACHE_L2, SZ_512K ), /* 2-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x80, CACHE_L2, SZ_512K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x82, CACHE_L2, SZ_256K ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x83, CACHE_L2, SZ_512K ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x84, CACHE_L2, SZ_1M ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x85, CACHE_L2, SZ_2M ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x86, CACHE_L2, SZ_512K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x87, CACHE_L2, SZ_1M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd0, CACHE_L3, SZ_512K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd1, CACHE_L3, SZ_1M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd2, CACHE_L3, SZ_2M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd6, CACHE_L3, SZ_1M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd7, CACHE_L3, SZ_2M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd8, CACHE_L3, SZ_4M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xdc, CACHE_L3, SZ_2M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xdd, CACHE_L3, SZ_4M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xde, CACHE_L3, SZ_8M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xe2, CACHE_L3, SZ_2M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xe3, CACHE_L3, SZ_4M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xe4, CACHE_L3, SZ_8M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xea, CACHE_L3, SZ_12M ), /* 24-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xeb, CACHE_L3, SZ_18M ), /* 24-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xec, CACHE_L3, SZ_24M ), /* 24-way set assoc, 64 byte line size */ + + TLB_ENTRY( 0x01, TLB_INST_4K, 32 ), /* TLB_INST 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0x02, TLB_INST_4M, 2 ), /* TLB_INST 4 MByte pages, full associative */ + TLB_ENTRY( 0x03, TLB_DATA_4K, 64 ), /* TLB_DATA 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0x04, TLB_DATA_4M, 8 ), /* TLB_DATA 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x05, TLB_DATA_4M, 32 ), /* TLB_DATA 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x0b, TLB_INST_4M, 4 ), /* TLB_INST 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x4f, TLB_INST_4K, 32 ), /* TLB_INST 4 KByte pages */ + TLB_ENTRY( 0x50, TLB_INST_ALL, 64 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + TLB_ENTRY( 0x51, TLB_INST_ALL, 128 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + TLB_ENTRY( 0x52, TLB_INST_ALL, 256 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + TLB_ENTRY( 0x55, TLB_INST_2M_4M, 7 ), /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + TLB_ENTRY( 0x56, TLB_DATA0_4M, 16 ), /* TLB_DATA0 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x57, TLB_DATA0_4K, 16 ), /* TLB_DATA0 4 KByte pages, 4-way associative */ + TLB_ENTRY( 0x59, TLB_DATA0_4K, 16 ), /* TLB_DATA0 4 KByte pages, fully associative */ + TLB_ENTRY( 0x5a, TLB_DATA0_2M_4M, 32 ), /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x5b, TLB_DATA_4K_4M, 64 ), /* TLB_DATA 4 KByte and 4 MByte pages */ + TLB_ENTRY( 0x5c, TLB_DATA_4K_4M, 128 ), /* TLB_DATA 4 KByte and 4 MByte pages */ + TLB_ENTRY( 0x5d, TLB_DATA_4K_4M, 256 ), /* TLB_DATA 4 KByte and 4 MByte pages */ + TLB_ENTRY( 0x61, TLB_INST_4K, 48 ), /* TLB_INST 4 KByte pages, full associative */ + TLB_ENTRY( 0x63, TLB_DATA_1G_2M_4M, 4 ), /* TLB_DATA 1 GByte pages, 4-way set associative + * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */ + TLB_ENTRY( 0x6b, TLB_DATA_4K, 256 ), /* TLB_DATA 4 KByte pages, 8-way associative */ + TLB_ENTRY( 0x6c, TLB_DATA_2M_4M, 128 ), /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */ + TLB_ENTRY( 0x6d, TLB_DATA_1G, 16 ), /* TLB_DATA 1 GByte pages, fully associative */ + TLB_ENTRY( 0x76, TLB_INST_2M_4M, 8 ), /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + TLB_ENTRY( 0xb0, TLB_INST_4K, 128 ), /* TLB_INST 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0xb1, TLB_INST_2M_4M, 4 ), /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */ + TLB_ENTRY( 0xb2, TLB_INST_4K, 64 ), /* TLB_INST 4KByte pages, 4-way set associative */ + TLB_ENTRY( 0xb3, TLB_DATA_4K, 128 ), /* TLB_DATA 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0xb4, TLB_DATA_4K, 256 ), /* TLB_DATA 4 KByte pages, 4-way associative */ + TLB_ENTRY( 0xb5, TLB_INST_4K, 64 ), /* TLB_INST 4 KByte pages, 8-way set associative */ + TLB_ENTRY( 0xb6, TLB_INST_4K, 128 ), /* TLB_INST 4 KByte pages, 8-way set associative */ + TLB_ENTRY( 0xba, TLB_DATA_4K, 64 ), /* TLB_DATA 4 KByte pages, 4-way associative */ + TLB_ENTRY( 0xc0, TLB_DATA_4K_4M, 8 ), /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */ + TLB_ENTRY( 0xc1, STLB_4K_2M, 1024 ), /* STLB 4 KByte and 2 MByte pages, 8-way associative */ + TLB_ENTRY( 0xc2, TLB_DATA_2M_4M, 16 ), /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */ + TLB_ENTRY( 0xca, STLB_4K, 512 ), /* STLB 4 KByte pages, 4-way associative */ +}; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cdc9813871ef..a6493f60b3f2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -6,6 +6,7 @@ #include <linux/minmax.h> #include <linux/smp.h> #include <linux/string.h> +#include <linux/types.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> @@ -15,6 +16,7 @@ #include <asm/cpu_device_id.h> #include <asm/cpufeature.h> #include <asm/cpu.h> +#include <asm/cpuid.h> #include <asm/hwcap2.h> #include <asm/intel-family.h> #include <asm/microcode.h> @@ -646,103 +648,11 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -#define TLB_INST_4K 0x01 -#define TLB_INST_4M 0x02 -#define TLB_INST_2M_4M 0x03 - -#define TLB_INST_ALL 0x05 -#define TLB_INST_1G 0x06 - -#define TLB_DATA_4K 0x11 -#define TLB_DATA_4M 0x12 -#define TLB_DATA_2M_4M 0x13 -#define TLB_DATA_4K_4M 0x14 - -#define TLB_DATA_1G 0x16 -#define TLB_DATA_1G_2M_4M 0x17 - -#define TLB_DATA0_4K 0x21 -#define TLB_DATA0_4M 0x22 -#define TLB_DATA0_2M_4M 0x23 - -#define STLB_4K 0x41 -#define STLB_4K_2M 0x42 - -/* - * All of leaf 0x2's one-byte TLB descriptors implies the same number of - * entries for their respective TLB types. The 0x63 descriptor is an - * exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries - * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for - * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the - * intel_tlb_table[] mapping. - */ -#define TLB_0x63_2M_4M_ENTRIES 32 - -struct _tlb_table { - unsigned char descriptor; - char tlb_type; - unsigned int entries; -}; - -static const struct _tlb_table intel_tlb_table[] = { - { 0x01, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages, 4-way set associative */ - { 0x02, TLB_INST_4M, 2}, /* TLB_INST 4 MByte pages, full associative */ - { 0x03, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way set associative */ - { 0x04, TLB_DATA_4M, 8}, /* TLB_DATA 4 MByte pages, 4-way set associative */ - { 0x05, TLB_DATA_4M, 32}, /* TLB_DATA 4 MByte pages, 4-way set associative */ - { 0x0b, TLB_INST_4M, 4}, /* TLB_INST 4 MByte pages, 4-way set associative */ - { 0x4f, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages */ - { 0x50, TLB_INST_ALL, 64}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ - { 0x51, TLB_INST_ALL, 128}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ - { 0x52, TLB_INST_ALL, 256}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ - { 0x55, TLB_INST_2M_4M, 7}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ - { 0x56, TLB_DATA0_4M, 16}, /* TLB_DATA0 4 MByte pages, 4-way set associative */ - { 0x57, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, 4-way associative */ - { 0x59, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, fully associative */ - { 0x5a, TLB_DATA0_2M_4M, 32}, /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */ - { 0x5b, TLB_DATA_4K_4M, 64}, /* TLB_DATA 4 KByte and 4 MByte pages */ - { 0x5c, TLB_DATA_4K_4M, 128}, /* TLB_DATA 4 KByte and 4 MByte pages */ - { 0x5d, TLB_DATA_4K_4M, 256}, /* TLB_DATA 4 KByte and 4 MByte pages */ - { 0x61, TLB_INST_4K, 48}, /* TLB_INST 4 KByte pages, full associative */ - { 0x63, TLB_DATA_1G_2M_4M, 4}, /* TLB_DATA 1 GByte pages, 4-way set associative - * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */ - { 0x6b, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 8-way associative */ - { 0x6c, TLB_DATA_2M_4M, 128}, /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */ - { 0x6d, TLB_DATA_1G, 16}, /* TLB_DATA 1 GByte pages, fully associative */ - { 0x76, TLB_INST_2M_4M, 8}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ - { 0xb0, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 4-way set associative */ - { 0xb1, TLB_INST_2M_4M, 4}, /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */ - { 0xb2, TLB_INST_4K, 64}, /* TLB_INST 4KByte pages, 4-way set associative */ - { 0xb3, TLB_DATA_4K, 128}, /* TLB_DATA 4 KByte pages, 4-way set associative */ - { 0xb4, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 4-way associative */ - { 0xb5, TLB_INST_4K, 64}, /* TLB_INST 4 KByte pages, 8-way set associative */ - { 0xb6, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 8-way set associative */ - { 0xba, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way associative */ - { 0xc0, TLB_DATA_4K_4M, 8}, /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */ - { 0xc1, STLB_4K_2M, 1024}, /* STLB 4 KByte and 2 MByte pages, 8-way associative */ - { 0xc2, TLB_DATA_2M_4M, 16}, /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */ - { 0xca, STLB_4K, 512}, /* STLB 4 KByte pages, 4-way associative */ - { 0x00, 0, 0 } -}; - -static void intel_tlb_lookup(const unsigned char desc) +static void intel_tlb_lookup(const struct leaf_0x2_table *entry) { - unsigned int entries; - unsigned char k; - - if (desc == 0) - return; - - /* look up this descriptor in the table */ - for (k = 0; intel_tlb_table[k].descriptor != desc && - intel_tlb_table[k].descriptor != 0; k++) - ; + short entries = entry->entries; - if (intel_tlb_table[k].tlb_type == 0) - return; - - entries = intel_tlb_table[k].entries; - switch (intel_tlb_table[k].tlb_type) { + switch (entry->t_type) { case STLB_4K: tlb_lli_4k = max(tlb_lli_4k, entries); tlb_lld_4k = max(tlb_lld_4k, entries); @@ -799,28 +709,16 @@ static void intel_tlb_lookup(const unsigned char desc) static void intel_detect_tlb(struct cpuinfo_x86 *c) { - int i, j, n; - unsigned int regs[4]; - unsigned char *desc = (unsigned char *)regs; + const struct leaf_0x2_table *entry; + union leaf_0x2_regs regs; + u8 *ptr; if (c->cpuid_level < 2) return; - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for (i = 0 ; i < n ; i++) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 4 ; j++) - if (regs[j] & (1 << 31)) - regs[j] = 0; - - /* Byte 0 is level count, not a descriptor */ - for (j = 1 ; j < 16 ; j++) - intel_tlb_lookup(desc[j]); - } + cpuid_get_leaf_0x2_regs(®s); + for_each_leaf_0x2_entry(regs, ptr, entry) + intel_tlb_lookup(entry); } static const struct cpu_dev intel_cpu_dev = { diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 06e3cf7229ce..bb060f8326ef 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -24,7 +24,7 @@ #include <linux/pci.h> #include <linux/uaccess.h> -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> #include <asm/apic.h> #include <asm/irq_vectors.h> #include <asm/mce.h> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 16f3ca30626a..c75c57b32b74 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -53,7 +53,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, - { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 }, + { X86_FEATURE_AMD_HTR_CORES, CPUID_EAX, 30, 0x80000026, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 03b3c9c3a45e..eb799e2405f9 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -182,7 +182,7 @@ static void parse_topology_amd(struct topo_scan *tscan) if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) has_topoext = cpu_parse_topology_ext(tscan); - if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) + if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES)) tscan->c->topo.cpu_type = cpuid_ebx(0x80000026); if (!has_topoext && !parse_8000_0008(tscan)) diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index 628833afee37..f980b0eb0105 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -25,7 +25,7 @@ #include <asm/numa.h> #include <asm/mpspec.h> #include <asm/apic.h> -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> static unsigned char __initdata nodeids[8]; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 64e5cdb2460a..fed02d1073c7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -18,7 +18,7 @@ #include <asm/e820/api.h> #include <asm/proto.h> #include <asm/dma.h> -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> #include "numa_internal.h" diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 631512f7ec85..95ae1971a5f1 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -5,7 +5,7 @@ #include <linux/cpu.h> #include <linux/range.h> -#include <asm/amd_nb.h> +#include <asm/amd/nb.h> #include <asm/pci_x86.h> #include <asm/pci-direct.h> diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index efefeb82ab61..36336299596b 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -9,7 +9,7 @@ #include <linux/pci.h> #include <linux/suspend.h> #include <linux/vgaarb.h> -#include <asm/amd_node.h> +#include <asm/amd/node.h> #include <asm/hpet.h> #include <asm/pci_x86.h> |