Diffstat (limited to 'arch')
 arch/arm64/boot/dts/mediatek/Makefile |  55
 arch/arm64/include/asm/simd.h         |   9
 arch/arm64/kernel/fpsimd.c            | 130
 arch/arm64/kernel/process.c           |   1
 arch/arm64/mm/mmu.c                   |  33
 arch/arm64/net/bpf_jit_comp.c         |   2
 arch/s390/include/uapi/asm/ipl.h      |   1
 arch/s390/kernel/ipl.c                |  48
 arch/s390/kernel/stacktrace.c         |  18
 arch/s390/pci/pci.c                   |   7
 arch/s390/pci/pci_bus.c               |  98
 arch/s390/pci/pci_bus.h               |  15
 arch/x86/include/asm/irqflags.h       |   2
 arch/x86/kernel/unwind_orc.c          |  39
 arch/x86/kvm/cpuid.c                  |  11
 arch/x86/kvm/svm/nested.c             |   4
 arch/x86/kvm/svm/svm.c                |   2
 arch/x86/kvm/svm/svm.h                |   7
 arch/x86/kvm/vmx/nested.c             |   3
 arch/x86/kvm/vmx/vmx.c                |   9
 arch/x86/kvm/x86.c                    |   7
 arch/x86/net/bpf_jit_comp.c           |  12
 arch/x86/xen/enlighten_pv.c           |   2
 23 files changed, 298 insertions(+), 217 deletions(-)
diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index c5fd6191a925..3f76d9ce9879 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -19,6 +19,27 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nand.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nor.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sata.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd.dtbo
+mt7986a-bananapi-bpi-r3-emmc-nand-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-emmc.dtbo \
+	mt7986a-bananapi-bpi-r3-nand.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nand.dtb
+mt7986a-bananapi-bpi-r3-emmc-nor-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-emmc.dtbo \
+	mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nor.dtb
+mt7986a-bananapi-bpi-r3-sd-nand-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-sd.dtbo \
+	mt7986a-bananapi-bpi-r3-nand.dtbo \
+	mt7986a-bananapi-bpi-r3-sata.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nand.dtb
+mt7986a-bananapi-bpi-r3-sd-nor-dtbs := \
+	mt7986a-bananapi-bpi-r3.dtb \
+	mt7986a-bananapi-bpi-r3-sd.dtbo \
+	mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nor.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-rfb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986b-rfb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4.dtb
@@ -31,6 +52,38 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-sd.dtbo
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtbo
+mt7988a-bananapi-bpi-r4-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4.dtb \
+	mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-emmc.dtb
+mt7988a-bananapi-bpi-r4-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4.dtb \
+	mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtb
+mt7988a-bananapi-bpi-r4-2g5-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4-2g5.dtb \
+	mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-emmc.dtb
+mt7988a-bananapi-bpi-r4-2g5-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4-2g5.dtb \
+	mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-emmc-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+	mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-emmc.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+	mt7988a-bananapi-bpi-r4-pro-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+	mt7988a-bananapi-bpi-r4-pro-cn15.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18-dtbs := \
+	mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+	mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8167-pumpkin.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm-hana.dtb
@@ -113,6 +166,8 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt8390-grinn-genio-700-sbc.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-kontron-3-5-sbc-i1200.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtbo
+mt8395-radxa-nio-12l-8-hd-panel-dtbs := mt8395-radxa-nio-12l.dtb mt8395-radxa-nio-12l-8-hd-panel.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt8516-pumpkin.dtb
 
 # Device tree overlays support
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 0941f6f58a14..69ecbd69ca8c 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -48,6 +48,13 @@ DEFINE_LOCK_GUARD_1(ksimd,
 		    kernel_neon_begin(_T->lock),
 		    kernel_neon_end(_T->lock))
 
-#define scoped_ksimd()	scoped_guard(ksimd, &(struct user_fpsimd_state){})
+#define __scoped_ksimd(_label)						\
+	for (struct user_fpsimd_state __uninitialized __st;		\
+	     true; ({ goto _label; }))					\
+		if (0) {						\
+_label:	break;								\
+	} else scoped_guard(ksimd, &__st)
+
+#define scoped_ksimd()	__scoped_ksimd(__UNIQUE_ID(label))
 
 #endif
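The rewritten guard keeps the one-liner's semantics while moving the save buffer into a for-scoped, deliberately uninitialized stack variable. A minimal usage sketch (xor_blocks_neon() and do_xor_neon() are hypothetical, not part of this patch):

/*
 * Hypothetical caller: scoped_ksimd() declares the user_fpsimd_state
 * save buffer on the stack and brackets the block with
 * kernel_neon_begin()/kernel_neon_end() via the ksimd lock guard, so
 * leaving the block always restores the FP/SIMD state.
 */
static void xor_blocks_neon(u8 *dst, const u8 *src, size_t len)
{
	scoped_ksimd() {
		/* NEON/SVE instructions may be used here */
		do_xor_neon(dst, src, len);	/* hypothetical helper */
	}
	/* kernel-mode FP/SIMD section has ended at this point */
}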
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c154f72634e0..9de1d8a604cb 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -180,13 +180,6 @@ static inline void set_sve_default_vl(int val)
 	set_default_vl(ARM64_VEC_SVE, val);
 }
 
-static u8 *efi_sve_state;
-
-#else /* ! CONFIG_ARM64_SVE */
-
-/* Dummy declaration for code that will be optimised out: */
-extern u8 *efi_sve_state;
-
 #endif /* ! CONFIG_ARM64_SVE */
 
 #ifdef CONFIG_ARM64_SME
@@ -1095,36 +1088,6 @@ int vec_verify_vq_map(enum vec_type type)
 	return 0;
 }
 
-static void __init sve_efi_setup(void)
-{
-	int max_vl = 0;
-	int i;
-
-	if (!IS_ENABLED(CONFIG_EFI))
-		return;
-
-	for (i = 0; i < ARRAY_SIZE(vl_info); i++)
-		max_vl = max(vl_info[i].max_vl, max_vl);
-
-	/*
-	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
-	 * This is evidence of a crippled system and we are returning void,
-	 * so no attempt is made to handle this situation here.
-	 */
-	if (!sve_vl_valid(max_vl))
-		goto fail;
-
-	efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
-				GFP_KERNEL);
-	if (!efi_sve_state)
-		goto fail;
-
-	return;
-
-fail:
-	panic("Cannot allocate memory for EFI SVE save/restore");
-}
-
 void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
 {
 	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1185,8 +1148,6 @@ void __init sve_setup(void)
 	if (sve_max_virtualisable_vl() < sve_max_vl())
 		pr_warn("%s: unvirtualisable vector lengths present\n",
 			info->name);
-
-	sve_efi_setup();
 }
 
 /*
@@ -1947,9 +1908,6 @@ EXPORT_SYMBOL_GPL(kernel_neon_end);
 #ifdef CONFIG_EFI
 
 static struct user_fpsimd_state efi_fpsimd_state;
-static bool efi_fpsimd_state_used;
-static bool efi_sve_state_used;
-static bool efi_sm_state;
 
 /*
  * EFI runtime services support functions
@@ -1976,43 +1934,26 @@ void __efi_fpsimd_begin(void)
 	if (may_use_simd()) {
 		kernel_neon_begin(&efi_fpsimd_state);
 	} else {
-		WARN_ON(preemptible());
-
 		/*
-		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
-		 * preserving:
+		 * We are running in hardirq or NMI context, and the only
+		 * legitimate case where this might happen is when EFI pstore
+		 * is attempting to record the system's dying gasps into EFI
+		 * variables. This could be due to an oops, a panic or a call
+		 * to emergency_restart(), and in none of those cases, we can
+		 * expect the current task to ever return to user space again,
+		 * or for the kernel to resume any normal execution, for that
+		 * matter (an oops in hardirq context triggers a panic too).
+		 *
+		 * Therefore, there is no point in attempting to preserve any
+		 * SVE/SME state here. On the off chance that we might have
+		 * ended up here for a different reason inadvertently, kill the
+		 * task and preserve/restore the base FP/SIMD state, which
+		 * might belong to kernel mode FP/SIMD.
 		 */
-		if (system_supports_sve() && efi_sve_state != NULL) {
-			bool ffr = true;
-			u64 svcr;
-
-			efi_sve_state_used = true;
-
-			if (system_supports_sme()) {
-				svcr = read_sysreg_s(SYS_SVCR);
-
-				efi_sm_state = svcr & SVCR_SM_MASK;
-
-				/*
-				 * Unless we have FA64 FFR does not
-				 * exist in streaming mode.
-				 */
-				if (!system_supports_fa64())
-					ffr = !(svcr & SVCR_SM_MASK);
-			}
-
-			sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
-				       &efi_fpsimd_state.fpsr, ffr);
-
-			if (system_supports_sme())
-				sysreg_clear_set_s(SYS_SVCR,
-						   SVCR_SM_MASK, 0);
-
-		} else {
-			fpsimd_save_state(&efi_fpsimd_state);
-		}
-
-		efi_fpsimd_state_used = true;
+		pr_warn_ratelimited("Calling EFI runtime from %s context\n",
+				    in_nmi() ? "NMI" : "hardirq");
+		force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+		fpsimd_save_state(&efi_fpsimd_state);
 	}
 }
 
@@ -2024,41 +1965,10 @@ void __efi_fpsimd_end(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	if (!efi_fpsimd_state_used) {
+	if (may_use_simd()) {
 		kernel_neon_end(&efi_fpsimd_state);
 	} else {
-		if (system_supports_sve() && efi_sve_state_used) {
-			bool ffr = true;
-
-			/*
-			 * Restore streaming mode; EFI calls are
-			 * normal function calls so should not return in
-			 * streaming mode.
-			 */
-			if (system_supports_sme()) {
-				if (efi_sm_state) {
-					sysreg_clear_set_s(SYS_SVCR,
-							   0,
-							   SVCR_SM_MASK);
-
-					/*
-					 * Unless we have FA64 FFR does not
-					 * exist in streaming mode.
-					 */
-					if (!system_supports_fa64())
-						ffr = false;
-				}
-			}
-
-			sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
-				       &efi_fpsimd_state.fpsr, ffr);
-
-			efi_sve_state_used = false;
-		} else {
-			fpsimd_load_state(&efi_fpsimd_state);
-		}
-
-		efi_fpsimd_state_used = false;
+		fpsimd_load_state(&efi_fpsimd_state);
 	}
 }
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fba7ca102a8c..489554931231 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -292,6 +292,7 @@ static void flush_gcs(void)
 	current->thread.gcs_base = 0;
 	current->thread.gcs_size = 0;
 	current->thread.gcs_el0_mode = 0;
+	current->thread.gcs_el0_locked = 0;
 	write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
 	write_sysreg_s(0, SYS_GCSPR_EL0);
 }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9ae7ce00a7ef..8e1d80a7033e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -767,18 +767,6 @@ static inline bool force_pte_mapping(void)
 	return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world();
 }
 
-static inline bool split_leaf_mapping_possible(void)
-{
-	/*
-	 * !BBML2_NOABORT systems should never run into scenarios where we would
-	 * have to split. So exit early and let calling code detect it and raise
-	 * a warning.
-	 */
-	if (!system_supports_bbml2_noabort())
-		return false;
-	return !force_pte_mapping();
-}
-
 static DEFINE_MUTEX(pgtable_split_lock);
 
 int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@@ -786,11 +774,22 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
 	int ret;
 
 	/*
-	 * Exit early if the region is within a pte-mapped area or if we can't
-	 * split. For the latter case, the permission change code will raise a
-	 * warning if not already pte-mapped.
+	 * !BBML2_NOABORT systems should not be trying to change permissions on
+	 * anything that is not pte-mapped in the first place. Just return early
+	 * and let the permission change code raise a warning if not already
+	 * pte-mapped.
 	 */
-	if (!split_leaf_mapping_possible() || is_kfence_address((void *)start))
+	if (!system_supports_bbml2_noabort())
+		return 0;
+
+	/*
+	 * If the region is within a pte-mapped area, there is no need to try to
+	 * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
+	 * change permissions from atomic context so for those cases (which are
+	 * always pte-mapped), we must not go any further because taking the
+	 * mutex below may sleep.
+	 */
+	if (force_pte_mapping() || is_kfence_address((void *)start))
 		return 0;
 
 	/*
@@ -1089,7 +1088,7 @@ bool arch_kfence_init_pool(void)
 	int ret;
 
 	/* Exit early if we know the linear map is already pte-mapped. */
-	if (!split_leaf_mapping_possible())
+	if (force_pte_mapping())
 		return true;
 
 	/* Kfence pool is already pte-mapped for the early init case. */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 74dd29816f36..b6eb7a465ad2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
 	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
 		return;
 
-	if (capable(CAP_SYS_ADMIN))
+	if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
 		return;
 
 	if (supports_clearbhb(SCOPE_SYSTEM)) {
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index 2cd28af50dd4..3d64a2251699 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -15,6 +15,7 @@ struct ipl_pl_hdr {
 #define IPL_PL_FLAG_IPLPS	0x80
 #define IPL_PL_FLAG_SIPL	0x40
 #define IPL_PL_FLAG_IPLSR	0x20
+#define IPL_PL_FLAG_SBP		0x10
 
 /* IPL Parameter Block header */
 struct ipl_pb_hdr {
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 961a3d60a4dd..dcdc7e274848 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -262,6 +262,24 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
 	       sys_##_prefix##_##_name##_show,				\
 	       sys_##_prefix##_##_name##_store)
 
+#define DEFINE_IPL_ATTR_BOOTPROG_RW(_prefix, _name, _fmt_out, _fmt_in, _hdr, _value) \
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value)	\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		const char *buf, size_t len)				\
+{									\
+	unsigned long long value;					\
+	if (sscanf(buf, _fmt_in, &value) != 1)				\
+		return -EINVAL;						\
+	(_value) = value;						\
+	(_hdr).flags &= ~IPL_PL_FLAG_SBP;				\
+	return len;							\
+}									\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, 0644,						\
+	       sys_##_prefix##_##_name##_show,				\
+	       sys_##_prefix##_##_name##_store)
+
 #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
 IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value)			\
 static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
@@ -818,12 +836,13 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
 		   reipl_block_fcp->fcp.wwpn);
 DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
 		   reipl_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_fcp->fcp.bootprog);
 DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
 		   reipl_block_fcp->fcp.br_lba);
 DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
 		   reipl_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_fcp->hdr,
+			    reipl_block_fcp->fcp.bootprog);
 
 static void reipl_get_ascii_loadparm(char *loadparm,
 				     struct ipl_parameter_block *ibp)
@@ -942,10 +961,11 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
 		   reipl_block_nvme->nvme.fid);
 DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
 		   reipl_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_nvme->nvme.bootprog);
 DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
 		   reipl_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_nvme->hdr,
+			    reipl_block_nvme->nvme.bootprog);
 
 static struct attribute *reipl_nvme_attrs[] = {
 	&sys_reipl_nvme_fid_attr.attr,
@@ -1038,8 +1058,9 @@ static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
 };
 
 DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
-		   reipl_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+			    reipl_block_eckd->hdr,
+			    reipl_block_eckd->eckd.bootprog);
 
 static struct attribute *reipl_eckd_attrs[] = {
 	&sys_reipl_eckd_device_attr.attr,
@@ -1567,12 +1588,13 @@ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
 		   dump_block_fcp->fcp.wwpn);
 DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
 		   dump_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
-		   dump_block_fcp->fcp.bootprog);
 DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
 		   dump_block_fcp->fcp.br_lba);
 DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
 		   dump_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+			    dump_block_fcp->hdr,
+			    dump_block_fcp->fcp.bootprog);
 
 DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
 			    dump_block_fcp->fcp,
@@ -1604,10 +1626,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
 		   dump_block_nvme->nvme.fid);
 DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
 		   dump_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
-		   dump_block_nvme->nvme.bootprog);
 DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
 		   dump_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+			    dump_block_nvme->hdr,
+			    dump_block_nvme->nvme.bootprog);
 
 DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
 			    dump_block_nvme->nvme,
@@ -1635,8 +1658,9 @@ static const struct attribute_group dump_nvme_attr_group = {
 
 /* ECKD dump device attributes */
 DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
-		   dump_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+			    dump_block_eckd->hdr,
+			    dump_block_eckd->eckd.bootprog);
 
 IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
 IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
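To make the new macro concrete, this is roughly what DEFINE_IPL_ATTR_BOOTPROG_RW() hand-expands to for the reipl_fcp bootprog attribute (an illustrative expansion, not generated code): every user store clears IPL_PL_FLAG_SBP in the parameter block header in addition to updating the boot program value.

/*
 * Illustrative hand-expansion of:
 *   DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
 *                               reipl_block_fcp->hdr,
 *                               reipl_block_fcp->fcp.bootprog);
 * The store routine differs from the plain DEFINE_IPL_ATTR_RW variant
 * only in that it also clears IPL_PL_FLAG_SBP in the block header.
 */
static ssize_t sys_reipl_fcp_bootprog_store(struct kobject *kobj,
					    struct kobj_attribute *attr,
					    const char *buf, size_t len)
{
	unsigned long long value;

	if (sscanf(buf, "%lld\n", &value) != 1)
		return -EINVAL;
	reipl_block_fcp->fcp.bootprog = value;
	reipl_block_fcp->hdr.flags &= ~IPL_PL_FLAG_SBP;
	return len;
}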
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 3aae7f70e6ab..18520d333058 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -104,7 +104,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
 	struct stack_frame_vdso_wrapper __user *sf_vdso;
 	struct stack_frame_user __user *sf;
 	unsigned long ip, sp;
-	bool first = true;
 
 	if (!current->mm)
 		return;
@@ -133,24 +132,11 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
 			if (__get_user(ip, &sf->gprs[8]))
 				break;
 		}
-		/* Sanity check: ABI requires SP to be 8 byte aligned. */
-		if (sp & 0x7)
+		/* Validate SP and RA (ABI requires SP to be 8 byte aligned). */
+		if (sp & 0x7 || ip_invalid(ip))
 			break;
-		if (ip_invalid(ip)) {
-			/*
-			 * If the instruction address is invalid, and this
-			 * is the first stack frame, assume r14 has not
-			 * been written to the stack yet. Otherwise exit.
-			 */
-			if (!first)
-				break;
-			ip = regs->gprs[14];
-			if (ip_invalid(ip))
-				break;
-		}
 		if (!store_ip(consume_entry, cookie, entry, perf, ip))
 			break;
-		first = false;
 	}
 	pagefault_enable();
 }
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 5a6ace9d875a..57f3980b98a9 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -961,6 +961,7 @@ void zpci_device_reserved(struct zpci_dev *zdev)
 }
 
 void zpci_release_device(struct kref *kref)
+	__releases(&zpci_list_lock)
 {
 	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
 
@@ -1148,6 +1149,7 @@ static void zpci_add_devices(struct list_head *scan_list)
 
 int zpci_scan_devices(void)
 {
+	struct zpci_bus *zbus;
 	LIST_HEAD(scan_list);
 	int rc;
 
@@ -1156,7 +1158,10 @@ int zpci_scan_devices(void)
 		return rc;
 
 	zpci_add_devices(&scan_list);
-	zpci_bus_scan_busses();
+	zpci_bus_for_each(zbus) {
+		zpci_bus_scan_bus(zbus);
+		cond_resched();
+	}
 
 	return 0;
 }
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 66c4bd888b29..42a13e451f64 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
 	return ret;
 }
 
-/* zpci_bus_scan_busses - Scan all registered busses
- *
- * Scan all available zbusses
- *
- */
-void zpci_bus_scan_busses(void)
-{
-	struct zpci_bus *zbus = NULL;
-
-	mutex_lock(&zbus_list_lock);
-	list_for_each_entry(zbus, &zbus_list, bus_next) {
-		zpci_bus_scan_bus(zbus);
-		cond_resched();
-	}
-	mutex_unlock(&zbus_list_lock);
-}
-
 static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
 {
 	return !s390_pci_no_rid && zdev->rid_available &&
@@ -222,10 +205,29 @@ out_free_domain:
 	return -ENOMEM;
 }
 
-static void zpci_bus_release(struct kref *kref)
+/**
+ * zpci_bus_release - Un-initialize resources associated with the zbus and
+ * free memory
+ * @kref: refcount * that is part of struct zpci_bus
+ *
+ * MUST be called with `zbus_list_lock` held, but the lock is released during
+ * run of the function.
+ */
+static inline void zpci_bus_release(struct kref *kref)
+	__releases(&zbus_list_lock)
 {
 	struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
 
+	lockdep_assert_held(&zbus_list_lock);
+
+	list_del(&zbus->bus_next);
+	mutex_unlock(&zbus_list_lock);
+
+	/*
+	 * At this point no-one should see this object, or be able to get a new
+	 * reference to it.
+	 */
+
 	if (zbus->bus) {
 		pci_lock_rescan_remove();
 		pci_stop_root_bus(zbus->bus);
@@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref)
 		pci_unlock_rescan_remove();
 	}
 
-	mutex_lock(&zbus_list_lock);
-	list_del(&zbus->bus_next);
-	mutex_unlock(&zbus_list_lock);
 	zpci_remove_parent_msi_domain(zbus);
 	kfree(zbus);
 }
 
-static void zpci_bus_put(struct zpci_bus *zbus)
+static inline void __zpci_bus_get(struct zpci_bus *zbus)
+{
+	lockdep_assert_held(&zbus_list_lock);
+	kref_get(&zbus->kref);
+}
+
+static inline void zpci_bus_put(struct zpci_bus *zbus)
 {
-	kref_put(&zbus->kref, zpci_bus_release);
+	kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock);
 }
 
 static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
@@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
 		if (!zbus->multifunction)
 			continue;
 		if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
-			kref_get(&zbus->kref);
+			__zpci_bus_get(zbus);
 			goto out_unlock;
 		}
 	}
@@ -268,6 +273,44 @@ out_unlock:
 	return zbus;
 }
 
+/**
+ * zpci_bus_get_next - get the next zbus object from given position in the list
+ * @pos: current position/cursor in the global zbus list
+ *
+ * Acquires and releases references as the cursor iterates (might also free/
+ * release the cursor). Is tolerant of concurrent operations on the list.
+ *
+ * To begin the iteration, set *@pos to %NULL before calling the function.
+ *
+ * *@pos is set to %NULL in cases where either the list is empty, or *@pos is
+ * the last element in the list.
+ *
+ * Context: Process context. May sleep.
+ */
+void zpci_bus_get_next(struct zpci_bus **pos)
+{
+	struct zpci_bus *curp = *pos, *next = NULL;
+
+	mutex_lock(&zbus_list_lock);
+	if (curp)
+		next = list_next_entry(curp, bus_next);
+	else
+		next = list_first_entry(&zbus_list, typeof(*curp), bus_next);
+
+	if (list_entry_is_head(next, &zbus_list, bus_next))
+		next = NULL;
+
+	if (next)
+		__zpci_bus_get(next);
+
+	*pos = next;
+	mutex_unlock(&zbus_list_lock);
+
+	/* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */
+	if (curp)
+		zpci_bus_put(curp);
+}
+
 static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 {
 	struct zpci_bus *zbus;
@@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 	zbus->topo = topo;
 	zbus->topo_is_tid = topo_is_tid;
 	INIT_LIST_HEAD(&zbus->bus_next);
-	mutex_lock(&zbus_list_lock);
-	list_add_tail(&zbus->bus_next, &zbus_list);
-	mutex_unlock(&zbus_list_lock);
 	kref_init(&zbus->kref);
 	INIT_LIST_HEAD(&zbus->resources);
 
@@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 	zbus->bus_resource.flags = IORESOURCE_BUS;
 	pci_add_resource(&zbus->resources, &zbus->bus_resource);
 
+	mutex_lock(&zbus_list_lock);
+	list_add_tail(&zbus->bus_next, &zbus_list);
+	mutex_unlock(&zbus_list_lock);
+
 	return zbus;
 }
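The reference counting above is the standard kref_put_mutex() pattern. A minimal self-contained sketch with a hypothetical object type (obj, obj_list and obj_list_lock are illustrative names, not kernel APIs):

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

static LIST_HEAD(obj_list);
static DEFINE_MUTEX(obj_list_lock);

struct obj {
	struct kref kref;
	struct list_head node;
};

/* Runs only when the refcount hits zero, with obj_list_lock held. */
static void obj_release(struct kref *kref)
	__releases(&obj_list_lock)
{
	struct obj *o = container_of(kref, struct obj, kref);

	/* Unlink while still holding the list lock: no new lookup can find us. */
	list_del(&o->node);
	mutex_unlock(&obj_list_lock);
	kfree(o);
}

static void obj_put(struct obj *o)
{
	/* Takes obj_list_lock only if the count is about to drop to zero. */
	kref_put_mutex(&o->kref, obj_release, &obj_list_lock);
}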
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index ae3d7a9159bd..e440742e3145 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
 void zpci_bus_device_unregister(struct zpci_dev *zdev);
 
 int zpci_bus_scan_bus(struct zpci_bus *zbus);
-void zpci_bus_scan_busses(void);
+void zpci_bus_get_next(struct zpci_bus **pos);
+
+/**
+ * zpci_bus_for_each - iterate over all the registered zbus objects
+ * @pos: a struct zpci_bus * as cursor
+ *
+ * Acquires and releases references as the cursor iterates over the registered
+ * objects. Is tolerant against concurrent removals of objects.
+ *
+ * Context: Process context. May sleep.
+ */
+#define zpci_bus_for_each(pos) \
+	for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \
+	     zpci_bus_get_next(&(pos)))
 
 int zpci_bus_scan_device(struct zpci_dev *zdev);
 void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index b30e5474c18e..a1193e9d65f2 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -25,7 +25,7 @@ extern __always_inline unsigned long native_save_fl(void)
 	 */
 	asm volatile("# __raw_save_flags\n\t"
 		     "pushf ; pop %0"
-		     : "=rm" (flags)
+		     : ASM_OUTPUT_RM (flags)
 		     : /* no input */
 		     : "memory");
 
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 977ee75e047c..f610fde2d5c4 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
 #include <linux/objtool.h>
 #include <linux/module.h>
 #include <linux/sort.h>
+#include <linux/bpf.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
@@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
 }
 #endif
 
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.type		= ORC_TYPE_CALL,
+	.sp_reg		= ORC_REG_BP,
+	.sp_offset	= 16,
+	.bp_reg		= ORC_REG_PREV_SP,
+	.bp_offset	= -16,
+};
+
+static struct orc_entry *orc_bpf_find(unsigned long ip)
+{
+#ifdef CONFIG_BPF_JIT
+	if (bpf_has_frame_pointer(ip))
+		return &orc_fp_entry;
+#endif
+
+	return NULL;
+}
+
 /*
  * If we crash with IP==0, the last successfully executed instruction
  * was probably an indirect function call with a NULL function pointer,
@@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
 	.type = ORC_TYPE_CALL
 };
 
-/* Fake frame pointer entry -- used as a fallback for generated code */
-static struct orc_entry orc_fp_entry = {
-	.type		= ORC_TYPE_CALL,
-	.sp_reg		= ORC_REG_BP,
-	.sp_offset	= 16,
-	.bp_reg		= ORC_REG_PREV_SP,
-	.bp_offset	= -16,
-};
-
 static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
@@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
 	if (orc)
 		return orc;
 
+	/* BPF lookup: */
+	orc = orc_bpf_find(ip);
+	if (orc)
+		return orc;
+
 	return orc_ftrace_find(ip);
 }
 
@@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (!orc) {
 		/*
 		 * As a fallback, try to assume this code uses a frame pointer.
-		 * This is useful for generated code, like BPF, which ORC
-		 * doesn't know about. This is just a guess, so the rest of
-		 * the unwind is no longer considered reliable.
+		 * This is just a guess, so the rest of the unwind is no longer
+		 * considered reliable.
 		 */
 		orc = &orc_fp_entry;
 		state->error = true;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d563a948318b..88a5426674a1 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -510,10 +510,17 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	int r;
 
 	/*
+	 * Apply pending runtime CPUID updates to the current CPUID entries to
+	 * avoid false positives due to mismatches on KVM-owned feature flags.
+	 */
+	if (vcpu->arch.cpuid_dynamic_bits_dirty)
+		kvm_update_cpuid_runtime(vcpu);
+
+	/*
 	 * Swap the existing (old) entries with the incoming (new) entries in
 	 * order to massage the new entries, e.g. to account for dynamic bits
-	 * that KVM controls, without clobbering the current guest CPUID, which
-	 * KVM needs to preserve in order to unwind on failure.
+	 * that KVM controls, without losing the current guest CPUID, which KVM
+	 * needs to preserve in order to unwind on failure.
 	 *
 	 * Similarly, save the vCPU's current cpu_caps so that the capabilities
 	 * can be updated alongside the CPUID entries when performing runtime
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c81005b24522..ba0f11c68372 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -985,7 +985,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	if (!nested_vmcb_check_save(vcpu) ||
 	    !nested_vmcb_check_controls(vcpu)) {
 		vmcb12->control.exit_code    = SVM_EXIT_ERR;
-		vmcb12->control.exit_code_hi = 0;
+		vmcb12->control.exit_code_hi = -1u;
 		vmcb12->control.exit_info_1  = 0;
 		vmcb12->control.exit_info_2  = 0;
 		goto out;
@@ -1018,7 +1018,7 @@ out_exit_err:
 	svm->soft_int_injected = false;
 
 	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
-	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_code_hi = -1u;
 	svm->vmcb->control.exit_info_1  = 0;
 	svm->vmcb->control.exit_info_2  = 0;
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f56c2d895011..24d59ccfa40d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2443,6 +2443,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
 
 	if (cr0 ^ val) {
 		svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+		svm->vmcb->control.exit_code_hi = 0;
 		ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
 	}
 
@@ -4617,6 +4618,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	if (static_cpu_has(X86_FEATURE_NRIPS))
 		vmcb->control.next_rip  = info->next_rip;
 	vmcb->control.exit_code = icpt_info.exit_code;
+	vmcb->control.exit_code_hi = 0;
 	vmexit = nested_svm_exit_handled(svm);
 
 	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9e151dbdef25..01be93a53d07 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -761,9 +761,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
 {
-	svm->vmcb->control.exit_code = exit_code;
-	svm->vmcb->control.exit_info_1 = 0;
-	svm->vmcb->control.exit_info_2 = 0;
+	svm->vmcb->control.exit_code	= exit_code;
+	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_info_1	= 0;
+	svm->vmcb->control.exit_info_2	= 0;
 	return nested_svm_vmexit(svm);
 }
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 40777278eabb..6137e5307d0f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
 #include "trace.h"
 #include "vmx.h"
 #include "smm.h"
+#include "x86_ops.h"
 
 static bool __read_mostly enable_shadow_vmcs = 1;
 module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5165,7 +5166,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 
 	if (vmx->nested.update_vmcs01_apicv_status) {
 		vmx->nested.update_vmcs01_apicv_status = false;
-		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+		vmx_refresh_apicv_exec_ctrl(vcpu);
 	}
 
 	if (vmx->nested.update_vmcs01_hwapic_isr) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4cbe8c84b636..6b96f7aea20b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6937,15 +6937,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 	 * VM-Exit, otherwise L1 with run with a stale SVI.
 	 */
 	if (is_guest_mode(vcpu)) {
-		/*
-		 * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
-		 * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
-		 * Note, userspace can stuff state while L2 is active; assert
-		 * that VID is disabled if and only if the vCPU is in KVM_RUN
-		 * to avoid false positives if userspace is setting APIC state.
-		 */
-		WARN_ON_ONCE(vcpu->wants_to_run &&
-			     nested_cpu_has_vid(get_vmcs12(vcpu)));
 		to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
 		return;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c6d899d53dd..ff8812f3a129 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10886,9 +10886,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
 	 * still active when the interrupt got accepted. Make sure
 	 * kvm_check_and_inject_events() is called to check for that.
+	 *
+	 * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+	 * highest bit in vISR and the next accelerated EOI in the guest won't
+	 * be virtualized correctly (the CPU uses SVI to determine which vISR
+	 * vector to clear).
 	 */
 	if (!apic->apicv_active)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	else
+		kvm_apic_update_hwapic_isr(vcpu);
 
 out:
 	preempt_enable();
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b69dc7194e2c..b0bac2a66eff 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	emit_prologue(&prog, image, stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
 		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+
+	bpf_prog->aux->ksym.fp_start = prog - temp;
+
 	/* Exception callback will clobber callee regs for its own use, and
 	 * restore the original callee regs from main prog's stack frame.
 	 */
@@ -2736,6 +2739,8 @@ emit_jmp:
 				pop_r12(&prog);
 			}
 			EMIT1(0xC9);         /* leave */
+			bpf_prog->aux->ksym.fp_end = prog - temp;
+
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
 			break;
 
@@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	}
 	EMIT1(0x55);		 /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	if (im)
+		im->ksym.fp_start = prog - (u8 *)rw_image;
+
 	if (!is_imm8(stack_size))
 		/* sub rsp, stack_size */
 		EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 	emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
+
 	EMIT1(0xC9); /* leave */
+	if (im)
+		im->ksym.fp_end = prog - (u8 *)rw_image;
+
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* skip our return address and return to parent */
 		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
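The fp_start/fp_end offsets recorded by the JIT above delimit the window of a BPF image in which RBP holds a valid frame pointer, which is what lets the ORC unwinder's orc_bpf_find() trust the frame-pointer fallback entry. A hedged sketch of how such a check could look (assumption: the actual bpf_has_frame_pointer() implementation is not shown in this diff, and ip_in_fp_window() is a hypothetical name):

#include <linux/bpf.h>

/*
 * Sketch only, not the kernel's implementation: between fp_start (the
 * prologue has pushed rbp and set up the frame) and fp_end (just after
 * the 'leave' that tears it down), unwinding through RBP is sound, so
 * the fake ORC frame-pointer entry is trustworthy for such addresses.
 */
static bool ip_in_fp_window(const struct bpf_ksym *ksym, unsigned long ip)
{
	unsigned long off = ip - ksym->start;

	return off >= ksym->fp_start && off < ksym->fp_end;
}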
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4806cc28d7ca..b74ff8bc7f2a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -108,7 +108,7 @@ static int xen_cpu_dead_pv(unsigned int cpu);
  * calls.
  */
 DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+EXPORT_PER_CPU_SYMBOL_GPL(xen_in_preemptible_hcall);
 
 /*
  * In case of scheduling the flag must be cleared and restored after