summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arc/configs/axs101_defconfig2
-rw-r--r--arch/arc/configs/axs103_defconfig2
-rw-r--r--arch/arc/configs/axs103_smp_defconfig2
-rw-r--r--arch/arc/configs/hsdk_defconfig2
-rw-r--r--arch/arc/configs/vdk_hs38_defconfig2
-rw-r--r--arch/arc/configs/vdk_hs38_smp_defconfig2
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/configs/axm55xx_defconfig3
-rw-r--r--arch/arm/configs/bcm2835_defconfig4
-rw-r--r--arch/arm/configs/davinci_all_defconfig2
-rw-r--r--arch/arm/configs/dove_defconfig4
-rw-r--r--arch/arm/configs/ep93xx_defconfig4
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig6
-rw-r--r--arch/arm/configs/ixp4xx_defconfig4
-rw-r--r--arch/arm/configs/mmp2_defconfig2
-rw-r--r--arch/arm/configs/moxart_defconfig2
-rw-r--r--arch/arm/configs/multi_v5_defconfig2
-rw-r--r--arch/arm/configs/mv78xx0_defconfig4
-rw-r--r--arch/arm/configs/mvebu_v5_defconfig2
-rw-r--r--arch/arm/configs/nhk8815_defconfig2
-rw-r--r--arch/arm/configs/omap1_defconfig2
-rw-r--r--arch/arm/configs/omap2plus_defconfig2
-rw-r--r--arch/arm/configs/orion5x_defconfig4
-rw-r--r--arch/arm/configs/pxa_defconfig6
-rw-r--r--arch/arm/configs/qcom_defconfig2
-rw-r--r--arch/arm/configs/rpc_defconfig2
-rw-r--r--arch/arm/configs/s3c6400_defconfig6
-rw-r--r--arch/arm/configs/sama7_defconfig2
-rw-r--r--arch/arm/configs/socfpga_defconfig2
-rw-r--r--arch/arm/configs/spear13xx_defconfig4
-rw-r--r--arch/arm/configs/spear3xx_defconfig4
-rw-r--r--arch/arm/configs/spear6xx_defconfig4
-rw-r--r--arch/arm/configs/spitz_defconfig4
-rw-r--r--arch/arm/configs/stm32_defconfig2
-rw-r--r--arch/arm/configs/tegra_defconfig6
-rw-r--r--arch/arm/configs/u8500_defconfig2
-rw-r--r--arch/arm/configs/vexpress_defconfig2
-rw-r--r--arch/arm/kernel/bios32.c5
-rw-r--r--arch/arm/kernel/entry-ftrace.S18
-rw-r--r--arch/arm/mm/cache-l2x0.c7
-rw-r--r--arch/arm/mm/fault.c3
-rw-r--r--arch/arm64/include/asm/ftrace.h1
-rw-r--r--arch/arm64/include/asm/mmu.h7
-rw-r--r--arch/arm64/kernel/cpufeature.c108
-rw-r--r--arch/arm64/kernel/mte.c2
-rw-r--r--arch/arm64/kernel/probes/kprobes.c12
-rw-r--r--arch/arm64/kvm/Kconfig2
-rw-r--r--arch/arm64/kvm/arm.c3
-rw-r--r--arch/arm64/mm/mmu.c98
-rw-r--r--arch/hexagon/configs/comet_defconfig7
-rw-r--r--arch/loongarch/Kconfig11
-rw-r--r--arch/loongarch/Makefile4
-rw-r--r--arch/loongarch/configs/loongson3_defconfig73
-rw-r--r--arch/loongarch/include/asm/image.h52
-rw-r--r--arch/loongarch/include/asm/inst.h5
-rw-r--r--arch/loongarch/include/asm/kexec.h12
-rw-r--r--arch/loongarch/kernel/Makefile1
-rw-r--r--arch/loongarch/kernel/cpu-probe.c46
-rw-r--r--arch/loongarch/kernel/inst.c12
-rw-r--r--arch/loongarch/kernel/kexec_efi.c113
-rw-r--r--arch/loongarch/kernel/kexec_elf.c105
-rw-r--r--arch/loongarch/kernel/machine_kexec.c37
-rw-r--r--arch/loongarch/kernel/machine_kexec_file.c239
-rw-r--r--arch/loongarch/kernel/relocate.c4
-rw-r--r--arch/loongarch/kernel/setup.c1
-rw-r--r--arch/loongarch/kvm/Kconfig2
-rw-r--r--arch/loongarch/kvm/vcpu.c3
-rw-r--r--arch/loongarch/mm/fault.c58
-rw-r--r--arch/loongarch/net/bpf_jit.c86
-rw-r--r--arch/m68k/configs/stmark2_defconfig6
-rw-r--r--arch/m68k/kernel/pcibios.c39
-rw-r--r--arch/microblaze/configs/mmu_defconfig2
-rw-r--r--arch/mips/configs/bigsur_defconfig6
-rw-r--r--arch/mips/configs/cobalt_defconfig6
-rw-r--r--arch/mips/configs/decstation_64_defconfig6
-rw-r--r--arch/mips/configs/decstation_defconfig6
-rw-r--r--arch/mips/configs/decstation_r4k_defconfig6
-rw-r--r--arch/mips/configs/fuloong2e_defconfig2
-rw-r--r--arch/mips/configs/ip22_defconfig6
-rw-r--r--arch/mips/configs/ip27_defconfig6
-rw-r--r--arch/mips/configs/ip28_defconfig6
-rw-r--r--arch/mips/configs/ip30_defconfig6
-rw-r--r--arch/mips/configs/ip32_defconfig6
-rw-r--r--arch/mips/configs/jazz_defconfig2
-rw-r--r--arch/mips/configs/lemote2f_defconfig6
-rw-r--r--arch/mips/configs/loongson2k_defconfig6
-rw-r--r--arch/mips/configs/loongson3_defconfig6
-rw-r--r--arch/mips/configs/malta_defconfig2
-rw-r--r--arch/mips/configs/malta_kvm_defconfig2
-rw-r--r--arch/mips/configs/malta_qemu_32r6_defconfig2
-rw-r--r--arch/mips/configs/maltaaprp_defconfig2
-rw-r--r--arch/mips/configs/maltasmvp_defconfig6
-rw-r--r--arch/mips/configs/maltasmvp_eva_defconfig2
-rw-r--r--arch/mips/configs/maltaup_defconfig2
-rw-r--r--arch/mips/configs/maltaup_xpa_defconfig2
-rw-r--r--arch/mips/configs/mtx1_defconfig6
-rw-r--r--arch/mips/configs/rm200_defconfig2
-rw-r--r--arch/mips/pci/pci-legacy.c38
-rw-r--r--arch/openrisc/configs/or1klitex_defconfig2
-rw-r--r--arch/openrisc/configs/virt_defconfig4
-rw-r--r--arch/parisc/Kconfig3
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig4
-rw-r--r--arch/parisc/configs/generic-64bit_defconfig4
-rw-r--r--arch/parisc/include/asm/perf_event.h8
-rw-r--r--arch/parisc/include/uapi/asm/ioctls.h8
-rw-r--r--arch/parisc/include/uapi/asm/perf_regs.h63
-rw-r--r--arch/parisc/kernel/Makefile1
-rw-r--r--arch/parisc/kernel/drivers.c6
-rw-r--r--arch/parisc/kernel/firmware.c3
-rw-r--r--arch/parisc/kernel/perf_event.c27
-rw-r--r--arch/parisc/kernel/perf_regs.c61
-rw-r--r--arch/parisc/kernel/traps.c2
-rw-r--r--arch/parisc/kernel/unaligned.c2
-rw-r--r--arch/parisc/lib/memcpy.c1
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/kvm_types.h15
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/riscv/kvm/Kconfig2
-rw-r--r--arch/riscv/kvm/vcpu.c3
-rw-r--r--arch/s390/Makefile1
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c2
-rw-r--r--arch/s390/include/asm/ap.h18
-rw-r--r--arch/s390/include/asm/atomic_ops.h28
-rw-r--r--arch/s390/include/asm/barrier.h8
-rw-r--r--arch/s390/include/asm/bitops.h2
-rw-r--r--arch/s390/include/asm/checksum.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h12
-rw-r--r--arch/s390/include/asm/cpacf.h24
-rw-r--r--arch/s390/include/asm/ctlreg.h8
-rw-r--r--arch/s390/include/asm/fpu-insn.h36
-rw-r--r--arch/s390/include/asm/kvm_host.h2
-rw-r--r--arch/s390/include/asm/kvm_para.h2
-rw-r--r--arch/s390/include/asm/pci.h10
-rw-r--r--arch/s390/include/asm/percpu.h8
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/rwonce.h2
-rw-r--r--arch/s390/include/asm/spinlock.h2
-rw-r--r--arch/s390/include/asm/stacktrace.h4
-rw-r--r--arch/s390/include/asm/string.h2
-rw-r--r--arch/s390/include/asm/syscall.h2
-rw-r--r--arch/s390/include/asm/timex.h2
-rw-r--r--arch/s390/kernel/diag/diag310.c2
-rw-r--r--arch/s390/kernel/diag/diag324.c2
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c106
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/skey.c2
-rw-r--r--arch/s390/kernel/smp.c2
-rw-r--r--arch/s390/kernel/uv.c4
-rw-r--r--arch/s390/kernel/vmlinux.lds.S44
-rw-r--r--arch/s390/kvm/kvm-s390.c6
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/lib/spinlock.c6
-rw-r--r--arch/s390/lib/string.c8
-rw-r--r--arch/s390/lib/test_unwind.c4
-rw-r--r--arch/s390/lib/xor.c8
-rw-r--r--arch/s390/mm/maccess.c2
-rw-r--r--arch/s390/mm/pgalloc.c2
-rw-r--r--arch/s390/pci/pci.c4
-rw-r--r--arch/s390/pci/pci_event.c3
-rw-r--r--arch/s390/pci/pci_insn.c4
-rw-r--r--arch/s390/pci/pci_sysfs.c25
-rw-r--r--arch/sh/configs/ap325rxa_defconfig7
-rw-r--r--arch/sh/configs/apsh4a3a_defconfig3
-rw-r--r--arch/sh/configs/apsh4ad0a_defconfig3
-rw-r--r--arch/sh/configs/ecovec24_defconfig7
-rw-r--r--arch/sh/configs/edosk7760_defconfig3
-rw-r--r--arch/sh/configs/espt_defconfig3
-rw-r--r--arch/sh/configs/landisk_defconfig3
-rw-r--r--arch/sh/configs/lboxre2_defconfig3
-rw-r--r--arch/sh/configs/magicpanelr2_defconfig5
-rw-r--r--arch/sh/configs/r7780mp_defconfig3
-rw-r--r--arch/sh/configs/r7785rp_defconfig3
-rw-r--r--arch/sh/configs/rsk7264_defconfig3
-rw-r--r--arch/sh/configs/rsk7269_defconfig3
-rw-r--r--arch/sh/configs/sdk7780_defconfig5
-rw-r--r--arch/sh/configs/sdk7786_defconfig3
-rw-r--r--arch/sh/configs/se7343_defconfig3
-rw-r--r--arch/sh/configs/se7712_defconfig3
-rw-r--r--arch/sh/configs/se7721_defconfig3
-rw-r--r--arch/sh/configs/se7722_defconfig3
-rw-r--r--arch/sh/configs/se7724_defconfig7
-rw-r--r--arch/sh/configs/sh03_defconfig5
-rw-r--r--arch/sh/configs/sh2007_defconfig2
-rw-r--r--arch/sh/configs/sh7757lcr_defconfig2
-rw-r--r--arch/sh/configs/sh7763rdp_defconfig3
-rw-r--r--arch/sh/configs/sh7785lcr_32bit_defconfig3
-rw-r--r--arch/sh/configs/sh7785lcr_defconfig3
-rw-r--r--arch/sh/configs/shx3_defconfig3
-rw-r--r--arch/sh/configs/titan_defconfig5
-rw-r--r--arch/sh/configs/ul2_defconfig3
-rw-r--r--arch/sh/configs/urquell_defconfig3
-rw-r--r--arch/sparc/configs/sparc64_defconfig7
-rw-r--r--arch/sparc/kernel/leon_pci.c27
-rw-r--r--arch/sparc/kernel/pci.c27
-rw-r--r--arch/sparc/kernel/pcic.c27
-rw-r--r--arch/um/Kconfig1
-rw-r--r--arch/um/drivers/ssl.c5
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/drivers/vector_kern.c2
-rw-r--r--arch/um/drivers/virtio_pcidev.c6
-rw-r--r--arch/um/include/asm/mmu_context.h11
-rw-r--r--arch/um/include/asm/processor-generic.h3
-rw-r--r--arch/um/include/shared/as-layout.h5
-rw-r--r--arch/um/include/shared/skas/stub-data.h3
-rw-r--r--arch/um/kernel/dtb.c2
-rw-r--r--arch/um/kernel/irq.c5
-rw-r--r--arch/um/kernel/time.c37
-rw-r--r--arch/um/kernel/um_arch.c7
-rw-r--r--arch/um/os-Linux/skas/process.c2
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/Kconfig.assembler20
-rw-r--r--arch/x86/boot/bitops.h2
-rw-r--r--arch/x86/boot/boot.h8
-rw-r--r--arch/x86/boot/string.c4
-rw-r--r--arch/x86/crypto/Kconfig2
-rw-r--r--arch/x86/crypto/Makefile6
-rw-r--r--arch/x86/crypto/aes-ctr-avx-x86_64.S2
-rw-r--r--arch/x86/crypto/aes-xts-avx-x86_64.S2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c22
-rw-r--r--arch/x86/crypto/aria-aesni-avx-asm_64.S10
-rw-r--r--arch/x86/crypto/aria-aesni-avx2-asm_64.S10
-rw-r--r--arch/x86/crypto/aria_aesni_avx2_glue.c4
-rw-r--r--arch/x86/crypto/aria_aesni_avx_glue.c4
-rw-r--r--arch/x86/entry/calling.h11
-rw-r--r--arch/x86/entry/entry_64_fred.S36
-rw-r--r--arch/x86/hyperv/hv_init.c69
-rw-r--r--arch/x86/hyperv/irqdomain.c111
-rw-r--r--arch/x86/hyperv/ivm.c226
-rw-r--r--arch/x86/include/asm/archrandom.h6
-rw-r--r--arch/x86/include/asm/asm.h12
-rw-r--r--arch/x86/include/asm/bitops.h18
-rw-r--r--arch/x86/include/asm/bug.h9
-rw-r--r--arch/x86/include/asm/cfi.h14
-rw-r--r--arch/x86/include/asm/cmpxchg.h12
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h6
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h3
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/ibt.h10
-rw-r--r--arch/x86/include/asm/idtentry.h9
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h81
-rw-r--r--arch/x86/include/asm/kvm_types.h10
-rw-r--r--arch/x86/include/asm/mshyperv.h137
-rw-r--r--arch/x86/include/asm/msr-index.h4
-rw-r--r--arch/x86/include/asm/mtrr.h15
-rw-r--r--arch/x86/include/asm/mwait.h8
-rw-r--r--arch/x86/include/asm/percpu.h12
-rw-r--r--arch/x86/include/asm/rmwcc.h26
-rw-r--r--arch/x86/include/asm/sev.h3
-rw-r--r--arch/x86/include/asm/signal.h3
-rw-r--r--arch/x86/include/asm/special_insns.h10
-rw-r--r--arch/x86/include/asm/svm.h1
-rw-r--r--arch/x86/include/asm/text-patching.h20
-rw-r--r--arch/x86/include/asm/uaccess.h7
-rw-r--r--arch/x86/include/asm/vmx.h9
-rw-r--r--arch/x86/include/uapi/asm/kvm.h34
-rw-r--r--arch/x86/include/uapi/asm/vmx.h6
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/alternative.c292
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/cfi.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c30
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c15
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c15
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/sgx/encls.h6
-rw-r--r--arch/x86/kernel/irqinit.c6
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/kvm/cpuid.c58
-rw-r--r--arch/x86/kvm/emulate.c713
-rw-r--r--arch/x86/kvm/hyperv.c16
-rw-r--r--arch/x86/kvm/ioapic.c15
-rw-r--r--arch/x86/kvm/irq.c91
-rw-r--r--arch/x86/kvm/irq.h4
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h3
-rw-r--r--arch/x86/kvm/kvm_emulate.h3
-rw-r--r--arch/x86/kvm/kvm_onhyperv.c6
-rw-r--r--arch/x86/kvm/lapic.c240
-rw-r--r--arch/x86/kvm/lapic.h19
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/mmu/mmu.c197
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h6
-rw-r--r--arch/x86/kvm/mmu/mmutrace.h3
-rw-r--r--arch/x86/kvm/mmu/spte.c10
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c51
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h3
-rw-r--r--arch/x86/kvm/pmu.c173
-rw-r--r--arch/x86/kvm/pmu.h60
-rw-r--r--arch/x86/kvm/reverse_cpuid.h5
-rw-r--r--arch/x86/kvm/smm.c14
-rw-r--r--arch/x86/kvm/smm.h2
-rw-r--r--arch/x86/kvm/svm/avic.c151
-rw-r--r--arch/x86/kvm/svm/nested.c38
-rw-r--r--arch/x86/kvm/svm/pmu.c8
-rw-r--r--arch/x86/kvm/svm/sev.c227
-rw-r--r--arch/x86/kvm/svm/svm.c236
-rw-r--r--arch/x86/kvm/svm/svm.h44
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.c28
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.h31
-rw-r--r--arch/x86/kvm/trace.h5
-rw-r--r--arch/x86/kvm/vmx/capabilities.h12
-rw-r--r--arch/x86/kvm/vmx/main.c14
-rw-r--r--arch/x86/kvm/vmx/nested.c215
-rw-r--r--arch/x86/kvm/vmx/nested.h5
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c81
-rw-r--r--arch/x86/kvm/vmx/tdx.c28
-rw-r--r--arch/x86/kvm/vmx/vmcs12.c6
-rw-r--r--arch/x86/kvm/vmx/vmcs12.h14
-rw-r--r--arch/x86/kvm/vmx/vmenter.S4
-rw-r--r--arch/x86/kvm/vmx/vmx.c238
-rw-r--r--arch/x86/kvm/vmx/vmx.h22
-rw-r--r--arch/x86/kvm/vmx/x86_ops.h2
-rw-r--r--arch/x86/kvm/x86.c949
-rw-r--r--arch/x86/kvm/x86.h42
-rw-r--r--arch/x86/lib/bhi.S58
-rw-r--r--arch/x86/lib/retpoline.S4
-rw-r--r--arch/x86/net/bpf_jit_comp.c6
-rw-r--r--arch/x86/pci/fixup.c40
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S4
-rw-r--r--arch/x86/um/shared/sysdep/stub_32.h2
-rw-r--r--arch/x86/um/shared/sysdep/stub_64.h2
-rw-r--r--arch/xtensa/configs/audio_kc705_defconfig2
-rw-r--r--arch/xtensa/configs/cadence_csp_defconfig2
-rw-r--r--arch/xtensa/configs/generic_kc705_defconfig2
-rw-r--r--arch/xtensa/configs/nommu_kc705_defconfig2
-rw-r--r--arch/xtensa/configs/smp_lx200_defconfig2
-rw-r--r--arch/xtensa/configs/virt_defconfig2
-rw-r--r--arch/xtensa/configs/xip_kc705_defconfig2
-rw-r--r--arch/xtensa/kernel/platform.c5
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c6
332 files changed, 5193 insertions, 2843 deletions
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index a7cd526dd7ca..f930396d9dae 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index afa6a348f444..6b779dee5ea0 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -86,7 +86,7 @@ CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 2bfa6371953c..a89b50d5369d 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 1558e8e87767..1b8b2a098cda 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -77,7 +77,7 @@ CONFIG_DMADEVICES=y
CONFIG_DW_AXI_DMAC=y
CONFIG_IIO=y
CONFIG_TI_ADC108S102=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
index 03d9ac20baa9..b7120523e09a 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -74,7 +74,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_SERIAL=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index c09488992f13..4077abd5980c 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -81,7 +81,7 @@ CONFIG_MMC_DW=y
CONFIG_UIO=y
CONFIG_UIO_PDRV_GENIRQ=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2a124c92e4f6..2e3f93b690f4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -108,6 +108,7 @@ config ARM
select HAVE_GUP_FAST if ARM_LPAE
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig
index 516689dc6cf1..242a61208a0f 100644
--- a/arch/arm/configs/axm55xx_defconfig
+++ b/arch/arm/configs/axm55xx_defconfig
@@ -194,8 +194,7 @@ CONFIG_MAILBOX=y
CONFIG_PL320_MBOX=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=y
diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig
index 27dc3bf6b124..4a8ac09843d7 100644
--- a/arch/arm/configs/bcm2835_defconfig
+++ b/arch/arm/configs/bcm2835_defconfig
@@ -154,8 +154,8 @@ CONFIG_PWM_BCM2835=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index e2ddaca0f89d..673408a10888 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -228,7 +228,7 @@ CONFIG_PWM=y
CONFIG_PWM_TIECAP=m
CONFIG_PWM_TIEHRPWM=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_XFS_FS=m
CONFIG_AUTOFS_FS=m
diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig
index d76eb12d29a7..bb6c4748bfc8 100644
--- a/arch/arm/configs/dove_defconfig
+++ b/arch/arm/configs/dove_defconfig
@@ -95,8 +95,8 @@ CONFIG_RTC_DRV_MV=y
CONFIG_DMADEVICES=y
CONFIG_MV_XOR=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+# CONFIG_EXT4_FS_XATTR is not set
CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig
index 2248afaf35b5..7f3756d8b086 100644
--- a/arch/arm/configs/ep93xx_defconfig
+++ b/arch/arm/configs/ep93xx_defconfig
@@ -103,8 +103,8 @@ CONFIG_RTC_DRV_EP93XX=y
CONFIG_DMADEVICES=y
CONFIG_EP93XX_DMA=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+# CONFIG_EXT4_FS_XATTR is not set
CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 9a57763a8d38..0d55056c6f82 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -436,9 +436,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=y
diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig
index 3cb995b9616a..81199dddcde7 100644
--- a/arch/arm/configs/ixp4xx_defconfig
+++ b/arch/arm/configs/ixp4xx_defconfig
@@ -158,8 +158,8 @@ CONFIG_IXP4XX_NPE=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_OVERLAY_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig
index 842a989baa27..f67e9cda73e2 100644
--- a/arch/arm/configs/mmp2_defconfig
+++ b/arch/arm/configs/mmp2_defconfig
@@ -53,7 +53,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_MAX8925=y
# CONFIG_RESET_CONTROLLER is not set
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_MSDOS_FS=y
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index fa06d98e43fc..e2d9f3610063 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -113,7 +113,7 @@ CONFIG_RTC_DRV_MOXART=y
CONFIG_DMADEVICES=y
CONFIG_MOXART_DMA=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_TMPFS=y
CONFIG_CONFIGFS_FS=y
CONFIG_JFFS2_FS=y
diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig
index b523bc246c09..59b020e66a0b 100644
--- a/arch/arm/configs/multi_v5_defconfig
+++ b/arch/arm/configs/multi_v5_defconfig
@@ -268,7 +268,7 @@ CONFIG_PWM_ATMEL=m
CONFIG_PWM_ATMEL_HLCDC_PWM=m
CONFIG_PWM_ATMEL_TCB=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig
index 3343f72de7ea..55f4ab67a306 100644
--- a/arch/arm/configs/mv78xx0_defconfig
+++ b/arch/arm/configs/mv78xx0_defconfig
@@ -91,8 +91,8 @@ CONFIG_RTC_DRV_DS1307=y
CONFIG_RTC_DRV_RS5C372=y
CONFIG_RTC_DRV_M41T80=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+# CONFIG_EXT4_FS_XATTR is not set
CONFIG_EXT4_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig
index 23dbb80fcc2e..d1742a7cae6a 100644
--- a/arch/arm/configs/mvebu_v5_defconfig
+++ b/arch/arm/configs/mvebu_v5_defconfig
@@ -168,7 +168,7 @@ CONFIG_MV_XOR=y
CONFIG_STAGING=y
CONFIG_FB_XGI=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index ea28ed8991b4..696b4fbc2412 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -116,7 +116,7 @@ CONFIG_IIO_ST_ACCEL_3AXIS=y
CONFIG_PWM=y
CONFIG_PWM_STMPE=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 661e5d6894bd..24c54bf1e243 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -184,7 +184,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_OMAP=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 1d5f75241739..4e53c331cd84 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -679,7 +679,7 @@ CONFIG_TWL4030_USB=m
CONFIG_COUNTER=m
CONFIG_TI_EQEP=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_FANOTIFY=y
CONFIG_QUOTA=y
diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig
index 62b9c6102789..c28426250ec3 100644
--- a/arch/arm/configs/orion5x_defconfig
+++ b/arch/arm/configs/orion5x_defconfig
@@ -115,8 +115,8 @@ CONFIG_RTC_DRV_M48T86=y
CONFIG_DMADEVICES=y
CONFIG_MV_XOR=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+# CONFIG_EXT4_FS_XATTR is not set
CONFIG_EXT4_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 70489f3555d0..3ea189f1f42f 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -579,9 +579,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig
index fa681a7a49c2..29a1dea500f0 100644
--- a/arch/arm/configs/qcom_defconfig
+++ b/arch/arm/configs/qcom_defconfig
@@ -291,7 +291,7 @@ CONFIG_INTERCONNECT_QCOM_MSM8974=m
CONFIG_INTERCONNECT_QCOM_SDX55=m
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig
index 24f1fa868230..46df453e224e 100644
--- a/arch/arm/configs/rpc_defconfig
+++ b/arch/arm/configs/rpc_defconfig
@@ -77,7 +77,7 @@ CONFIG_SOUND_VIDC=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_PCF8583=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig
index 967b1cb22136..7bf28a83946a 100644
--- a/arch/arm/configs/s3c6400_defconfig
+++ b/arch/arm/configs/s3c6400_defconfig
@@ -52,9 +52,9 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_S3C=y
CONFIG_PWM=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_CRAMFS=y
diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig
index e14720a9a5ac..e2ad9a05566f 100644
--- a/arch/arm/configs/sama7_defconfig
+++ b/arch/arm/configs/sama7_defconfig
@@ -201,7 +201,7 @@ CONFIG_MCHP_EIC=y
CONFIG_RESET_CONTROLLER=y
CONFIG_NVMEM_MICROCHIP_OTPC=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_AUTOFS_FS=m
CONFIG_VFAT_FS=y
diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig
index 294906c8f16e..f2e42846b116 100644
--- a/arch/arm/configs/socfpga_defconfig
+++ b/arch/arm/configs/socfpga_defconfig
@@ -136,7 +136,7 @@ CONFIG_FPGA_REGION=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_AUTOFS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig
index a8f992fdb30d..8b19af1ea67c 100644
--- a/arch/arm/configs/spear13xx_defconfig
+++ b/arch/arm/configs/spear13xx_defconfig
@@ -84,8 +84,8 @@ CONFIG_DMATEST=m
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=y
CONFIG_MSDOS_FS=m
diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig
index 8dc5a388759c..b4e4b96a98af 100644
--- a/arch/arm/configs/spear3xx_defconfig
+++ b/arch/arm/configs/spear3xx_defconfig
@@ -67,8 +67,8 @@ CONFIG_DMATEST=m
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_AUTOFS_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig
index 4e9e1a6ff381..7083b1bd8573 100644
--- a/arch/arm/configs/spear6xx_defconfig
+++ b/arch/arm/configs/spear6xx_defconfig
@@ -53,8 +53,8 @@ CONFIG_DMATEST=m
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_AUTOFS_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig
index ac2a0f998c73..395df2f9dc8e 100644
--- a/arch/arm/configs/spitz_defconfig
+++ b/arch/arm/configs/spitz_defconfig
@@ -193,8 +193,8 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+# CONFIG_EXT4_FS_XATTR is not set
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index dcd9c316072e..82190b155b14 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -69,7 +69,7 @@ CONFIG_STM32_MDMA=y
CONFIG_IIO=y
CONFIG_STM32_ADC_CORE=y
CONFIG_STM32_ADC=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index ba863b445417..ab477ca13f89 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -319,9 +319,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
# CONFIG_DNOTIFY is not set
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig
index 9c8dc6dd5fe3..e88533b78327 100644
--- a/arch/arm/configs/u8500_defconfig
+++ b/arch/arm/configs/u8500_defconfig
@@ -175,7 +175,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig
index cdb6065e04fd..b9454f6954f8 100644
--- a/arch/arm/configs/vexpress_defconfig
+++ b/arch/arm/configs/vexpress_defconfig
@@ -120,7 +120,7 @@ CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index d334c7fb672b..b5793e8fbdc1 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/init.h>
#include <linux/io.h>
@@ -337,8 +338,8 @@ void pcibios_fixup_bus(struct pci_bus *bus)
/*
* Report what we did for this bus
*/
- pr_info("PCI: bus%d: Fast back to back transfers %sabled\n",
- bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis");
+ pr_info("PCI: bus%d: Fast back to back transfers %s\n",
+ bus->number, str_enabled_disabled(features & PCI_COMMAND_FAST_BACK));
}
EXPORT_SYMBOL(pcibios_fixup_bus);
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
index bc598e3d8dd2..e24ee559af81 100644
--- a/arch/arm/kernel/entry-ftrace.S
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -257,11 +257,21 @@ ENDPROC(ftrace_graph_regs_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(return_to_handler)
- stmdb sp!, {r0-r3}
- add r0, sp, #16 @ sp at exit of instrumented routine
+ mov ip, sp @ sp at exit of instrumented routine
+ sub sp, #PT_REGS_SIZE
+ str r0, [sp, #S_R0]
+ str r1, [sp, #S_R1]
+ str r2, [sp, #S_R2]
+ str r3, [sp, #S_R3]
+ str ip, [sp, #S_FP]
+ mov r0, sp
bl ftrace_return_to_handler
- mov lr, r0 @ r0 has real ret addr
- ldmia sp!, {r0-r3}
+ mov lr, r0 @ r0 has real ret addr
+ ldr r3, [sp, #S_R3]
+ ldr r2, [sp, #S_R2]
+ ldr r1, [sp, #S_R1]
+ ldr r0, [sp, #S_R0]
+ add sp, sp, #PT_REGS_SIZE @ restore stack pointer
ret lr
ENDPROC(return_to_handler)
#endif
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 43d91bfd2360..470867160076 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -13,6 +13,7 @@
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/string_choices.h>
#include <asm/cacheflush.h>
#include <asm/cp15.h>
@@ -667,9 +668,9 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
u32 power_ctrl;
power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
- pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
- power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
- power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
+ pr_info("L2C-310 dynamic clock gating %s, standby mode %s\n",
+ str_enabled_disabled(power_ctrl & L310_DYNAMIC_CLK_GATING_EN),
+ str_enabled_disabled(power_ctrl & L310_STNDBY_MODE_EN));
}
if (aux & L310_AUX_CTRL_FULL_LINE_ZERO)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 46169fe42c61..2bc828a1940c 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -135,8 +135,7 @@ static void die_kernel_fault(const char *msg, struct mm_struct *mm,
bust_spinlocks(1);
pr_alert("8<--- cut here ---\n");
pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n",
- msg, addr, fsr & FSR_LNX_PF ? "execute" :
- fsr & FSR_WRITE ? "write" : "read");
+ msg, addr, fsr & FSR_LNX_PF ? "execute" : str_write_read(fsr & FSR_WRITE));
show_pte(KERN_ALERT, mm, addr);
die("Oops", regs, fsr);
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index bfe3ce9df197..ba7cf7fec5e9 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -153,6 +153,7 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
regs->pc = afregs->pc;
regs->regs[29] = afregs->fp;
regs->regs[30] = afregs->lr;
+ regs->pstate = PSR_MODE_EL1h;
return regs;
}
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index ff6fd0bbd7d2..78a4dbf75e60 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -79,7 +79,6 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end);
-extern void init_idmap_kpti_bbml2_flag(void);
extern void linear_map_maybe_split_to_ptes(void);
/*
@@ -107,5 +106,11 @@ static inline bool kaslr_requires_kpti(void)
return true;
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+void kpti_install_ng_mappings(void);
+#else
+static inline void kpti_install_ng_mappings(void) {}
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3917ad897801..5ed401ff79e3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1941,104 +1941,6 @@ static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
}
#endif
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
-
-extern
-void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
- phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags);
-
-static phys_addr_t __initdata kpti_ng_temp_alloc;
-
-static phys_addr_t __init kpti_ng_pgd_alloc(enum pgtable_type type)
-{
- kpti_ng_temp_alloc -= PAGE_SIZE;
- return kpti_ng_temp_alloc;
-}
-
-static int __init __kpti_install_ng_mappings(void *__unused)
-{
- typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
- extern kpti_remap_fn idmap_kpti_install_ng_mappings;
- kpti_remap_fn *remap_fn;
-
- int cpu = smp_processor_id();
- int levels = CONFIG_PGTABLE_LEVELS;
- int order = order_base_2(levels);
- u64 kpti_ng_temp_pgd_pa = 0;
- pgd_t *kpti_ng_temp_pgd;
- u64 alloc = 0;
-
- if (levels == 5 && !pgtable_l5_enabled())
- levels = 4;
- else if (levels == 4 && !pgtable_l4_enabled())
- levels = 3;
-
- remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
-
- if (!cpu) {
- alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
- kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
- kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
-
- //
- // Create a minimal page table hierarchy that permits us to map
- // the swapper page tables temporarily as we traverse them.
- //
- // The physical pages are laid out as follows:
- //
- // +--------+-/-------+-/------ +-/------ +-\\\--------+
- // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
- // +--------+-\-------+-\------ +-\------ +-///--------+
- // ^
- // The first page is mapped into this hierarchy at a PMD_SHIFT
- // aligned virtual address, so that we can manipulate the PTE
- // level entries while the mapping is active. The first entry
- // covers the PTE[] page itself, the remaining entries are free
- // to be used as a ad-hoc fixmap.
- //
- create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
- KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
- kpti_ng_pgd_alloc, 0);
- }
-
- cpu_install_idmap();
- remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
- cpu_uninstall_idmap();
-
- if (!cpu) {
- free_pages(alloc, order);
- arm64_use_ng_mappings = true;
- }
-
- return 0;
-}
-
-static void __init kpti_install_ng_mappings(void)
-{
- /* Check whether KPTI is going to be used */
- if (!arm64_kernel_unmapped_at_el0())
- return;
-
- /*
- * We don't need to rewrite the page-tables if either we've done
- * it already or we have KASLR enabled and therefore have not
- * created any global mappings at all.
- */
- if (arm64_use_ng_mappings)
- return;
-
- init_idmap_kpti_bbml2_flag();
- stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
-}
-
-#else
-static inline void kpti_install_ng_mappings(void)
-{
-}
-#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
-
static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap)
{
if (__this_cpu_read(this_cpu_vector) == vectors) {
@@ -2419,17 +2321,21 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
+ static bool cleared_zero_page = false;
+
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
mte_cpu_setup();
/*
* Clear the tags in the zero page. This needs to be done via the
- * linear map which has the Tagged attribute.
+ * linear map which has the Tagged attribute. Since this page is
+ * always mapped as pte_special(), set_pte_at() will not attempt to
+ * clear the tags or set PG_mte_tagged.
*/
- if (try_page_mte_tagging(ZERO_PAGE(0))) {
+ if (!cleared_zero_page) {
+ cleared_zero_page = true;
mte_clear_page_tags(lm_alias(empty_zero_page));
- set_page_mte_tagged(ZERO_PAGE(0));
}
kasan_init_hw_tags_cpu();
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 54a52dc5c1ae..43f7a2f39403 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -478,7 +478,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
if (folio_test_hugetlb(folio))
WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
else
- WARN_ON_ONCE(!page_mte_tagged(page));
+ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 0c5d408afd95..8ab6104a4883 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "kprobes: " fmt
+#include <linux/execmem.h>
#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -41,6 +42,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+void *alloc_insn_page(void)
+{
+ void *addr;
+
+ addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+ set_memory_rox((unsigned long)addr, 1);
+ return addr;
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
kprobe_opcode_t *addr = p->ainsn.xol_insn;
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index bff62e75d681..4f803fd1c99a 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -25,7 +25,7 @@ menuconfig KVM
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
- select KVM_XFER_TO_GUEST_WORK
+ select VIRT_XFER_TO_GUEST_WORK
select KVM_VFIO
select HAVE_KVM_DIRTY_RING_ACQ_REL
select NEED_KVM_DIRTY_RING_WITH_BITMAP
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fa79744290f3..f21d1b7f20f8 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -6,7 +6,6 @@
#include <linux/bug.h>
#include <linux/cpu_pm.h>
-#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
@@ -1183,7 +1182,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/*
* Check conditions before entering the guest
*/
- ret = xfer_to_guest_mode_handle_work(vcpu);
+ ret = kvm_xfer_to_guest_mode_handle_work(vcpu);
if (!ret)
ret = 1;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b3d8c3de4149..b8d37eb037fc 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -470,14 +470,6 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
mutex_unlock(&fixmap_lock);
}
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-extern __alias(__create_pgd_mapping_locked)
-void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
- phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type),
- int flags);
-#endif
-
#define INVALID_PHYS_ADDR (-1ULL)
static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
@@ -823,7 +815,7 @@ static bool linear_map_requires_bbml2 __initdata;
u32 idmap_kpti_bbml2_flag;
-void __init init_idmap_kpti_bbml2_flag(void)
+static void __init init_idmap_kpti_bbml2_flag(void)
{
WRITE_ONCE(idmap_kpti_bbml2_flag, 1);
/* Must be visible to other CPUs before stop_machine() is called. */
@@ -1135,7 +1127,93 @@ static void __init declare_vma(struct vm_struct *vma,
}
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-static pgprot_t kernel_exec_prot(void)
+#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
+
+static phys_addr_t kpti_ng_temp_alloc __initdata;
+
+static phys_addr_t __init kpti_ng_pgd_alloc(enum pgtable_type type)
+{
+ kpti_ng_temp_alloc -= PAGE_SIZE;
+ return kpti_ng_temp_alloc;
+}
+
+static int __init __kpti_install_ng_mappings(void *__unused)
+{
+ typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
+ extern kpti_remap_fn idmap_kpti_install_ng_mappings;
+ kpti_remap_fn *remap_fn;
+
+ int cpu = smp_processor_id();
+ int levels = CONFIG_PGTABLE_LEVELS;
+ int order = order_base_2(levels);
+ u64 kpti_ng_temp_pgd_pa = 0;
+ pgd_t *kpti_ng_temp_pgd;
+ u64 alloc = 0;
+
+ if (levels == 5 && !pgtable_l5_enabled())
+ levels = 4;
+ else if (levels == 4 && !pgtable_l4_enabled())
+ levels = 3;
+
+ remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
+
+ if (!cpu) {
+ alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
+ kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
+ kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
+
+ //
+ // Create a minimal page table hierarchy that permits us to map
+ // the swapper page tables temporarily as we traverse them.
+ //
+ // The physical pages are laid out as follows:
+ //
+ // +--------+-/-------+-/------ +-/------ +-\\\--------+
+ // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
+ // +--------+-\-------+-\------ +-\------ +-///--------+
+ // ^
+ // The first page is mapped into this hierarchy at a PMD_SHIFT
+ // aligned virtual address, so that we can manipulate the PTE
+ // level entries while the mapping is active. The first entry
+ // covers the PTE[] page itself, the remaining entries are free
+ // to be used as a ad-hoc fixmap.
+ //
+ __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc),
+ KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
+ kpti_ng_pgd_alloc, 0);
+ }
+
+ cpu_install_idmap();
+ remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
+ cpu_uninstall_idmap();
+
+ if (!cpu) {
+ free_pages(alloc, order);
+ arm64_use_ng_mappings = true;
+ }
+
+ return 0;
+}
+
+void __init kpti_install_ng_mappings(void)
+{
+ /* Check whether KPTI is going to be used */
+ if (!arm64_kernel_unmapped_at_el0())
+ return;
+
+ /*
+ * We don't need to rewrite the page-tables if either we've done
+ * it already or we have KASLR enabled and therefore have not
+ * created any global mappings at all.
+ */
+ if (arm64_use_ng_mappings)
+ return;
+
+ init_idmap_kpti_bbml2_flag();
+ stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
+}
+
+static pgprot_t __init kernel_exec_prot(void)
{
return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
}
diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig
index c6108f000288..22d7f8ac58a3 100644
--- a/arch/hexagon/configs/comet_defconfig
+++ b/arch/hexagon/configs/comet_defconfig
@@ -46,10 +46,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ea683bcea14c..5b1116733d88 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -70,6 +70,7 @@ config LOONGARCH
select ARCH_SUPPORTS_LTO_CLANG_THIN
select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_SUPPORTS_RT
select ARCH_SUPPORTS_SCHED_SMT if SMP
select ARCH_SUPPORTS_SCHED_MC if SMP
@@ -618,6 +619,16 @@ config CPU_HAS_PREFETCH
config ARCH_SUPPORTS_KEXEC
def_bool y
+config ARCH_SUPPORTS_KEXEC_FILE
+ def_bool 64BIT
+
+config ARCH_SELECTS_KEXEC_FILE
+ def_bool 64BIT
+ depends on KEXEC_FILE
+ select KEXEC_ELF
+ select RELOCATABLE
+ select HAVE_IMA_KEXEC if IMA
+
config ARCH_SUPPORTS_CRASH_DUMP
def_bool y
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index ae419e32f22e..dc5bd3f1b8d2 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -115,7 +115,7 @@ ifdef CONFIG_LTO_CLANG
# The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled.
# Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to
# be passed via '-mllvm' to ld.lld.
-KBUILD_LDFLAGS += -mllvm --loongarch-annotate-tablejump
+KBUILD_LDFLAGS += $(call ld-option,-mllvm --loongarch-annotate-tablejump)
endif
endif
@@ -129,7 +129,7 @@ KBUILD_RUSTFLAGS_KERNEL += -Crelocation-model=pie
LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs)
endif
-cflags-y += $(call cc-option, -mno-check-zero-division)
+cflags-y += $(call cc-option, -mno-check-zero-division -fno-isolate-erroneous-paths-dereference)
ifndef CONFIG_KASAN
cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 2b8df0e9e42a..3e838c229cd5 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -45,6 +45,7 @@ CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
CONFIG_PERF_EVENTS=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_CRASH_DUMP=y
CONFIG_LOONGARCH=y
CONFIG_64BIT=y
@@ -55,7 +56,7 @@ CONFIG_DMI=y
CONFIG_EFI=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
-CONFIG_NR_CPUS=256
+CONFIG_NR_CPUS=2048
CONFIG_NUMA=y
CONFIG_CPU_HAS_FPU=y
CONFIG_CPU_HAS_LSX=y
@@ -154,7 +155,16 @@ CONFIG_INET_ESPINTCP=y
CONFIG_INET_IPCOMP=m
CONFIG_INET_UDP_DIAG=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_BBR=m
+CONFIG_TCP_CONG_BIC=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_VEGAS=m
+CONFIG_TCP_CONG_NV=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_DCTCP=m
+CONFIG_TCP_CONG_CDG=m
+CONFIG_TCP_CONG_BBR=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
CONFIG_INET6_AH=m
@@ -331,15 +341,33 @@ CONFIG_LLC2=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
CONFIG_NET_SCH_SFQ=m
CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_CBS=m
+CONFIG_NET_SCH_GRED=m
CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_SKBPRIO=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_CAKE=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_PIE=m
+CONFIG_NET_SCH_FQ_PIE=m
CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_DEFAULT=y
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
+CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=m
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
@@ -407,6 +435,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_FW_LOADER_COMPRESS=y
CONFIG_FW_LOADER_COMPRESS_ZSTD=y
+CONFIG_SYSFB_SIMPLEFB=y
CONFIG_EFI_ZBOOT=y
CONFIG_EFI_BOOTLOADER_CONTROL=m
CONFIG_EFI_CAPSULE_LOADER=m
@@ -420,6 +449,11 @@ CONFIG_MTD_CFI_AMDSTD=m
CONFIG_MTD_CFI_STAA=m
CONFIG_MTD_RAM=m
CONFIG_MTD_ROM=m
+CONFIG_MTD_RAW_NAND=m
+CONFIG_MTD_NAND_PLATFORM=m
+CONFIG_MTD_NAND_LOONGSON=m
+CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y
+CONFIG_MTD_NAND_ECC_SW_BCH=y
CONFIG_MTD_UBI=m
CONFIG_MTD_UBI_BLOCK=y
CONFIG_PARPORT=y
@@ -575,6 +609,11 @@ CONFIG_E1000=y
CONFIG_E1000E=y
CONFIG_IGB=y
CONFIG_IXGBE=y
+CONFIG_I40E=y
+CONFIG_ICE=y
+CONFIG_FM10K=y
+CONFIG_IGC=y
+CONFIG_IDPF=y
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MELLANOX is not set
# CONFIG_NET_VENDOR_MICREL is not set
@@ -679,6 +718,9 @@ CONFIG_USB4_NET=m
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_GPIO=m
+CONFIG_KEYBOARD_GPIO_POLLED=m
+CONFIG_KEYBOARD_MATRIX=m
CONFIG_KEYBOARD_XTKBD=m
CONFIG_MOUSE_PS2_ELANTECH=y
CONFIG_MOUSE_PS2_SENTELIC=y
@@ -703,8 +745,11 @@ CONFIG_VIRTIO_CONSOLE=y
CONFIG_IPMI_HANDLER=m
CONFIG_IPMI_DEVICE_INTERFACE=m
CONFIG_IPMI_SI=m
+CONFIG_IPMI_LS2K=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_TCG_TPM=m
+CONFIG_TCG_LOONGSON=m
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_PIIX4=y
CONFIG_I2C_DESIGNWARE_CORE=y
@@ -720,6 +765,10 @@ CONFIG_PINCTRL_LOONGSON2=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_LOONGSON=y
CONFIG_GPIO_LOONGSON_64BIT=y
+CONFIG_GPIO_PCA953X=m
+CONFIG_GPIO_PCA953X_IRQ=y
+CONFIG_GPIO_PCA9570=m
+CONFIG_GPIO_PCF857X=m
CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_RESTART=y
CONFIG_POWER_RESET_SYSCON=y
@@ -730,6 +779,7 @@ CONFIG_SENSORS_LM93=m
CONFIG_SENSORS_W83795=m
CONFIG_SENSORS_W83627HF=m
CONFIG_LOONGSON2_THERMAL=m
+CONFIG_MFD_LOONGSON_SE=m
CONFIG_RC_CORE=m
CONFIG_LIRC=y
CONFIG_RC_DECODERS=y
@@ -761,6 +811,7 @@ CONFIG_DRM_AST=y
CONFIG_DRM_QXL=m
CONFIG_DRM_VIRTIO_GPU=m
CONFIG_DRM_LOONGSON=y
+CONFIG_DRM_SIMPLEDRM=y
CONFIG_FB=y
CONFIG_FB_EFI=y
CONFIG_FB_RADEON=y
@@ -801,6 +852,7 @@ CONFIG_SND_HDA_CODEC_HDMI_ATI=y
CONFIG_SND_HDA_CODEC_HDMI_NVIDIA=y
CONFIG_SND_HDA_CODEC_CONEXANT=y
CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_USB_AUDIO_MIDI_V2=y
CONFIG_SND_SOC=m
CONFIG_SND_SOC_LOONGSON_CARD=m
CONFIG_SND_SOC_ES7134=m
@@ -861,6 +913,8 @@ CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TCPCI=m
CONFIG_TYPEC_UCSI=m
CONFIG_UCSI_ACPI=m
+CONFIG_MMC=y
+CONFIG_MMC_LOONGSON2=m
CONFIG_INFINIBAND=m
CONFIG_EDAC=y
# CONFIG_EDAC_LEGACY_SYSFS is not set
@@ -922,19 +976,22 @@ CONFIG_NTB_SWITCHTEC=m
CONFIG_NTB_PERF=m
CONFIG_NTB_TRANSPORT=m
CONFIG_PWM=y
+CONFIG_PWM_LOONGSON=y
CONFIG_GENERIC_PHY=y
CONFIG_USB4=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=y
+CONFIG_XFS_SUPPORT_V4=y
+CONFIG_XFS_SUPPORT_ASCII_CI=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_GFS2_FS=m
@@ -1026,9 +1083,12 @@ CONFIG_CEPH_FS_SECURITY_LABEL=y
CONFIG_CIFS=m
# CONFIG_CIFS_DEBUG is not set
CONFIG_9P_FS=y
+CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_936=y
+CONFIG_NLS_CODEPAGE_950=y
CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_DLM=m
CONFIG_KEY_DH_OPERATIONS=y
@@ -1049,9 +1109,11 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_LZO=m
@@ -1063,6 +1125,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_CRYPTO_DEV_LOONGSON_RNG=m
CONFIG_DMA_CMA=y
CONFIG_DMA_NUMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
diff --git a/arch/loongarch/include/asm/image.h b/arch/loongarch/include/asm/image.h
new file mode 100644
index 000000000000..cab981cdb72a
--- /dev/null
+++ b/arch/loongarch/include/asm/image.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch binary image header for EFI(PE/COFF) format.
+ *
+ * Author: Youling Tang <tangyouling@kylinos.cn>
+ * Copyright (C) 2025 KylinSoft Corporation.
+ */
+
+#ifndef __ASM_IMAGE_H
+#define __ASM_IMAGE_H
+
+#ifndef __ASSEMBLER__
+
+/**
+ * struct loongarch_image_header
+ *
+ * @dos_sig: Optional PE format 'MZ' signature.
+ * @padding_1: Reserved.
+ * @kernel_entry: Kernel image entry pointer.
+ * @kernel_asize: An estimated size of the memory image size in LSB byte order.
+ * @text_offset: The image load offset in LSB byte order.
+ * @padding_2: Reserved.
+ * @pe_header: Optional offset to a PE format header.
+ **/
+
+struct loongarch_image_header {
+ uint8_t dos_sig[2];
+ uint16_t padding_1[3];
+ uint64_t kernel_entry;
+ uint64_t kernel_asize;
+ uint64_t text_offset;
+ uint32_t padding_2[7];
+ uint32_t pe_header;
+};
+
+/*
+ * loongarch_header_check_dos_sig - Helper to check the header
+ *
+ * Returns true (non-zero) if 'MZ' signature is found.
+ */
+
+static inline int loongarch_header_check_dos_sig(const struct loongarch_image_header *h)
+{
+ if (!h)
+ return 0;
+
+ return (h->dos_sig[0] == 'M' && h->dos_sig[1] == 'Z');
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_IMAGE_H */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 277d2140676b..55e64a12a124 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -77,6 +77,10 @@ enum reg2_op {
iocsrwrh_op = 0x19205,
iocsrwrw_op = 0x19206,
iocsrwrd_op = 0x19207,
+ llacqw_op = 0xe15e0,
+ screlw_op = 0xe15e1,
+ llacqd_op = 0xe15e2,
+ screld_op = 0xe15e3,
};
enum reg2i5_op {
@@ -189,6 +193,7 @@ enum reg3_op {
fldxd_op = 0x7068,
fstxs_op = 0x7070,
fstxd_op = 0x7078,
+ scq_op = 0x70ae,
amswapw_op = 0x70c0,
amswapd_op = 0x70c1,
amaddw_op = 0x70c2,
diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h
index cf95cd3eb2de..209fa43222e1 100644
--- a/arch/loongarch/include/asm/kexec.h
+++ b/arch/loongarch/include/asm/kexec.h
@@ -41,6 +41,18 @@ struct kimage_arch {
unsigned long systable_ptr;
};
+#ifdef CONFIG_KEXEC_FILE
+extern const struct kexec_file_ops kexec_efi_ops;
+extern const struct kexec_file_ops kexec_elf_ops;
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+
+extern int load_other_segments(struct kimage *image,
+ unsigned long kernel_load_addr, unsigned long kernel_size,
+ char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len);
+#endif
+
typedef void (*do_kexec_t)(unsigned long efi_boot,
unsigned long cmdline_ptr,
unsigned long systable_ptr,
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 6f5a4574a911..001924877772 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
obj-$(CONFIG_RELOCATABLE) += relocate.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_efi.o kexec_elf.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index fedaa67cde41..cbfce2872d71 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -52,6 +52,48 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_loongarch *c)
c->fpu_mask = ~(fcsr0 ^ fcsr1) & ~mask;
}
+/* simd = -1/0/128/256 */
+static unsigned int simd = -1U;
+
+static int __init cpu_setup_simd(char *str)
+{
+ get_option(&str, &simd);
+ pr_info("Set SIMD width = %u\n", simd);
+
+ return 0;
+}
+
+early_param("simd", cpu_setup_simd);
+
+static int __init cpu_final_simd(void)
+{
+ struct cpuinfo_loongarch *c = &cpu_data[0];
+
+ if (simd < 128) {
+ c->options &= ~LOONGARCH_CPU_LSX;
+ elf_hwcap &= ~HWCAP_LOONGARCH_LSX;
+ }
+
+ if (simd < 256) {
+ c->options &= ~LOONGARCH_CPU_LASX;
+ elf_hwcap &= ~HWCAP_LOONGARCH_LASX;
+ }
+
+ simd = 0;
+
+ if (c->options & LOONGARCH_CPU_LSX)
+ simd = 128;
+
+ if (c->options & LOONGARCH_CPU_LASX)
+ simd = 256;
+
+ pr_info("Final SIMD width = %u\n", simd);
+
+ return 0;
+}
+
+arch_initcall(cpu_final_simd);
+
static inline void set_elf_platform(int cpu, const char *plat)
{
if (cpu == 0)
@@ -134,13 +176,13 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
elf_hwcap |= HWCAP_LOONGARCH_FPU;
}
#ifdef CONFIG_CPU_HAS_LSX
- if (config & CPUCFG2_LSX) {
+ if ((config & CPUCFG2_LSX) && (simd >= 128)) {
c->options |= LOONGARCH_CPU_LSX;
elf_hwcap |= HWCAP_LOONGARCH_LSX;
}
#endif
#ifdef CONFIG_CPU_HAS_LASX
- if (config & CPUCFG2_LASX) {
+ if ((config & CPUCFG2_LASX) && (simd >= 256)) {
c->options |= LOONGARCH_CPU_LASX;
elf_hwcap |= HWCAP_LOONGARCH_LASX;
}
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 72ecfed29d55..bf037f0c6b26 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -141,6 +141,9 @@ bool insns_not_supported(union loongarch_instruction insn)
case amswapw_op ... ammindbdu_op:
pr_notice("atomic memory access instructions are not supported\n");
return true;
+ case scq_op:
+ pr_notice("sc.q instruction is not supported\n");
+ return true;
}
switch (insn.reg2i14_format.opcode) {
@@ -152,6 +155,15 @@ bool insns_not_supported(union loongarch_instruction insn)
return true;
}
+ switch (insn.reg2_format.opcode) {
+ case llacqw_op:
+ case llacqd_op:
+ case screlw_op:
+ case screld_op:
+ pr_notice("llacq and screl instructions are not supported\n");
+ return true;
+ }
+
switch (insn.reg1i21_format.opcode) {
case bceqz_op:
pr_notice("bceqz and bcnez instructions are not supported\n");
diff --git a/arch/loongarch/kernel/kexec_efi.c b/arch/loongarch/kernel/kexec_efi.c
new file mode 100644
index 000000000000..45121b914f8f
--- /dev/null
+++ b/arch/loongarch/kernel/kexec_efi.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Load EFI vmlinux file for the kexec_file_load syscall.
+ *
+ * Author: Youling Tang <tangyouling@kylinos.cn>
+ * Copyright (C) 2025 KylinSoft Corporation.
+ */
+
+#define pr_fmt(fmt) "kexec_file(EFI): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/pe.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+#include <asm/cpufeature.h>
+#include <asm/image.h>
+
+static int efi_kexec_probe(const char *kernel_buf, unsigned long kernel_len)
+{
+ const struct loongarch_image_header *h = (const struct loongarch_image_header *)kernel_buf;
+
+ if (!h || (kernel_len < sizeof(*h))) {
+ kexec_dprintk("No LoongArch image header.\n");
+ return -EINVAL;
+ }
+
+ if (!loongarch_header_check_dos_sig(h)) {
+ kexec_dprintk("No LoongArch PE image header.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void *efi_kexec_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long text_offset, kernel_segment_number;
+ struct kexec_buf kbuf;
+ struct kexec_segment *kernel_segment;
+ struct loongarch_image_header *h;
+
+ h = (struct loongarch_image_header *)kernel;
+ if (!h->kernel_asize)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Load the kernel
+ * FIXME: Non-relocatable kernel rejected for kexec_file (require CONFIG_RELOCATABLE)
+ */
+ kbuf.image = image;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = false;
+
+ kbuf.buffer = kernel;
+ kbuf.bufsz = kernel_len;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = le64_to_cpu(h->kernel_asize);
+ text_offset = le64_to_cpu(h->text_offset);
+ kbuf.buf_min = text_offset;
+ kbuf.buf_align = SZ_2M;
+
+ kernel_segment_number = image->nr_segments;
+
+ /*
+ * The location of the kernel segment may make it impossible to
+ * satisfy the other segment requirements, so we try repeatedly
+ * to find a location that will work.
+ */
+ while ((ret = kexec_add_buffer(&kbuf)) == 0) {
+ /* Try to load additional data */
+ kernel_segment = &image->segment[kernel_segment_number];
+ ret = load_other_segments(image, kernel_segment->mem,
+ kernel_segment->memsz, initrd,
+ initrd_len, cmdline, cmdline_len);
+ if (!ret)
+ break;
+
+ /*
+ * We couldn't find space for the other segments; erase the
+ * kernel segment and try the next available hole.
+ */
+ image->nr_segments -= 1;
+ kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ }
+
+ if (ret < 0) {
+ pr_err("Could not find any suitable kernel location!");
+ return ERR_PTR(ret);
+ }
+
+ kernel_segment = &image->segment[kernel_segment_number];
+
+ /* Make sure the second kernel jumps to the correct "kernel_entry" */
+ image->start = kernel_segment->mem + h->kernel_entry - text_offset;
+
+ kexec_dprintk("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz);
+
+ return NULL;
+}
+
+const struct kexec_file_ops kexec_efi_ops = {
+ .probe = efi_kexec_probe,
+ .load = efi_kexec_load,
+};
diff --git a/arch/loongarch/kernel/kexec_elf.c b/arch/loongarch/kernel/kexec_elf.c
new file mode 100644
index 000000000000..97b2f049801a
--- /dev/null
+++ b/arch/loongarch/kernel/kexec_elf.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Author: Youling Tang <tangyouling@kylinos.cn>
+ * Copyright (C) 2025 KylinSoft Corporation.
+ */
+
+#define pr_fmt(fmt) "kexec_file(ELF): " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <asm/setup.h>
+
+#define elf_kexec_probe kexec_elf_probe
+
+static int _elf_kexec_load(struct kimage *image,
+ struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
+ struct kexec_buf *kbuf, unsigned long *text_offset)
+{
+ int i, ret = -1;
+
+ /* Read in the PT_LOAD segments. */
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ size_t size;
+ const struct elf_phdr *phdr;
+
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf->buffer = (void *)elf_info->buffer + phdr->p_offset;
+ kbuf->bufsz = size;
+ kbuf->buf_align = phdr->p_align;
+ *text_offset = __pa(phdr->p_paddr);
+ kbuf->buf_min = *text_offset;
+ kbuf->memsz = ALIGN(phdr->p_memsz, SZ_64K);
+ kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(kbuf);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
+static void *elf_kexec_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long text_offset, kernel_segment_number;
+ struct elfhdr ehdr;
+ struct kexec_buf kbuf;
+ struct kexec_elf_info elf_info;
+ struct kexec_segment *kernel_segment;
+
+ ret = kexec_build_elf_info(kernel, kernel_len, &ehdr, &elf_info);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ /*
+ * Load the kernel
+ * FIXME: Non-relocatable kernel rejected for kexec_file (require CONFIG_RELOCATABLE)
+ */
+ kbuf.image = image;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = false;
+
+ kernel_segment_number = image->nr_segments;
+
+ ret = _elf_kexec_load(image, &ehdr, &elf_info, &kbuf, &text_offset);
+ if (ret < 0)
+ goto out;
+
+ /* Load additional data */
+ kernel_segment = &image->segment[kernel_segment_number];
+ ret = load_other_segments(image, kernel_segment->mem, kernel_segment->memsz,
+ initrd, initrd_len, cmdline, cmdline_len);
+ if (ret < 0)
+ goto out;
+
+ /* Make sure the second kernel jumps to the correct "kernel_entry". */
+ image->start = kernel_segment->mem + __pa(ehdr.e_entry) - text_offset;
+
+ kexec_dprintk("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz);
+
+out:
+ kexec_free_elf_info(&elf_info);
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops kexec_elf_ops = {
+ .probe = elf_kexec_probe,
+ .load = elf_kexec_load,
+};
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
index f9381800e291..e4b2bbc47e62 100644
--- a/arch/loongarch/kernel/machine_kexec.c
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -70,18 +70,28 @@ int machine_kexec_prepare(struct kimage *kimage)
kimage->arch.efi_boot = fw_arg0;
kimage->arch.systable_ptr = fw_arg2;
- /* Find the command line */
- for (i = 0; i < kimage->nr_segments; i++) {
- if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) {
- if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE))
- kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr;
- break;
+ if (kimage->file_mode == 1) {
+ /*
+ * kimage->cmdline_buf will be released in kexec_file_load, so copy
+ * to the KEXEC_CMDLINE_ADDR safe area.
+ */
+ memcpy((void *)KEXEC_CMDLINE_ADDR, (void *)kimage->arch.cmdline_ptr,
+ strlen((char *)kimage->arch.cmdline_ptr) + 1);
+ kimage->arch.cmdline_ptr = (unsigned long)KEXEC_CMDLINE_ADDR;
+ } else {
+ /* Find the command line */
+ for (i = 0; i < kimage->nr_segments; i++) {
+ if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) {
+ if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE))
+ kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr;
+ break;
+ }
}
- }
- if (!kimage->arch.cmdline_ptr) {
- pr_err("Command line not included in the provided image\n");
- return -EINVAL;
+ if (!kimage->arch.cmdline_ptr) {
+ pr_err("Command line not included in the provided image\n");
+ return -EINVAL;
+ }
}
/* kexec/kdump need a safe page to save reboot_code_buffer */
@@ -287,9 +297,10 @@ void machine_kexec(struct kimage *image)
/* We do not want to be bothered. */
local_irq_disable();
- pr_notice("EFI boot flag 0x%lx\n", efi_boot);
- pr_notice("Command line at 0x%lx\n", cmdline_ptr);
- pr_notice("System table at 0x%lx\n", systable_ptr);
+ pr_notice("EFI boot flag: 0x%lx\n", efi_boot);
+ pr_notice("Command line addr: 0x%lx\n", cmdline_ptr);
+ pr_notice("Command line string: %s\n", (char *)cmdline_ptr);
+ pr_notice("System table addr: 0x%lx\n", systable_ptr);
pr_notice("We will call new kernel at 0x%lx\n", start_addr);
pr_notice("Bye ...\n");
diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c
new file mode 100644
index 000000000000..dda236b51a88
--- /dev/null
+++ b/arch/loongarch/kernel/machine_kexec_file.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kexec_file for LoongArch
+ *
+ * Author: Youling Tang <tangyouling@kylinos.cn>
+ * Copyright (C) 2025 KylinSoft Corporation.
+ *
+ * Most code is derived from LoongArch port of kexec-tools
+ */
+
+#define pr_fmt(fmt) "kexec_file: " fmt
+
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <asm/bootinfo.h>
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &kexec_efi_ops,
+ &kexec_elf_ops,
+ NULL
+};
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ vfree(image->elf_headers);
+ image->elf_headers = NULL;
+ image->elf_headers_sz = 0;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
+
+/* Add the "kexec_file" command line parameter to command line. */
+static void cmdline_add_loader(unsigned long *cmdline_tmplen, char *modified_cmdline)
+{
+ int loader_strlen;
+
+ loader_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "kexec_file ");
+ *cmdline_tmplen += loader_strlen;
+}
+
+/* Add the "initrd=start,size" command line parameter to command line. */
+static void cmdline_add_initrd(struct kimage *image, unsigned long *cmdline_tmplen,
+ char *modified_cmdline, unsigned long initrd)
+{
+ int initrd_strlen;
+
+ initrd_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "initrd=0x%lx,0x%lx ",
+ initrd, image->initrd_buf_len);
+ *cmdline_tmplen += initrd_strlen;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ int ret, nr_ranges;
+ uint64_t i;
+ phys_addr_t start, end;
+ struct crash_mem *cmem;
+
+ nr_ranges = 2; /* for exclusion of crashkernel region */
+ for_each_mem_range(i, &start, &end)
+ nr_ranges++;
+
+ cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ for_each_mem_range(i, &start, &end) {
+ cmem->ranges[cmem->nr_ranges].start = start;
+ cmem->ranges[cmem->nr_ranges].end = end - 1;
+ cmem->nr_ranges++;
+ }
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (ret < 0)
+ goto out;
+
+ if (crashk_low_res.end) {
+ ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+out:
+ kfree(cmem);
+ return ret;
+}
+
+/*
+ * Add the "mem=size@start" command line parameter to command line, indicating the
+ * memory region the new kernel can use to boot into.
+ */
+static void cmdline_add_mem(unsigned long *cmdline_tmplen, char *modified_cmdline)
+{
+ int mem_strlen = 0;
+
+ mem_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "mem=0x%llx@0x%llx ",
+ crashk_res.end - crashk_res.start + 1, crashk_res.start);
+ *cmdline_tmplen += mem_strlen;
+
+ if (crashk_low_res.end) {
+ mem_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "mem=0x%llx@0x%llx ",
+ crashk_low_res.end - crashk_low_res.start + 1, crashk_low_res.start);
+ *cmdline_tmplen += mem_strlen;
+ }
+}
+
+/* Add the "elfcorehdr=size@start" command line parameter to command line. */
+static void cmdline_add_elfcorehdr(struct kimage *image, unsigned long *cmdline_tmplen,
+ char *modified_cmdline, unsigned long elfcorehdr_sz)
+{
+ int elfcorehdr_strlen = 0;
+
+ elfcorehdr_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "elfcorehdr=0x%lx@0x%lx ",
+ elfcorehdr_sz, image->elf_load_addr);
+ *cmdline_tmplen += elfcorehdr_strlen;
+}
+
+#endif
+
+/*
+ * Try to add the initrd to the image. If it is not possible to find valid
+ * locations, this function will undo changes to the image and return non zero.
+ */
+int load_other_segments(struct kimage *image,
+ unsigned long kernel_load_addr, unsigned long kernel_size,
+ char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len)
+{
+ int ret = 0;
+ unsigned long cmdline_tmplen = 0;
+ unsigned long initrd_load_addr = 0;
+ unsigned long orig_segments = image->nr_segments;
+ char *modified_cmdline = NULL;
+ struct kexec_buf kbuf;
+
+ kbuf.image = image;
+ /* Don't allocate anything below the kernel */
+ kbuf.buf_min = kernel_load_addr + kernel_size;
+
+ modified_cmdline = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+ if (!modified_cmdline)
+ return -EINVAL;
+
+ cmdline_add_loader(&cmdline_tmplen, modified_cmdline);
+ /* Ensure it's null terminated */
+ modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0';
+
+#ifdef CONFIG_CRASH_DUMP
+ /* Load elf core header */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ void *headers;
+ unsigned long headers_sz;
+
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret < 0) {
+ pr_err("Preparing elf core header failed\n");
+ goto out_err;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = SZ_64K; /* largest supported page size */
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret < 0) {
+ vfree(headers);
+ goto out_err;
+ }
+ image->elf_headers = headers;
+ image->elf_load_addr = kbuf.mem;
+ image->elf_headers_sz = headers_sz;
+
+ kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+
+ /* Add the mem=size@start parameter to the command line */
+ cmdline_add_mem(&cmdline_tmplen, modified_cmdline);
+
+ /* Add the elfcorehdr=size@start parameter to the command line */
+ cmdline_add_elfcorehdr(image, &cmdline_tmplen, modified_cmdline, headers_sz);
+ }
+#endif
+
+ /* Load initrd */
+ if (initrd) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = initrd_len;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = initrd_len;
+ kbuf.buf_align = 0;
+ /* within 1GB-aligned window of up to 32GB in size */
+ kbuf.buf_max = round_down(kernel_load_addr, SZ_1G) + (unsigned long)SZ_1G * 32;
+ kbuf.top_down = false;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret < 0)
+ goto out_err;
+ initrd_load_addr = kbuf.mem;
+
+ kexec_dprintk("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ initrd_load_addr, kbuf.bufsz, kbuf.memsz);
+
+ /* Add the initrd=start,size parameter to the command line */
+ cmdline_add_initrd(image, &cmdline_tmplen, modified_cmdline, initrd_load_addr);
+ }
+
+ if (cmdline_len + cmdline_tmplen > COMMAND_LINE_SIZE) {
+ pr_err("Appending command line exceeds COMMAND_LINE_SIZE\n");
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ memcpy(modified_cmdline + cmdline_tmplen, cmdline, cmdline_len);
+ cmdline = modified_cmdline;
+ image->arch.cmdline_ptr = (unsigned long)cmdline;
+
+ return 0;
+
+out_err:
+ image->nr_segments = orig_segments;
+ kfree(modified_cmdline);
+ return ret;
+}
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 50c469067f3a..b5e2312a2fca 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -166,6 +166,10 @@ static inline __init bool kaslr_disabled(void)
return true;
#endif
+ str = strstr(boot_command_line, "kexec_file");
+ if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' '))
+ return true;
+
return false;
}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 075b79b2c1d3..69c17d162fff 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -355,6 +355,7 @@ void __init platform_init(void)
#ifdef CONFIG_ACPI
acpi_table_upgrade();
+ acpi_gbl_use_global_lock = false;
acpi_gbl_use_default_register_widths = false;
acpi_boot_table_init();
#endif
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index 40eea6da7c25..ae64bbdf83a7 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -31,7 +31,7 @@ config KVM
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_MMU_NOTIFIER
select KVM_MMIO
- select KVM_XFER_TO_GUEST_WORK
+ select VIRT_XFER_TO_GUEST_WORK
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
help
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 9c802f7103c6..30e3b089a596 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -4,7 +4,6 @@
*/
#include <linux/kvm_host.h>
-#include <linux/entry-kvm.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
#include <asm/loongarch.h>
@@ -251,7 +250,7 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu)
/*
* Check conditions before entering the guest
*/
- ret = xfer_to_guest_mode_handle_work(vcpu);
+ ret = kvm_xfer_to_guest_mode_handle_work(vcpu);
if (ret < 0)
return ret;
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index deefd9617d00..2c93d33356e5 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -215,6 +215,58 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
flags |= FAULT_FLAG_USER;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+ if (!(flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, address);
+ if (!vma)
+ goto lock_mmap;
+
+ if (write) {
+ flags |= FAULT_FLAG_WRITE;
+ if (!(vma->vm_flags & VM_WRITE)) {
+ vma_end_read(vma);
+ si_code = SEGV_ACCERR;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto bad_area_nosemaphore;
+ }
+ } else {
+ if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) {
+ vma_end_read(vma);
+ si_code = SEGV_ACCERR;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto bad_area_nosemaphore;
+ }
+ if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs)) {
+ vma_end_read(vma);
+ si_code = SEGV_ACCERR;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto bad_area_nosemaphore;
+ }
+ }
+
+ fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
+
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
+ }
+
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
+
+ /* Quick path to respond to signals */
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ no_context(regs, write, address);
+ return;
+ }
+lock_mmap:
+
retry:
vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma))
@@ -276,8 +328,10 @@ good_area:
*/
goto retry;
}
+ mmap_read_unlock(mm);
+
+done:
if (unlikely(fault & VM_FAULT_ERROR)) {
- mmap_read_unlock(mm);
if (fault & VM_FAULT_OOM) {
do_out_of_memory(regs, write, address);
return;
@@ -290,8 +344,6 @@ good_area:
}
BUG();
}
-
- mmap_read_unlock(mm);
}
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index abfdb6bb5c38..cbe53d0b7fb0 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -527,13 +527,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
emit_zext_32(ctx, dst, is32);
break;
case 8:
- move_reg(ctx, t1, src);
- emit_insn(ctx, extwb, dst, t1);
+ emit_insn(ctx, extwb, dst, src);
emit_zext_32(ctx, dst, is32);
break;
case 16:
- move_reg(ctx, t1, src);
- emit_insn(ctx, extwh, dst, t1);
+ emit_insn(ctx, extwh, dst, src);
emit_zext_32(ctx, dst, is32);
break;
case 32:
@@ -1294,8 +1292,10 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
- if (!is_kernel_text((unsigned long)ip) &&
- !is_bpf_text_address((unsigned long)ip))
+ /* Only poking bpf text is supported. Since kernel function entry
+ * is set up by ftrace, we rely on ftrace to poke kernel functions.
+ */
+ if (!is_bpf_text_address((unsigned long)ip))
return -ENOTSUPP;
ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
@@ -1448,12 +1448,43 @@ void arch_free_bpf_trampoline(void *image, unsigned int size)
bpf_prog_pack_free(image, size);
}
+/*
+ * Sign-extend the register if necessary
+ */
+static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign)
+{
+ /* ABI requires unsigned char/short to be zero-extended */
+ if (!sign && (size == 1 || size == 2)) {
+ if (rd != rj)
+ move_reg(ctx, rd, rj);
+ return;
+ }
+
+ switch (size) {
+ case 1:
+ emit_insn(ctx, extwb, rd, rj);
+ break;
+ case 2:
+ emit_insn(ctx, extwh, rd, rj);
+ break;
+ case 4:
+ emit_insn(ctx, addiw, rd, rj, 0);
+ break;
+ case 8:
+ if (rd != rj)
+ move_reg(ctx, rd, rj);
+ break;
+ default:
+ pr_warn("bpf_jit: invalid size %d for sign_extend\n", size);
+ }
+}
+
static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
const struct btf_func_model *m, struct bpf_tramp_links *tlinks,
void *func_addr, u32 flags)
{
int i, ret, save_ret;
- int stack_size = 0, nargs = 0;
+ int stack_size, nargs;
int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off;
bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
void *orig_call = func_addr;
@@ -1462,9 +1493,6 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
u32 **branches = NULL;
- if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
- return -ENOTSUPP;
-
/*
* FP + 8 [ RA to parent func ] return address to parent
* function
@@ -1495,20 +1523,23 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
if (m->nr_args > LOONGARCH_MAX_REG_ARGS)
return -ENOTSUPP;
+ /* FIXME: No support of struct argument */
+ for (i = 0; i < m->nr_args; i++) {
+ if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+ return -ENOTSUPP;
+ }
+
if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
return -ENOTSUPP;
- stack_size = 0;
-
/* Room of trampoline frame to store return address and frame pointer */
- stack_size += 16;
+ stack_size = 16;
save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
- if (save_ret) {
- /* Save BPF R0 and A0 */
- stack_size += 16;
- retval_off = stack_size;
- }
+ if (save_ret)
+ stack_size += 16; /* Save BPF R0 and A0 */
+
+ retval_off = stack_size;
/* Room of trampoline frame to store args */
nargs = m->nr_args;
@@ -1595,7 +1626,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
+ move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
ret = emit_call(ctx, (const u64)__bpf_tramp_enter);
if (ret)
return ret;
@@ -1645,7 +1676,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = ctx->ro_image + ctx->idx;
- move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
+ move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
ret = emit_call(ctx, (const u64)__bpf_tramp_exit);
if (ret)
goto out;
@@ -1655,8 +1686,12 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
restore_args(ctx, m->nr_args, args_off);
if (save_ret) {
- emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
+ if (is_struct_ops)
+ sign_extend(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0],
+ m->ret_size, m->ret_flags & BTF_FMODEL_SIGNED_ARG);
+ else
+ emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
}
emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
@@ -1715,7 +1750,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
- if (ret > 0 && validate_code(&ctx) < 0) {
+ if (ret < 0)
+ goto out;
+
+ if (validate_code(&ctx) < 0) {
ret = -EINVAL;
goto out;
}
@@ -1726,7 +1764,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
goto out;
}
- bpf_flush_icache(ro_image, ro_image_end);
out:
kvfree(image);
return ret < 0 ? ret : size;
@@ -1744,8 +1781,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
- /* Page align */
- return ret < 0 ? ret : round_up(ret * LOONGARCH_INSN_SIZE, PAGE_SIZE);
+ return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index 7787a4dd7c3c..f3268fed02fc 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -72,9 +72,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index 9504eb19d73a..e6ab3f9ff5d8 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -44,41 +44,24 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
*/
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
- struct resource *r;
u16 cmd, newcmd;
- int idx;
+ int ret;
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- newcmd = cmd;
-
- for (idx = 0; idx < 6; idx++) {
- /* Only set up the requested stuff */
- if (!(mask & (1 << idx)))
- continue;
-
- r = dev->resource + idx;
- if (!r->start && r->end) {
- pr_err("PCI: Device %s not available because of resource collisions\n",
- pci_name(dev));
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- newcmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- newcmd |= PCI_COMMAND_MEMORY;
- }
+ ret = pci_enable_resources(dev, mask);
+ if (ret < 0)
+ return ret;
/*
* Bridges (eg, cardbus bridges) need to be fully enabled
*/
- if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+ if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
newcmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
-
-
- if (newcmd != cmd) {
- pr_info("PCI: enabling device %s (0x%04x -> 0x%04x)\n",
- pci_name(dev), cmd, newcmd);
- pci_write_config_word(dev, PCI_COMMAND, newcmd);
+ if (newcmd != cmd) {
+ pr_info("PCI: enabling bridge %s (0x%04x -> 0x%04x)\n",
+ pci_name(dev), cmd, newcmd);
+ pci_write_config_word(dev, PCI_COMMAND, newcmd);
+ }
}
return 0;
}
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 176314f3c9aa..fbbdcb394ca2 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -73,7 +73,7 @@ CONFIG_FB_XILINX=y
CONFIG_UIO=y
CONFIG_UIO_PDRV_GENIRQ=y
CONFIG_UIO_DMEM_GENIRQ=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index 97d2cd997285..349e9e0b4f54 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -144,9 +144,9 @@ CONFIG_EXT2_FS=m
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=m
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig
index b0b551efac7c..6ee9ee391fdc 100644
--- a/arch/mips/configs/cobalt_defconfig
+++ b/arch/mips/configs/cobalt_defconfig
@@ -59,9 +59,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig
index 85a4472cb058..52a63dd7aac7 100644
--- a/arch/mips/configs/decstation_64_defconfig
+++ b/arch/mips/configs/decstation_64_defconfig
@@ -133,9 +133,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_PROC_KCORE=y
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index a3b2c8da2dde..59fb7ee5eeb0 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_PROC_KCORE=y
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index a476717b8a6a..8be1cb433e95 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_PROC_KCORE=y
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index cdedbb8a8f53..b6fe3c962464 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -173,7 +173,7 @@ CONFIG_USB_ISIGHTFW=m
CONFIG_UIO=m
CONFIG_UIO_CIF=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_AUTOFS_FS=y
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 2decf8b98d31..e123848f94ab 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -232,9 +232,9 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_INTF_DEV_UIE_EMUL=y
CONFIG_RTC_DRV_DS1286=y
CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_QUOTA=y
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 5d079941fd20..1c10242b148b 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -272,9 +272,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig
index 6db21e498faa..755cbf20f5a5 100644
--- a/arch/mips/configs/ip28_defconfig
+++ b/arch/mips/configs/ip28_defconfig
@@ -49,9 +49,9 @@ CONFIG_WATCHDOG=y
CONFIG_INDYDOG=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_PROC_KCORE=y
# CONFIG_PROC_PAGE_MONITOR is not set
diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig
index a4524e785469..718f3060d9fa 100644
--- a/arch/mips/configs/ip30_defconfig
+++ b/arch/mips/configs/ip30_defconfig
@@ -143,9 +143,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index d8ac11427f69..7568838eb08b 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -89,9 +89,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig
index 65adb538030d..a790c2610fd3 100644
--- a/arch/mips/configs/jazz_defconfig
+++ b/arch/mips/configs/jazz_defconfig
@@ -69,7 +69,7 @@ CONFIG_FB_G364=y
CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_HWMON is not set
CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_AUTOFS_FS=m
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 5038a27d035f..8d3f20ed19b5 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -226,9 +226,9 @@ CONFIG_MMC=m
CONFIG_LEDS_CLASS=y
CONFIG_STAGING=y
CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_XFS_FS=m
diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig
index 0cc665d3ea34..aec1fd1902eb 100644
--- a/arch/mips/configs/loongson2k_defconfig
+++ b/arch/mips/configs/loongson2k_defconfig
@@ -298,9 +298,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 240efff37d98..575aaf242361 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -348,9 +348,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_QUOTA=y
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 9fcbac829920..81704ec67f09 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -313,7 +313,7 @@ CONFIG_RTC_DRV_CMOS=y
CONFIG_UIO=m
CONFIG_UIO_CIF=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index 19102386a81c..82a97f58bce1 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y
CONFIG_UIO=m
CONFIG_UIO_CIF=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
index 1b98f6945c2d..accb471a1d93 100644
--- a/arch/mips/configs/malta_qemu_32r6_defconfig
+++ b/arch/mips/configs/malta_qemu_32r6_defconfig
@@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig
index 7b8905cb3400..6bda67c5f68f 100644
--- a/arch/mips/configs/maltaaprp_defconfig
+++ b/arch/mips/configs/maltaaprp_defconfig
@@ -149,7 +149,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index 8249f6a51895..e4082537f80f 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig
@@ -148,9 +148,9 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
index 21cb37668763..58f5af45fa98 100644
--- a/arch/mips/configs/maltasmvp_eva_defconfig
+++ b/arch/mips/configs/maltasmvp_eva_defconfig
@@ -152,7 +152,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig
index 3df9cd669683..9bfef7de0d1c 100644
--- a/arch/mips/configs/maltaup_defconfig
+++ b/arch/mips/configs/maltaup_defconfig
@@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
index 1dd07c9d1812..0f9ef20744f9 100644
--- a/arch/mips/configs/maltaup_xpa_defconfig
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y
CONFIG_UIO=m
CONFIG_UIO_CIF=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 2707ab134639..c58d1a61d528 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -595,9 +595,9 @@ CONFIG_EXT2_FS=m
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=m
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 39a2419e1f3e..b507dc4dddd4 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -307,7 +307,7 @@ CONFIG_USB_SISUSBVGA=m
CONFIG_USB_LD=m
CONFIG_USB_TEST=m
CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_AUTOFS_FS=m
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
index 66898fd182dc..d04b7c1294b6 100644
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -249,45 +249,11 @@ static int __init pcibios_init(void)
subsys_initcall(pcibios_init);
-static int pcibios_enable_resources(struct pci_dev *dev, int mask)
-{
- u16 cmd, old_cmd;
- int idx;
- struct resource *r;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- old_cmd = cmd;
- pci_dev_for_each_resource(dev, r, idx) {
- /* Only set up the requested stuff */
- if (!(mask & (1<<idx)))
- continue;
-
- if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
- continue;
- if ((idx == PCI_ROM_RESOURCE) &&
- (!(r->flags & IORESOURCE_ROM_ENABLE)))
- continue;
- if (!r->start && r->end) {
- pci_err(dev,
- "can't enable device: resource collisions\n");
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- if (cmd != old_cmd) {
- pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
- int err = pcibios_enable_resources(dev, mask);
+ int err;
+ err = pci_enable_resources(dev, mask);
if (err < 0)
return err;
diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig
index 3e849d25838a..fb1eb9a68bd6 100644
--- a/arch/openrisc/configs/or1klitex_defconfig
+++ b/arch/openrisc/configs/or1klitex_defconfig
@@ -38,7 +38,7 @@ CONFIG_MMC_LITEX=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_LITEX_SOC_CONTROLLER=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_EXFAT_FS=y
diff --git a/arch/openrisc/configs/virt_defconfig b/arch/openrisc/configs/virt_defconfig
index a93a3e1e4f87..0b9979b35ca8 100644
--- a/arch/openrisc/configs/virt_defconfig
+++ b/arch/openrisc/configs/virt_defconfig
@@ -94,8 +94,8 @@ CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
# CONFIG_DNOTIFY is not set
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 0940c162f1f7..47fd9662d800 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -31,6 +31,9 @@ config PARISC
select HAVE_KERNEL_UNCOMPRESSED
select HAVE_PCI
select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select PERF_USE_VMALLOC
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 94928d114d4c..52031bde9f17 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -232,8 +232,8 @@ CONFIG_AUXDISPLAY=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V2=y
diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig
index d8cd7f858b2a..1aec04c09d0b 100644
--- a/arch/parisc/configs/generic-64bit_defconfig
+++ b/arch/parisc/configs/generic-64bit_defconfig
@@ -251,8 +251,8 @@ CONFIG_STAGING=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_BTRFS_FS=m
CONFIG_QUOTA=y
diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h
index 1e0fd8ba6c03..8a2925029d15 100644
--- a/arch/parisc/include/asm/perf_event.h
+++ b/arch/parisc/include/asm/perf_event.h
@@ -1,6 +1,12 @@
#ifndef __ASM_PARISC_PERF_EVENT_H
#define __ASM_PARISC_PERF_EVENT_H
-/* Empty, just to avoid compiling error */
+#include <asm/psw.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->gr[0] = KERNEL_PSW; \
+ (regs)->iaoq[0] = (__ip); \
+ asm volatile("copy %%sp, %0\n":"=r"((regs)->gr[30])); \
+}
#endif /* __ASM_PARISC_PERF_EVENT_H */
diff --git a/arch/parisc/include/uapi/asm/ioctls.h b/arch/parisc/include/uapi/asm/ioctls.h
index 82d1148c6379..74b4027a4e80 100644
--- a/arch/parisc/include/uapi/asm/ioctls.h
+++ b/arch/parisc/include/uapi/asm/ioctls.h
@@ -10,10 +10,10 @@
#define TCSETS _IOW('T', 17, struct termios) /* TCSETATTR */
#define TCSETSW _IOW('T', 18, struct termios) /* TCSETATTRD */
#define TCSETSF _IOW('T', 19, struct termios) /* TCSETATTRF */
-#define TCGETA _IOR('T', 1, struct termio)
-#define TCSETA _IOW('T', 2, struct termio)
-#define TCSETAW _IOW('T', 3, struct termio)
-#define TCSETAF _IOW('T', 4, struct termio)
+#define TCGETA 0x40125401
+#define TCSETA 0x80125402
+#define TCSETAW 0x80125403
+#define TCSETAF 0x80125404
#define TCSBRK _IO('T', 5)
#define TCXONC _IO('T', 6)
#define TCFLSH _IO('T', 7)
diff --git a/arch/parisc/include/uapi/asm/perf_regs.h b/arch/parisc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..1ae687bb3d3c
--- /dev/null
+++ b/arch/parisc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_PARISC_PERF_REGS_H
+#define _UAPI_ASM_PARISC_PERF_REGS_H
+
+/* see struct user_regs_struct */
+enum perf_event_parisc_regs {
+ PERF_REG_PARISC_R0, /* PSW is in gr[0] */
+ PERF_REG_PARISC_R1,
+ PERF_REG_PARISC_R2,
+ PERF_REG_PARISC_R3,
+ PERF_REG_PARISC_R4,
+ PERF_REG_PARISC_R5,
+ PERF_REG_PARISC_R6,
+ PERF_REG_PARISC_R7,
+ PERF_REG_PARISC_R8,
+ PERF_REG_PARISC_R9,
+ PERF_REG_PARISC_R10,
+ PERF_REG_PARISC_R11,
+ PERF_REG_PARISC_R12,
+ PERF_REG_PARISC_R13,
+ PERF_REG_PARISC_R14,
+ PERF_REG_PARISC_R15,
+ PERF_REG_PARISC_R16,
+ PERF_REG_PARISC_R17,
+ PERF_REG_PARISC_R18,
+ PERF_REG_PARISC_R19,
+ PERF_REG_PARISC_R20,
+ PERF_REG_PARISC_R21,
+ PERF_REG_PARISC_R22,
+ PERF_REG_PARISC_R23,
+ PERF_REG_PARISC_R24,
+ PERF_REG_PARISC_R25,
+ PERF_REG_PARISC_R26,
+ PERF_REG_PARISC_R27,
+ PERF_REG_PARISC_R28,
+ PERF_REG_PARISC_R29,
+ PERF_REG_PARISC_R30,
+ PERF_REG_PARISC_R31,
+
+ PERF_REG_PARISC_SR0,
+ PERF_REG_PARISC_SR1,
+ PERF_REG_PARISC_SR2,
+ PERF_REG_PARISC_SR3,
+ PERF_REG_PARISC_SR4,
+ PERF_REG_PARISC_SR5,
+ PERF_REG_PARISC_SR6,
+ PERF_REG_PARISC_SR7,
+
+ PERF_REG_PARISC_IAOQ0,
+ PERF_REG_PARISC_IAOQ1,
+ PERF_REG_PARISC_IASQ0,
+ PERF_REG_PARISC_IASQ1,
+
+ PERF_REG_PARISC_SAR, /* CR11 */
+ PERF_REG_PARISC_IIR, /* CR19 */
+ PERF_REG_PARISC_ISR, /* CR20 */
+ PERF_REG_PARISC_IOR, /* CR21 */
+ PERF_REG_PARISC_IPSW, /* CR22 */
+
+ PERF_REG_PARISC_MAX
+};
+
+#endif /* _UAPI_ASM_PARISC_PERF_REGS_H */
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index d5055ba33722..9157bc8bdf41 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += topology.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 1e793f770f71..1f8936fc2292 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -995,6 +995,7 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)
struct pdc_system_map_mod_info pdc_mod_info;
struct pdc_module_path mod_path;
+ memset(&iodc_data, 0, sizeof(iodc_data));
status = pdc_iodc_read(&count, hpa, 0,
&iodc_data, sizeof(iodc_data));
if (status != PDC_OK) {
@@ -1012,6 +1013,11 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)
mod_index = 0;
do {
+ /* initialize device path for old machines */
+ memset(&mod_path, 0xff, sizeof(mod_path));
+ get_node_path(dev->dev.parent, &mod_path.path);
+ mod_path.path.mod = dev->hw_path;
+ memset(&pdc_mod_info, 0, sizeof(pdc_mod_info));
status = pdc_system_map_find_mods(&pdc_mod_info,
&mod_path, mod_index++);
} while (status == PDC_OK && pdc_mod_info.mod_addr != hpa);
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index c69f6d5946e9..042343492a28 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -464,7 +464,8 @@ int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
unsigned long flags;
spin_lock_irqsave(&pdc_lock, flags);
- retval = mem_pdc_call(PDC_SYSTEM_MAP, PDC_FIND_MODULE, __pa(pdc_result),
+ memcpy(pdc_result2, mod_path, sizeof(*mod_path));
+ retval = mem_pdc_call(PDC_SYSTEM_MAP, PDC_FIND_MODULE, __pa(pdc_result),
__pa(pdc_result2), mod_index);
convert_to_wide(pdc_result);
memcpy(pdc_mod_info, pdc_result, sizeof(*pdc_mod_info));
diff --git a/arch/parisc/kernel/perf_event.c b/arch/parisc/kernel/perf_event.c
new file mode 100644
index 000000000000..f90b83886ab4
--- /dev/null
+++ b/arch/parisc/kernel/perf_event.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for parisc
+ *
+ * Copyright (C) 2025 by Helge Deller <deller@gmx.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <asm/unwind.h>
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+
+ struct unwind_frame_info info;
+
+ unwind_frame_init_task(&info, current, NULL);
+ while (1) {
+ if (unwind_once(&info) < 0 || info.ip == 0)
+ break;
+
+ if (!__kernel_text_address(info.ip) ||
+ perf_callchain_store(entry, info.ip))
+ return;
+ }
+}
diff --git a/arch/parisc/kernel/perf_regs.c b/arch/parisc/kernel/perf_regs.c
new file mode 100644
index 000000000000..68458e2f6197
--- /dev/null
+++ b/arch/parisc/kernel/perf_regs.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (C) 2025 by Helge Deller <deller@gmx.de> */
+
+#include <linux/perf_event.h>
+#include <linux/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ switch (idx) {
+ case PERF_REG_PARISC_R0 ... PERF_REG_PARISC_R31:
+ return regs->gr[idx - PERF_REG_PARISC_R0];
+ case PERF_REG_PARISC_SR0 ... PERF_REG_PARISC_SR7:
+ return regs->sr[idx - PERF_REG_PARISC_SR0];
+ case PERF_REG_PARISC_IASQ0 ... PERF_REG_PARISC_IASQ1:
+ return regs->iasq[idx - PERF_REG_PARISC_IASQ0];
+ case PERF_REG_PARISC_IAOQ0 ... PERF_REG_PARISC_IAOQ1:
+ return regs->iasq[idx - PERF_REG_PARISC_IAOQ0];
+ case PERF_REG_PARISC_SAR: /* CR11 */
+ return regs->sar;
+ case PERF_REG_PARISC_IIR: /* CR19 */
+ return regs->iir;
+ case PERF_REG_PARISC_ISR: /* CR20 */
+ return regs->isr;
+ case PERF_REG_PARISC_IOR: /* CR21 */
+ return regs->ior;
+ case PERF_REG_PARISC_IPSW: /* CR22 */
+ return regs->ipsw;
+ };
+ WARN_ON_ONCE((u32)idx >= PERF_REG_PARISC_MAX);
+ return 0;
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_PARISC_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ if (!IS_ENABLED(CONFIG_64BIT))
+ return PERF_SAMPLE_REGS_ABI_32;
+
+ if (test_tsk_thread_flag(task, TIF_32BIT))
+ return PERF_SAMPLE_REGS_ABI_32;
+
+ return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+ struct pt_regs *regs)
+{
+ regs_user->regs = task_pt_regs(current);
+ regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index b9b3d527bc90..4c7c5df80bd0 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -31,6 +31,7 @@
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <linux/kfence.h>
+#include <linux/perf_event.h>
#include <asm/assembly.h>
#include <asm/io.h>
@@ -633,6 +634,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
/* Assist Exception Trap, i.e. floating point exception. */
die_if_kernel("Floating point exception", regs, 0); /* quiet */
__inc_irq_stat(irq_fpassist_count);
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
handle_fpe(regs);
return;
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 00e97204783e..fb64d9ce0b17 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -13,6 +13,7 @@
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/unaligned.h>
+#include <linux/perf_event.h>
#include <asm/hardirq.h>
#include <asm/traps.h>
#include "unaligned.h"
@@ -378,6 +379,7 @@ void handle_unaligned(struct pt_regs *regs)
int ret = ERR_NOTHANDLED;
__inc_irq_stat(irq_unaligned_count);
+ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->ior);
/* log a message with pacing */
if (user_mode(regs)) {
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 69d65ffab312..03165c82dfdb 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -41,7 +41,6 @@ unsigned long raw_copy_from_user(void *dst, const void __user *src,
mtsp(get_kernel_space(), SR_TEMP2);
/* Check region is user accessible */
- if (start)
while (start < end) {
if (!prober_user(SR_TEMP1, start)) {
newlen = (start - (unsigned long) src);
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index e5fdc336c9b2..2e23533b67e3 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -3,7 +3,6 @@ generated-y += syscall_table_32.h
generated-y += syscall_table_64.h
generated-y += syscall_table_spu.h
generic-y += agp.h
-generic-y += kvm_types.h
generic-y += mcs_spinlock.h
generic-y += qrwlock.h
generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/kvm_types.h b/arch/powerpc/include/asm/kvm_types.h
new file mode 100644
index 000000000000..5d4bffea7d47
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_types.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PPC_KVM_TYPES_H
+#define _ASM_PPC_KVM_TYPES_H
+
+#if IS_MODULE(CONFIG_KVM_BOOK3S_64_PR) && IS_MODULE(CONFIG_KVM_BOOK3S_64_HV)
+#define KVM_SUB_MODULES kvm-pr,kvm-hv
+#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_PR)
+#define KVM_SUB_MODULES kvm-pr
+#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_HV)
+#define KVM_SUB_MODULES kvm-hv
+#else
+#undef KVM_SUB_MODULES
+#endif
+
+#endif
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 48ad0116f359..ef78ff77cf8f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -334,7 +334,7 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
- pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
+ pci_uevent_ers(pdev, rc);
return rc;
}
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 5a62091b0809..c50328212917 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -30,7 +30,7 @@ config KVM
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_MMIO
- select KVM_XFER_TO_GUEST_WORK
+ select VIRT_XFER_TO_GUEST_WORK
select KVM_GENERIC_MMU_NOTIFIER
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 47bcf190ccc5..bccb919ca615 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -7,7 +7,6 @@
*/
#include <linux/bitops.h>
-#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
@@ -911,7 +910,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
run->exit_reason = KVM_EXIT_UNKNOWN;
while (ret > 0) {
/* Check conditions before entering the guest */
- ret = xfer_to_guest_mode_handle_work(vcpu);
+ ret = kvm_xfer_to_guest_mode_handle_work(vcpu);
if (ret)
continue;
ret = 1;
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 7679bc16b692..b4769241332b 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -25,6 +25,7 @@ endif
KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
+KBUILD_CFLAGS_DECOMPRESSOR += -Wno-pointer-sign
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index 9fc3f0dae8f0..a2952ed5518b 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -27,7 +27,7 @@ static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
{
union register_pair r1 = { .even = virt_to_phys(data), };
- asm volatile("diag %[r1],%[r3],0x304\n"
+ asm volatile("diag %[r1],%[r3],0x304"
: [r1] "+&d" (r1.pair)
: [r3] "d" (cmd)
: "memory");
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 352108727d7e..56817990c73d 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -143,7 +143,7 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid,
" lghi 2,0\n" /* 0 into gr2 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* gr2 into reg2 */
+ " lgr %[reg2],2" /* gr2 into reg2 */
: [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2)
: [qid] "d" (qid)
: "cc", "0", "1", "2");
@@ -186,7 +186,7 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid, int fbit)
asm volatile(
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -211,7 +211,7 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid, int fbit)
asm volatile(
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -315,7 +315,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
" lgr 1,%[reg1]\n" /* irq ctrl into gr1 */
" lgr 2,%[reg2]\n" /* ni addr into gr2 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(AQIC) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "+&d" (reg1.value)
: [reg0] "d" (reg0), [reg2] "d" (reg2)
: "cc", "memory", "0", "1", "2");
@@ -363,7 +363,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
" lgr 1,%[reg1]\n" /* qact in info into gr1 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(QACT) */
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* qact out info into reg2 */
+ " lgr %[reg2],2" /* qact out info into reg2 */
: [reg1] "+&d" (reg1.value), [reg2] "=&d" (reg2)
: [reg0] "d" (reg0)
: "cc", "0", "1", "2");
@@ -388,7 +388,7 @@ static inline struct ap_queue_status ap_bapq(ap_qid_t qid)
asm volatile(
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(BAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -416,7 +416,7 @@ static inline struct ap_queue_status ap_aapq(ap_qid_t qid, unsigned int sec_idx)
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" lgr 2,%[reg2]\n" /* secret index into gr2 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(AAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0), [reg2] "d" (reg2)
: "cc", "0", "1", "2");
@@ -453,7 +453,7 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
" lgr 0,%[reg0]\n" /* qid param in gr0 */
"0: .insn rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n"
" brc 2,0b\n" /* handle partial completion */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
[nqap_r2] "+&d" (nqap_r2.pair)
: [nqap_r1] "d" (nqap_r1.pair)
@@ -518,7 +518,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
" brc 6,0b\n" /* handle partial complete */
"2: lgr %[reg0],0\n" /* gr0 (qid + info) into reg0 */
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* gr2 (res length) into reg2 */
+ " lgr %[reg2],2" /* gr2 (res length) into reg2 */
: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
[reg2] "=&d" (reg2), [rp1] "+&d" (rp1.pair),
[rp2] "+&d" (rp2.pair)
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 21c26d842832..845b77864412 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -17,7 +17,7 @@ static __always_inline int __atomic_read(const int *ptr)
int val;
asm volatile(
- " l %[val],%[ptr]\n"
+ " l %[val],%[ptr]"
: [val] "=d" (val) : [ptr] "R" (*ptr));
return val;
}
@@ -26,11 +26,11 @@ static __always_inline void __atomic_set(int *ptr, int val)
{
if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
asm volatile(
- " mvhi %[ptr],%[val]\n"
+ " mvhi %[ptr],%[val]"
: [ptr] "=Q" (*ptr) : [val] "K" (val));
} else {
asm volatile(
- " st %[val],%[ptr]\n"
+ " st %[val],%[ptr]"
: [ptr] "=R" (*ptr) : [val] "d" (val));
}
}
@@ -40,7 +40,7 @@ static __always_inline long __atomic64_read(const long *ptr)
long val;
asm volatile(
- " lg %[val],%[ptr]\n"
+ " lg %[val],%[ptr]"
: [val] "=d" (val) : [ptr] "RT" (*ptr));
return val;
}
@@ -49,11 +49,11 @@ static __always_inline void __atomic64_set(long *ptr, long val)
{
if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
asm volatile(
- " mvghi %[ptr],%[val]\n"
+ " mvghi %[ptr],%[val]"
: [ptr] "=Q" (*ptr) : [val] "K" (val));
} else {
asm volatile(
- " stg %[val],%[ptr]\n"
+ " stg %[val],%[ptr]"
: [ptr] "=RT" (*ptr) : [val] "d" (val));
}
}
@@ -66,7 +66,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \
op_type old; \
\
asm volatile( \
- op_string " %[old],%[val],%[ptr]\n" \
+ op_string " %[old],%[val],%[ptr]" \
op_barrier \
: [old] "=d" (old), [ptr] "+QS" (*ptr) \
: [val] "d" (val) : "cc", "memory"); \
@@ -75,7 +75,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \
#define __ATOMIC_OPS(op_name, op_type, op_string) \
__ATOMIC_OP(op_name, op_type, op_string, "") \
- __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
__ATOMIC_OPS(__atomic_add, int, "laa")
__ATOMIC_OPS(__atomic_and, int, "lan")
@@ -94,14 +94,14 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg")
static __always_inline void op_name(op_type val, op_type *ptr) \
{ \
asm volatile( \
- op_string " %[ptr],%[val]\n" \
+ op_string " %[ptr],%[val]" \
op_barrier \
: [ptr] "+QS" (*ptr) : [val] "i" (val) : "cc", "memory");\
}
#define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \
__ATOMIC_CONST_OP(op_name, op_type, op_string, "") \
- __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+ __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
__ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
@@ -179,7 +179,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \
int cc; \
\
asm volatile( \
- op_string " %[tmp],%[val],%[ptr]\n" \
+ op_string " %[tmp],%[val],%[ptr]" \
op_barrier \
: "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \
: [val] "d" (val) \
@@ -189,7 +189,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \
#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \
__ATOMIC_TEST_OP(op_name, op_type, op_string, "") \
- __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+ __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal")
__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg")
@@ -203,7 +203,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \
int cc; \
\
asm volatile( \
- op_string " %[ptr],%[val]\n" \
+ op_string " %[ptr],%[val]" \
op_barrier \
: "=@cc" (cc), [ptr] "+QS" (*ptr) \
: [val] "i" (val) \
@@ -213,7 +213,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \
#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \
__ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \
- __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+ __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi")
__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi")
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index d82130d7f2b6..f3184073e754 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -18,9 +18,9 @@
#ifdef MARCH_HAS_Z196_FEATURES
/* Fast-BCR without checkpoint synchronization */
-#define __ASM_BCR_SERIALIZE "bcr 14,0\n"
+#define __ASM_BCR_SERIALIZE "bcr 14,0"
#else
-#define __ASM_BCR_SERIALIZE "bcr 15,0\n"
+#define __ASM_BCR_SERIALIZE "bcr 15,0"
#endif
static __always_inline void bcr_serialize(void)
@@ -69,12 +69,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
if (__builtin_constant_p(size) && size > 0) {
asm(" clgr %2,%1\n"
- " slbgr %0,%0\n"
+ " slbgr %0,%0"
:"=d" (mask) : "d" (size-1), "d" (index) :"cc");
return mask;
}
asm(" clgr %1,%2\n"
- " slbgr %0,%0\n"
+ " slbgr %0,%0"
:"=d" (mask) : "d" (size), "d" (index) :"cc");
return ~mask;
}
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index ec945fb60c02..5f10074665b0 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -62,7 +62,7 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig
addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE;
mask = 1UL << (nr & (BITS_PER_BYTE - 1));
asm volatile(
- " tm %[addr],%[mask]\n"
+ " tm %[addr],%[mask]"
: "=@cc" (cc)
: [addr] "Q" (*addr), [mask] "I" (mask)
);
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index d86dea5900e7..7e83dc2d3b06 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -27,7 +27,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)
kmsan_check_memory(buff, len);
asm volatile(
"0: cksm %[sum],%[rp]\n"
- " jo 0b\n"
+ " jo 0b"
: [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory");
return sum;
}
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index a9e2006033b7..008357996262 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -18,7 +18,7 @@ void __cmpxchg_called_with_bad_pointer(void);
static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new)
{
asm volatile(
- " cs %[old],%[new],%[ptr]\n"
+ " cs %[old],%[new],%[ptr]"
: [old] "+d" (old), [ptr] "+Q" (*(u32 *)ptr)
: [new] "d" (new)
: "memory", "cc");
@@ -28,7 +28,7 @@ static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new)
static __always_inline u64 __csg_asm(u64 ptr, u64 old, u64 new)
{
asm volatile(
- " csg %[old],%[new],%[ptr]\n"
+ " csg %[old],%[new],%[ptr]"
: [old] "+d" (old), [ptr] "+QS" (*(u64 *)ptr)
: [new] "d" (new)
: "memory", "cc");
@@ -126,7 +126,7 @@ static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size)
} \
case 4: { \
asm volatile( \
- " cs %[__old],%[__new],%[__ptr]\n" \
+ " cs %[__old],%[__new],%[__ptr]" \
: [__old] "+d" (*__oldp), \
[__ptr] "+Q" (*(ptr)), \
"=@cc" (__cc) \
@@ -136,7 +136,7 @@ static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size)
} \
case 8: { \
asm volatile( \
- " csg %[__old],%[__new],%[__ptr]\n" \
+ " csg %[__old],%[__new],%[__ptr]" \
: [__old] "+d" (*__oldp), \
[__ptr] "+QS" (*(ptr)), \
"=@cc" (__cc) \
@@ -241,7 +241,7 @@ static __always_inline u64 __arch_xchg(u64 ptr, u64 x, int size)
static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
{
asm volatile(
- " cdsg %[old],%[new],%[ptr]\n"
+ " cdsg %[old],%[new],%[ptr]"
: [old] "+d" (old), [ptr] "+QS" (*ptr)
: [new] "d" (new)
: "memory", "cc");
@@ -258,7 +258,7 @@ static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp,
int cc;
asm volatile(
- " cdsg %[old],%[new],%[ptr]\n"
+ " cdsg %[old],%[new],%[ptr]"
: [old] "+d" (*oldp), [ptr] "+QS" (*ptr), "=@cc" (cc)
: [new] "d" (new)
: "memory");
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 4bc5317fbb12..a83683169d98 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -229,7 +229,7 @@ static __always_inline void __cpacf_query_rre(u32 opc, u8 r1, u8 r2,
asm volatile(
" la %%r1,%[pb]\n"
" lghi %%r0,%[fc]\n"
- " .insn rre,%[opc] << 16,%[r1],%[r2]\n"
+ " .insn rre,%[opc] << 16,%[r1],%[r2]"
: [pb] "=R" (*pb)
: [opc] "i" (opc), [fc] "i" (fc),
[r1] "i" (r1), [r2] "i" (r2)
@@ -242,7 +242,7 @@ static __always_inline void __cpacf_query_rrf(u32 opc, u8 r1, u8 r2, u8 r3,
asm volatile(
" la %%r1,%[pb]\n"
" lghi %%r0,%[fc]\n"
- " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]\n"
+ " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]"
: [pb] "=R" (*pb)
: [opc] "i" (opc), [fc] "i" (fc), [r1] "i" (r1),
[r2] "i" (r2), [r3] "i" (r3), [m4] "i" (m4)
@@ -416,7 +416,7 @@ static inline int cpacf_km(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KM)
@@ -448,7 +448,7 @@ static inline int cpacf_kmc(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KMC)
@@ -476,7 +476,7 @@ static inline void cpacf_kimd(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,0,%[src],8,0\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)(param)),
[opc] "i" (CPACF_KIMD)
@@ -501,7 +501,7 @@ static inline void cpacf_klmd(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,0,%[src],8,0\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KLMD)
@@ -530,7 +530,7 @@ static inline int _cpacf_kmac(unsigned long *gr0, void *param,
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,%[src]\n"
" brc 1,0b\n" /* handle partial completion */
- " lgr %[r0],0\n"
+ " lgr %[r0],0"
: [r0] "+d" (*gr0), [src] "+&d" (s.pair)
: [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KMAC)
@@ -580,7 +580,7 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair), [dst] "+&d" (d.pair),
[ctr] "+&d" (c.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
@@ -614,7 +614,7 @@ static inline void cpacf_prno(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[seed]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [dst] "+&d" (d.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[seed] "d" (s.pair), [opc] "i" (CPACF_PRNO)
@@ -640,7 +640,7 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
asm volatile (
" lghi 0,%[fc]\n"
"0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair)
: [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO)
: "cc", "memory", "0");
@@ -692,7 +692,7 @@ static inline void cpacf_pckmo(long func, void *param)
asm volatile(
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
- " .insn rre,%[opc] << 16,0,0\n" /* PCKMO opcode */
+ " .insn rre,%[opc] << 16,0,0" /* PCKMO opcode */
:
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_PCKMO)
@@ -725,7 +725,7 @@ static inline void cpacf_kma(unsigned long func, void *param, u8 *dest,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [dst] "+&d" (d.pair), [src] "+&d" (s.pair),
[aad] "+&d" (a.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h
index e93cc240a1ed..1765a0320933 100644
--- a/arch/s390/include/asm/ctlreg.h
+++ b/arch/s390/include/asm/ctlreg.h
@@ -100,7 +100,7 @@ struct ctlreg {
BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \
typecheck(struct ctlreg, array[0]); \
asm volatile( \
- " lctlg %[_low],%[_high],%[_arr]\n" \
+ " lctlg %[_low],%[_high],%[_arr]" \
: \
: [_arr] "Q" (*(struct addrtype *)(&array)), \
[_low] "i" (low), [_high] "i" (high) \
@@ -119,7 +119,7 @@ struct ctlreg {
BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \
typecheck(struct ctlreg, array[0]); \
asm volatile( \
- " stctg %[_low],%[_high],%[_arr]\n" \
+ " stctg %[_low],%[_high],%[_arr]" \
: [_arr] "=Q" (*(struct addrtype *)(&array)) \
: [_low] "i" (low), [_high] "i" (high)); \
} while (0)
@@ -127,7 +127,7 @@ struct ctlreg {
static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg)
{
asm volatile(
- " lctlg %[cr],%[cr],%[reg]\n"
+ " lctlg %[cr],%[cr],%[reg]"
:
: [reg] "Q" (*reg), [cr] "i" (cr)
: "memory");
@@ -136,7 +136,7 @@ static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg)
static __always_inline void local_ctl_store(unsigned int cr, struct ctlreg *reg)
{
asm volatile(
- " stctg %[cr],%[cr],%[reg]\n"
+ " stctg %[cr],%[cr],%[reg]"
: [reg] "=Q" (*reg)
: [cr] "i" (cr));
}
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index 135bb89c0a89..e99f8bca8e08 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -38,7 +38,7 @@ asm(".include \"asm/fpu-insn-asm.h\"\n");
static __always_inline void fpu_cefbr(u8 f1, s32 val)
{
- asm volatile("cefbr %[f1],%[val]\n"
+ asm volatile("cefbr %[f1],%[val]"
:
: [f1] "I" (f1), [val] "d" (val)
: "memory");
@@ -48,7 +48,7 @@ static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode)
{
unsigned long val;
- asm volatile("cgebr %[val],%[mode],%[f2]\n"
+ asm volatile("cgebr %[val],%[mode],%[f2]"
: [val] "=d" (val)
: [f2] "I" (f2), [mode] "I" (mode)
: "memory");
@@ -57,7 +57,7 @@ static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode)
static __always_inline void fpu_debr(u8 f1, u8 f2)
{
- asm volatile("debr %[f1],%[f2]\n"
+ asm volatile("debr %[f1],%[f2]"
:
: [f1] "I" (f1), [f2] "I" (f2)
: "memory");
@@ -66,7 +66,7 @@ static __always_inline void fpu_debr(u8 f1, u8 f2)
static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
{
instrument_read(reg, sizeof(*reg));
- asm volatile("ld %[fpr],%[reg]\n"
+ asm volatile("ld %[fpr],%[reg]"
:
: [fpr] "I" (fpr), [reg] "Q" (reg->ui)
: "memory");
@@ -74,7 +74,7 @@ static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
static __always_inline void fpu_ldgr(u8 f1, u32 val)
{
- asm volatile("ldgr %[f1],%[val]\n"
+ asm volatile("ldgr %[f1],%[val]"
:
: [f1] "I" (f1), [val] "d" (val)
: "memory");
@@ -113,7 +113,7 @@ static inline void fpu_lfpc_safe(unsigned int *fpc)
static __always_inline void fpu_std(unsigned short fpr, freg_t *reg)
{
instrument_write(reg, sizeof(*reg));
- asm volatile("std %[fpr],%[reg]\n"
+ asm volatile("std %[fpr],%[reg]"
: [reg] "=Q" (reg->ui)
: [fpr] "I" (fpr)
: "memory");
@@ -181,7 +181,7 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
instrument_read(vxr, sizeof(__vector128));
- asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
+ asm volatile("VL %[v1],%O[vxr],,%R[vxr]"
:
: [vxr] "Q" (*(__vector128 *)vxr),
[v1] "I" (v1)
@@ -195,7 +195,7 @@ static __always_inline void fpu_vl(u8 v1, const void *vxr)
instrument_read(vxr, sizeof(__vector128));
asm volatile(
" la 1,%[vxr]\n"
- " VL %[v1],0,,1\n"
+ " VL %[v1],0,,1"
:
: [vxr] "R" (*(__vector128 *)vxr),
[v1] "I" (v1)
@@ -239,7 +239,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]"
:
: [vxr] "Q" (*(u8 *)vxr),
[index] "d" (index),
@@ -257,7 +257,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
instrument_read(vxr, size);
asm volatile(
" la 1,%[vxr]\n"
- " VLL %[v1],%[index],0,1\n"
+ " VLL %[v1],%[index],0,1"
:
: [vxr] "R" (*(u8 *)vxr),
[index] "d" (index),
@@ -277,7 +277,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]" \
: \
: [vxrs] "Q" (*_v), \
[v1] "I" (_v1), [v3] "I" (_v3) \
@@ -297,7 +297,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
instrument_read(_v, size); \
asm volatile( \
" la 1,%[vxrs]\n" \
- " VLM %[v1],%[v3],0,1\n" \
+ " VLM %[v1],%[v3],0,1" \
: \
: [vxrs] "R" (*_v), \
[v1] "I" (_v1), [v3] "I" (_v3) \
@@ -360,7 +360,7 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
instrument_write(vxr, sizeof(__vector128));
- asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
+ asm volatile("VST %[v1],%O[vxr],,%R[vxr]"
: [vxr] "=Q" (*(__vector128 *)vxr)
: [v1] "I" (v1)
: "memory");
@@ -373,7 +373,7 @@ static __always_inline void fpu_vst(u8 v1, const void *vxr)
instrument_write(vxr, sizeof(__vector128));
asm volatile(
" la 1,%[vxr]\n"
- " VST %[v1],0,,1\n"
+ " VST %[v1],0,,1"
: [vxr] "=R" (*(__vector128 *)vxr)
: [v1] "I" (v1)
: "memory", "1");
@@ -389,7 +389,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_write(vxr, size);
- asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]"
: [vxr] "=Q" (*(u8 *)vxr)
: [index] "d" (index), [v1] "I" (v1)
: "memory");
@@ -405,7 +405,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
instrument_write(vxr, size);
asm volatile(
" la 1,%[vxr]\n"
- " VSTL %[v1],%[index],0,1\n"
+ " VSTL %[v1],%[index],0,1"
: [vxr] "=R" (*(u8 *)vxr)
: [index] "d" (index), [v1] "I" (v1)
: "memory", "1");
@@ -423,7 +423,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]" \
: [vxrs] "=Q" (*_v) \
: [v1] "I" (_v1), [v3] "I" (_v3) \
: "memory"); \
@@ -442,7 +442,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
instrument_write(_v, size); \
asm volatile( \
" la 1,%[vxrs]\n" \
- " VSTM %[v1],%[v3],0,1\n" \
+ " VSTM %[v1],%[v3],0,1" \
: [vxrs] "=R" (*_v) \
: [v1] "I" (_v1), [v3] "I" (_v3) \
: "memory", "1"); \
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 95d15416c39d..c2ba3d4398c5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -722,6 +722,8 @@ extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
+bool kvm_s390_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa);
+
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index df73a052760c..00cc8c916cfb 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -76,7 +76,7 @@ long __kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args) \
HYPERCALL_REGS_##args; \
\
asm volatile ( \
- " diag 2,4,0x500\n" \
+ " diag 2,4,0x500" \
: "=d" (__rc) \
: "d" (__nr) HYPERCALL_FMT_##args \
: "memory", "cc"); \
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 41f900f693d9..6890925d5587 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -246,6 +246,16 @@ int clp_refresh_fh(u32 fid, u32 *fh);
/* UID */
void update_uid_checking(bool new);
+/* Firmware Sysfs */
+int __init __zpci_fw_sysfs_init(void);
+
+static inline int __init zpci_fw_sysfs_init(void)
+{
+ if (IS_ENABLED(CONFIG_SYSFS))
+ return __zpci_fw_sysfs_init();
+ return 0;
+}
+
/* IOMMU Interface */
int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 96af7d964014..965886dfe954 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -73,13 +73,13 @@
if (__builtin_constant_p(val__) && \
((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
asm volatile( \
- op2 " %[ptr__],%[val__]\n" \
+ op2 " %[ptr__],%[val__]" \
: [ptr__] "+Q" (*ptr__) \
: [val__] "i" ((szcast)val__) \
: "cc"); \
} else { \
asm volatile( \
- op1 " %[old__],%[val__],%[ptr__]\n" \
+ op1 " %[old__],%[val__],%[ptr__]" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
@@ -98,7 +98,7 @@
preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
- op " %[old__],%[val__],%[ptr__]\n" \
+ op " %[old__],%[val__],%[ptr__]" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
@@ -117,7 +117,7 @@
preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
- op " %[old__],%[val__],%[ptr__]\n" \
+ op " %[old__],%[val__],%[ptr__]" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 6a9c08b80eda..93e1034485d7 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -163,7 +163,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low,
"2: stg %[poison],0(%[addr])\n"
" j 4f\n"
"3: mvc 8(1,%[addr]),0(%[addr])\n"
- "4:\n"
+ "4:"
: [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp)
: [poison] "d" (poison)
: "memory", "cc"
diff --git a/arch/s390/include/asm/rwonce.h b/arch/s390/include/asm/rwonce.h
index 91fc24520e82..402325ec20f0 100644
--- a/arch/s390/include/asm/rwonce.h
+++ b/arch/s390/include/asm/rwonce.h
@@ -19,7 +19,7 @@
\
BUILD_BUG_ON(sizeof(x) != 16); \
asm volatile( \
- " lpq %[val],%[_x]\n" \
+ " lpq %[val],%[_x]" \
: [val] "=d" (__u.val) \
: [_x] "QS" (x) \
: "memory"); \
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index f9935db9fd76..b06b183b7246 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -98,7 +98,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
kcsan_release();
asm_inline volatile(
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */
- " mvhhi %[lock],0\n"
+ " mvhhi %[lock],0"
: [lock] "=Q" (((unsigned short *)&lp->lock)[1])
:
: "memory");
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 1d5ca13dc90f..810a6b9d9628 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -199,7 +199,7 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
" lg 15,%[_stack]\n" \
" stg %[_frame],%[_bc](15)\n" \
" brasl 14,%[_fn]\n" \
- " lgr 15,%[_prev]\n" \
+ " lgr 15,%[_prev]" \
: [_prev] "=&d" (prev), CALL_FMT_##nr \
: [_stack] "R" (__stack), \
[_bc] "i" (offsetof(struct stack_frame, back_chain)), \
@@ -250,7 +250,7 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
" lra 14,0(1)\n" \
" lpswe %[psw_enter]\n" \
"0: lpswe 0(7)\n" \
- "1:\n" \
+ "1:" \
: CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave) \
: [psw_enter] "Q" (psw_enter) \
: "7", CALL_CLOBBER_##nr); \
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index f8f68f4ef255..238e721e5a22 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -125,7 +125,7 @@ static inline void *memscan(void *s, int c, size_t n)
asm volatile(
" lgr 0,%[c]\n"
"0: srst %[ret],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [ret] "+&a" (ret), [s] "+&a" (s)
: [c] "d" (c)
: "cc", "memory", "0");
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index bd4cb00ccd5e..10ce5c4ccbd6 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -155,7 +155,7 @@ long syscall##nr(unsigned long syscall SYSCALL_PARM_##nr) \
SYSCALL_REGS_##nr; \
\
asm volatile ( \
- " svc 0\n" \
+ " svc 0" \
: "=d" (rc) \
: "d" (r1) SYSCALL_FMT_##nr \
: "memory"); \
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 59dfb8780f62..49447b40f038 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -81,7 +81,7 @@ static inline void set_tod_programmable_field(u16 val)
{
asm volatile(
" lgr 0,%[val]\n"
- " sckpf\n"
+ " sckpf"
:
: [val] "d" ((unsigned long)val)
: "0");
diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c
index d6a34454aa5a..f411562aa7f6 100644
--- a/arch/s390/kernel/diag/diag310.c
+++ b/arch/s390/kernel/diag/diag310.c
@@ -66,7 +66,7 @@ static inline unsigned long diag310(unsigned long subcode, unsigned long size, v
union register_pair rp = { .even = (unsigned long)addr, .odd = size };
diag_stat_inc(DIAG_STAT_X310);
- asm volatile("diag %[rp],%[subcode],0x310\n"
+ asm volatile("diag %[rp],%[subcode],0x310"
: [rp] "+d" (rp.pair)
: [subcode] "d" (subcode)
: "memory");
diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c
index f0a8b4841fb9..fe325c2a2d0d 100644
--- a/arch/s390/kernel/diag/diag324.c
+++ b/arch/s390/kernel/diag/diag324.c
@@ -101,7 +101,7 @@ static unsigned long diag324(unsigned long subcode, void *addr)
union register_pair rp = { .even = (unsigned long)addr };
diag_stat_inc(DIAG_STAT_X324);
- asm volatile("diag %[rp],%[subcode],0x324\n"
+ asm volatile("diag %[rp],%[subcode],0x324"
: [rp] "+d" (rp.pair)
: [subcode] "d" (subcode)
: "memory");
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 9455f213dc20..62bf8a15bf32 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -80,6 +80,15 @@ static int paicrypt_root_alloc(void)
/* Release the PMU if event is the last perf event */
static DEFINE_MUTEX(pai_reserve_mutex);
+/* Free all memory allocated for event counting/sampling setup */
+static void paicrypt_free(struct paicrypt_mapptr *mp)
+{
+ free_page((unsigned long)mp->mapptr->page);
+ kvfree(mp->mapptr->save);
+ kfree(mp->mapptr);
+ mp->mapptr = NULL;
+}
+
/* Adjust usage counters and remove allocated memory when all users are
* gone.
*/
@@ -93,15 +102,8 @@ static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu)
"refcnt %u\n", __func__, event->attr.config,
event->cpu, cpump->active_events,
refcount_read(&cpump->refcnt));
- if (refcount_dec_and_test(&cpump->refcnt)) {
- debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
- __func__, (unsigned long)cpump->page,
- cpump->save);
- free_page((unsigned long)cpump->page);
- kvfree(cpump->save);
- kfree(cpump);
- mp->mapptr = NULL;
- }
+ if (refcount_dec_and_test(&cpump->refcnt))
+ paicrypt_free(mp);
paicrypt_root_free();
mutex_unlock(&pai_reserve_mutex);
}
@@ -175,14 +177,13 @@ static u64 paicrypt_getall(struct perf_event *event)
*
* Allocate the memory for the event.
*/
-static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu)
+static int paicrypt_alloc_cpu(struct perf_event *event, int cpu)
{
struct paicrypt_map *cpump = NULL;
struct paicrypt_mapptr *mp;
int rc;
mutex_lock(&pai_reserve_mutex);
-
/* Allocate root node */
rc = paicrypt_root_alloc();
if (rc)
@@ -192,58 +193,44 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu)
mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
cpump = mp->mapptr;
if (!cpump) { /* Paicrypt_map allocated? */
+ rc = -ENOMEM;
cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
- if (!cpump) {
- rc = -ENOMEM;
- goto free_root;
+ if (!cpump)
+ goto undo;
+ /* Allocate memory for counter page and counter extraction.
+ * Only the first counting event has to allocate a page.
+ */
+ mp->mapptr = cpump;
+ cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ cpump->save = kvmalloc_array(paicrypt_cnt + 1,
+ sizeof(struct pai_userdata),
+ GFP_KERNEL);
+ if (!cpump->page || !cpump->save) {
+ paicrypt_free(mp);
+ goto undo;
}
INIT_LIST_HEAD(&cpump->syswide_list);
- }
-
- /* Allocate memory for counter page and counter extraction.
- * Only the first counting event has to allocate a page.
- */
- if (cpump->page) {
+ refcount_set(&cpump->refcnt, 1);
+ rc = 0;
+ } else {
refcount_inc(&cpump->refcnt);
- goto unlock;
}
- rc = -ENOMEM;
- cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
- if (!cpump->page)
- goto free_paicrypt_map;
- cpump->save = kvmalloc_array(paicrypt_cnt + 1,
- sizeof(struct pai_userdata), GFP_KERNEL);
- if (!cpump->save) {
- free_page((unsigned long)cpump->page);
- cpump->page = NULL;
- goto free_paicrypt_map;
+undo:
+ if (rc) {
+ /* Error in allocation of event, decrement anchor. Since
+ * the event in not created, its destroy() function is never
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ paicrypt_root_free();
}
-
- /* Set mode and reference count */
- rc = 0;
- refcount_set(&cpump->refcnt, 1);
- mp->mapptr = cpump;
- debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx "
- "save %p rc %d\n", __func__, cpump->active_events,
- refcount_read(&cpump->refcnt),
- (unsigned long)cpump->page, cpump->save, rc);
- goto unlock;
-
-free_paicrypt_map:
- /* Undo memory allocation */
- kfree(cpump);
- mp->mapptr = NULL;
-free_root:
- paicrypt_root_free();
unlock:
mutex_unlock(&pai_reserve_mutex);
- return rc ? ERR_PTR(rc) : cpump;
+ return rc;
}
-static int paicrypt_event_init_all(struct perf_event *event)
+static int paicrypt_alloc(struct perf_event *event)
{
- struct paicrypt_map *cpump;
struct cpumask *maskptr;
int cpu, rc = -ENOMEM;
@@ -252,12 +239,11 @@ static int paicrypt_event_init_all(struct perf_event *event)
goto out;
for_each_online_cpu(cpu) {
- cpump = paicrypt_busy(event, cpu);
- if (IS_ERR(cpump)) {
+ rc = paicrypt_alloc_cpu(event, cpu);
+ if (rc) {
for_each_cpu(cpu, maskptr)
paicrypt_event_destroy_cpu(event, cpu);
kfree(maskptr);
- rc = PTR_ERR(cpump);
goto out;
}
cpumask_set_cpu(cpu, maskptr);
@@ -279,7 +265,6 @@ out:
static int paicrypt_event_init(struct perf_event *event)
{
struct perf_event_attr *a = &event->attr;
- struct paicrypt_map *cpump;
int rc = 0;
/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
@@ -301,13 +286,10 @@ static int paicrypt_event_init(struct perf_event *event)
}
}
- if (event->cpu >= 0) {
- cpump = paicrypt_busy(event, event->cpu);
- if (IS_ERR(cpump))
- rc = PTR_ERR(cpump);
- } else {
- rc = paicrypt_event_init_all(event);
- }
+ if (event->cpu >= 0)
+ rc = paicrypt_alloc_cpu(event, event->cpu);
+ else
+ rc = paicrypt_alloc(event);
if (rc) {
free_page(PAI_SAVE_AREA(event));
goto out;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7b529868789f..892fce2b7549 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -839,7 +839,7 @@ static void __init setup_control_program_code(void)
return;
diag_stat_inc(DIAG_STAT_X318);
- asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
+ asm volatile("diag %0,0,0x318" : : "d" (diag318_info.val));
}
/*
diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c
index ba049fd103c2..cc869de6e3a5 100644
--- a/arch/s390/kernel/skey.c
+++ b/arch/s390/kernel/skey.c
@@ -11,7 +11,7 @@ static inline unsigned long load_real_address(unsigned long address)
unsigned long real;
asm volatile(
- " lra %[real],0(%[address])\n"
+ " lra %[real],0(%[address])"
: [real] "=d" (real)
: [address] "a" (address)
: "cc");
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index e88ebe5339fc..da84c0dc6b7e 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -340,7 +340,7 @@ static void pcpu_delegate(struct pcpu *pcpu, int cpu,
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
" brc 2,0b # busy, try again\n"
"1: sigp 0,%1,%3 # sigp stop to current cpu\n"
- " brc 2,1b # busy, try again\n"
+ " brc 2,1b # busy, try again"
: : "d" (pcpu->address), "d" (source_cpu),
"K" (SIGP_RESTART), "K" (SIGP_STOP)
: "0", "1", "cc");
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 93b2a01bae40..5d17609bcfe1 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -866,8 +866,8 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
return -ENOENT;
}
-/*
- * Do the actual search for `uv_get_secret_metadata`.
+/**
+ * uv_find_secret() - search secret metadata for a given secret id.
* @secret_id: search pattern.
* @list: ephemeral buffer space
* @secret: output data, containing the secret's metadata.
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index feecf1a6ddb4..d74d4c52ccd0 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -215,6 +215,28 @@ SECTIONS
ELF_DETAILS
/*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the three reserved double words.
+ */
+ .got.plt : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
+ /*
* uncompressed image info used by the decompressor
* it should match struct vmlinux_info
*/
@@ -244,28 +266,6 @@ SECTIONS
#endif
} :NONE
- /*
- * Make sure that the .got.plt is either completely empty or it
- * contains only the three reserved double words.
- */
- .got.plt : {
- *(.got.plt)
- }
- ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
-
- /*
- * Sections that should stay zero sized, which is safer to
- * explicitly check instead of blindly discarding.
- */
- .plt : {
- *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
- }
- ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
- .rela.dyn : {
- *(.rela.*) *(.rela_*)
- }
- ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
-
/* Sections to be discarded */
DISCARDS
/DISCARD/ : {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6d51aa5f66be..16ba04062854 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -356,7 +356,7 @@ static __always_inline void pfcr_query(u8 (*query)[16])
{
asm volatile(
" lghi 0,0\n"
- " .insn rsy,0xeb0000000016,0,0,%[query]\n"
+ " .insn rsy,0xeb0000000016,0,0,%[query]"
: [query] "=QS" (*query)
:
: "cc", "0");
@@ -368,7 +368,7 @@ static __always_inline void __sortl_query(u8 (*query)[32])
" lghi 0,0\n"
" la 1,%[query]\n"
/* Parameter registers are ignored */
- " .insn rre,0xb9380000,2,4\n"
+ " .insn rre,0xb9380000,2,4"
: [query] "=R" (*query)
:
: "cc", "0", "1");
@@ -380,7 +380,7 @@ static __always_inline void __dfltcc_query(u8 (*query)[32])
" lghi 0,0\n"
" la 1,%[query]\n"
/* Parameter registers are ignored */
- " .insn rrf,0xb9390000,2,4,6,0\n"
+ " .insn rrf,0xb9390000,2,4,6,0"
: [query] "=R" (*query)
:
: "cc", "0", "1");
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9253c70897a8..9a71b6e00948 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -605,6 +605,14 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
}
}
+#if IS_ENABLED(CONFIG_VFIO_AP)
+bool kvm_s390_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa)
+{
+ return kvm_is_gpa_in_memslot(kvm, gpa);
+}
+EXPORT_SYMBOL_FOR_MODULES(kvm_s390_is_gpa_in_memslot, "vfio_ap");
+#endif
+
/*
* handle_pqap: Handling pqap interception
* @vcpu: the vcpu having issue the pqap instruction
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ad9da4038511..10db1e56a811 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -96,7 +96,7 @@ static inline int arch_load_niai4(int *lock)
asm_inline volatile(
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */
- " l %[owner],%[lock]\n"
+ " l %[owner],%[lock]"
: [owner] "=d" (owner) : [lock] "R" (*lock) : "memory");
return owner;
}
@@ -109,7 +109,7 @@ static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
asm_inline volatile(
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
- " cs %[old],%[new],%[lock]\n"
+ " cs %[old],%[new],%[lock]"
: [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc)
: [new] "d" (new)
: "memory");
@@ -124,7 +124,7 @@ static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
asm_inline volatile(
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
- " cs %[old],%[new],%[lock]\n"
+ " cs %[old],%[new],%[lock]"
: [old] "+d" (old), [lock] "+Q" (*lock)
: [new] "d" (new)
: "cc", "memory");
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 099de76e8b1a..757f58960198 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -27,7 +27,7 @@ static inline char *__strend(const char *s)
asm volatile(
" lghi 0,0\n"
"0: srst %[e],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [e] "+&a" (e), [s] "+&a" (s)
:
: "cc", "memory", "0");
@@ -41,7 +41,7 @@ static inline char *__strnend(const char *s, size_t n)
asm volatile(
" lghi 0,0\n"
"0: srst %[p],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [p] "+&d" (p), [s] "+&a" (s)
:
: "cc", "memory", "0");
@@ -95,7 +95,7 @@ char *strcat(char *dest, const char *src)
"0: srst %[dummy],%[dest]\n"
" jo 0b\n"
"1: mvst %[dummy],%[src]\n"
- " jo 1b\n"
+ " jo 1b"
: [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src)
:
: "cc", "memory", "0");
@@ -291,7 +291,7 @@ void *memscan(void *s, int c, size_t n)
asm volatile(
" lgr 0,%[c]\n"
"0: srst %[ret],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [ret] "+&a" (ret), [s] "+&a" (s)
: [c] "d" (c)
: "cc", "memory", "0");
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 6e42100875e7..6bb3fa5bf925 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -150,7 +150,7 @@ static __always_inline struct pt_regs fake_pt_regs(void)
regs.gprs[15] = current_stack_pointer;
asm volatile(
- "basr %[psw_addr],0\n"
+ "basr %[psw_addr],0"
: [psw_addr] "=d" (regs.psw.addr));
return regs;
}
@@ -232,7 +232,7 @@ static noinline void test_unwind_kprobed_func(void)
asm volatile(
" nopr %%r7\n"
"test_unwind_kprobed_insn:\n"
- " nopr %%r7\n"
+ " nopr %%r7"
:);
}
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index ce7bcf7c0032..1721b73b7803 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -27,7 +27,7 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
"1: exrl %0,2f\n"
" j 3f\n"
"2: xc 0(1,%1),0(%2)\n"
- "3:\n"
+ "3:"
: : "d" (bytes), "a" (p1), "a" (p2)
: "0", "cc", "memory");
}
@@ -53,7 +53,7 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
" j 4f\n"
"2: xc 0(1,%1),0(%2)\n"
"3: xc 0(1,%1),0(%3)\n"
- "4:\n"
+ "4:"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
: : "0", "cc", "memory");
}
@@ -84,7 +84,7 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
"2: xc 0(1,%1),0(%2)\n"
"3: xc 0(1,%1),0(%3)\n"
"4: xc 0(1,%1),0(%4)\n"
- "5:\n"
+ "5:"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
: : "0", "cc", "memory");
}
@@ -121,7 +121,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
"3: xc 0(1,%1),0(%3)\n"
"4: xc 0(1,%1),0(%4)\n"
"5: xc 0(1,%1),0(%5)\n"
- "6:\n"
+ "6:"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
"+a" (p5)
: : "0", "cc", "memory");
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 44426e0f2944..cfd219fe495c 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -41,7 +41,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
" ex %1,0(1)\n"
" lg %1,0(%3)\n"
" lra %0,0(%0)\n"
- " sturg %1,%0\n"
+ " sturg %1,%0"
: "+&a" (aligned), "+&a" (count), "=m" (tmp)
: "a" (&tmp), "a" (&tmp[offset]), "a" (src)
: "cc", "memory", "1");
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 76d92069799f..626fca116cd7 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -245,7 +245,7 @@ static inline unsigned long base_lra(unsigned long address)
unsigned long real;
asm volatile(
- " lra %0,0(%1)\n"
+ " lra %0,0(%1)"
: "=d" (real) : "a" (address) : "cc");
return real;
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index cd6676c2d602..c82c577db2bc 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -1188,6 +1188,10 @@ static int __init pci_base_init(void)
if (rc)
goto out_find;
+ rc = zpci_fw_sysfs_init();
+ if (rc)
+ goto out_find;
+
s390_pci_initialized = 1;
return 0;
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index d930416d4c90..b95376041501 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -88,6 +88,7 @@ static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+ pci_uevent_ers(pdev, ers_res);
if (ers_result_indicates_abort(ers_res))
pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
@@ -244,6 +245,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
ers_res = PCI_ERS_RESULT_RECOVERED;
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
status_str = "failed (driver can't recover)";
@@ -253,6 +255,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
if (driver->err_handler->resume)
driver->err_handler->resume(pdev);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
out_unlock:
pci_dev_unlock(pdev);
zpci_report_status(zdev, "recovery", status_str);
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index eb978c8012be..35ceb1bea1c6 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -145,7 +145,7 @@ int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
return -EIO;
asm volatile(
- ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n"
+ ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]"
: : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib));
return 0;
@@ -442,7 +442,7 @@ EXPORT_SYMBOL_GPL(zpci_write_block);
static inline void __pciwb_mio(void)
{
- asm volatile (".insn rre,0xb9d50000,0,0\n");
+ asm volatile (".insn rre,0xb9d50000,0,0");
}
void zpci_barrier(void)
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 0ee0924cfab7..12060870e2aa 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -41,6 +41,9 @@ zpci_attr(segment1, "0x%02x\n", pfip[1]);
zpci_attr(segment2, "0x%02x\n", pfip[2]);
zpci_attr(segment3, "0x%02x\n", pfip[3]);
+#define ZPCI_FW_ATTR_RO(_name) \
+ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
static ssize_t mio_enabled_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -164,6 +167,13 @@ static ssize_t uid_is_unique_show(struct device *dev,
}
static DEVICE_ATTR_RO(uid_is_unique);
+static ssize_t uid_checking_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", zpci_unique_uid ? 1 : 0);
+}
+ZPCI_FW_ATTR_RO(uid_checking);
+
/* analogous to smbios index */
static ssize_t index_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -233,3 +243,18 @@ const struct attribute_group pfip_attr_group = {
.name = "pfip",
.attrs = pfip_attrs,
};
+
+static struct attribute *clp_fw_attrs[] = {
+ &uid_checking_attr.attr,
+ NULL,
+};
+
+static struct attribute_group clp_fw_attr_group = {
+ .name = "clp",
+ .attrs = clp_fw_attrs,
+};
+
+int __init __zpci_fw_sysfs_init(void)
+{
+ return sysfs_create_group(firmware_kobj, &clp_fw_attr_group);
+}
diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig
index b6f36c938f1d..48b2e97114f9 100644
--- a/arch/sh/configs/ap325rxa_defconfig
+++ b/arch/sh/configs/ap325rxa_defconfig
@@ -81,10 +81,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig
index 9c2644443c4d..85db9ce42d1a 100644
--- a/arch/sh/configs/apsh4a3a_defconfig
+++ b/arch/sh/configs/apsh4a3a_defconfig
@@ -60,8 +60,7 @@ CONFIG_FONT_8x16=y
CONFIG_LOGO=y
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index 137573610ec4..e8b3b720578b 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -88,8 +88,7 @@ CONFIG_USB_MON=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig
index e76694aace25..fcca7cc5a75a 100644
--- a/arch/sh/configs/ecovec24_defconfig
+++ b/arch/sh/configs/ecovec24_defconfig
@@ -109,10 +109,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
index f427a95bcd21..98f4611ba553 100644
--- a/arch/sh/configs/edosk7760_defconfig
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -87,8 +87,7 @@ CONFIG_SND_SOC=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_NFS_FS=y
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index da176f100e00..e5d102cbff89 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -59,8 +59,7 @@ CONFIG_USB_MON=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index 924bb3233b0b..22177aa8f961 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -93,8 +93,7 @@ CONFIG_USB_EMI62=m
CONFIG_USB_EMI26=m
CONFIG_USB_SISUSBVGA=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index 0307bb2be79f..ff992301622b 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -49,8 +49,7 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
CONFIG_HW_RANDOM=y
CONFIG_RTC_CLASS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig
index 93b9aa32dc7c..a29fb912a242 100644
--- a/arch/sh/configs/magicpanelr2_defconfig
+++ b/arch/sh/configs/magicpanelr2_defconfig
@@ -64,9 +64,8 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_RTC_DRV_SH=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+# CONFIG_EXT4_FS_XATTR is not set
# CONFIG_DNOTIFY is not set
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index f28b8c4181c2..58b792dacfec 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -74,8 +74,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_RS5C372=y
CONFIG_RTC_DRV_SH=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index 3a4239f20ff1..7edf18451158 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -69,8 +69,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_RS5C372=y
CONFIG_RTC_DRV_SH=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig
index e4ef259425c4..28a81efefb02 100644
--- a/arch/sh/configs/rsk7264_defconfig
+++ b/arch/sh/configs/rsk7264_defconfig
@@ -59,8 +59,7 @@ CONFIG_USB_R8A66597_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_USB_STORAGE_DEBUG=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig
index e0d1560b2bfd..f8bfa46643ff 100644
--- a/arch/sh/configs/rsk7269_defconfig
+++ b/arch/sh/configs/rsk7269_defconfig
@@ -43,8 +43,7 @@ CONFIG_USB_R8A66597_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_USB_STORAGE_DEBUG=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index 9870d16d9711..311817161afb 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -102,9 +102,8 @@ CONFIG_LEDS_CLASS=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_MSDOS_FS=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index 07894f13441e..2433aa5f44a8 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -161,8 +161,7 @@ CONFIG_STAGING=y
# CONFIG_STAGING_EXCLUDE_BUILD is not set
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_XFS_FS=y
CONFIG_BTRFS_FS=y
diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig
index 75db12fb9ad1..b0baa5771c26 100644
--- a/arch/sh/configs/se7343_defconfig
+++ b/arch/sh/configs/se7343_defconfig
@@ -84,8 +84,7 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_ISP116X_HCD=y
CONFIG_UIO=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index 8770a72e6a63..1078c286a610 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -83,8 +83,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index b15c6406a0e8..edb9e0d2dce5 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -107,8 +107,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig
index 5327a2f70980..33daa0a17a32 100644
--- a/arch/sh/configs/se7722_defconfig
+++ b/arch/sh/configs/se7722_defconfig
@@ -44,8 +44,7 @@ CONFIG_HW_RANDOM=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SH=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
index 9501e69eb886..d572655f842d 100644
--- a/arch/sh/configs/se7724_defconfig
+++ b/arch/sh/configs/se7724_defconfig
@@ -110,10 +110,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 4d75c92cac10..3d194d81c92b 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -57,9 +57,8 @@ CONFIG_WATCHDOG=y
CONFIG_SH_WDT=m
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig
index cc6292b3235a..889daa5d2faa 100644
--- a/arch/sh/configs/sh2007_defconfig
+++ b/arch/sh/configs/sh2007_defconfig
@@ -95,7 +95,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_INTF_DEV_UIE_EMUL=y
CONFIG_DMADEVICES=y
CONFIG_TIMB_DMA=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig
index 48a0f9beb116..25e9d22779b3 100644
--- a/arch/sh/configs/sh7757lcr_defconfig
+++ b/arch/sh/configs/sh7757lcr_defconfig
@@ -64,7 +64,7 @@ CONFIG_MMC=y
CONFIG_MMC_SDHI=y
CONFIG_MMC_SH_MMCIF=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index b77b3313157e..e7b72ff377a8 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -61,8 +61,7 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 44f9b2317f09..17d2471d8e51 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -113,8 +113,7 @@ CONFIG_RTC_DRV_RS5C372=y
CONFIG_DMADEVICES=y
CONFIG_UIO=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig
index aec74b0e7003..34c8fe755add 100644
--- a/arch/sh/configs/sh7785lcr_defconfig
+++ b/arch/sh/configs/sh7785lcr_defconfig
@@ -90,8 +90,7 @@ CONFIG_USB_TEST=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_RS5C372=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig
index 9a0df5ea3866..52e7a42d66c7 100644
--- a/arch/sh/configs/shx3_defconfig
+++ b/arch/sh/configs/shx3_defconfig
@@ -84,8 +84,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SH=y
CONFIG_UIO=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index 8ef72b8dbcd3..2c474645ec36 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -215,9 +215,8 @@ CONFIG_USB_SERIAL_PL2303=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SH=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+# CONFIG_EXT4_FS_XATTR is not set
CONFIG_XFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig
index 103b81ec1ffb..b0c2ba478353 100644
--- a/arch/sh/configs/ul2_defconfig
+++ b/arch/sh/configs/ul2_defconfig
@@ -66,8 +66,7 @@ CONFIG_USB_R8A66597_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
index 00ef62133b04..e6d807f52253 100644
--- a/arch/sh/configs/urquell_defconfig
+++ b/arch/sh/configs/urquell_defconfig
@@ -114,8 +114,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SH=y
CONFIG_RTC_DRV_GENERIC=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_BTRFS_FS=y
CONFIG_MSDOS_FS=y
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 7a7c4dec2925..127940aafc39 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -187,10 +187,9 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 8de6646e9ce8..10934dfa987a 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -60,30 +60,3 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
pci_assign_unassigned_resources();
pci_bus_add_devices(root_bus);
}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- struct resource *res;
- u16 cmd, oldcmd;
- int i;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- oldcmd = cmd;
-
- pci_dev_for_each_resource(dev, res, i) {
- /* Only set up the requested stuff */
- if (!(mask & (1<<i)))
- continue;
-
- if (res->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
-
- if (cmd != oldcmd) {
- pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index ddac216a2aff..a9448088e762 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -722,33 +722,6 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
return bus;
}
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- struct resource *res;
- u16 cmd, oldcmd;
- int i;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- oldcmd = cmd;
-
- pci_dev_for_each_resource(dev, res, i) {
- /* Only set up the requested stuff */
- if (!(mask & (1<<i)))
- continue;
-
- if (res->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
-
- if (cmd != oldcmd) {
- pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
/* Platform support for /proc/bus/pci/X/Y mmap()s. */
int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
{
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index f894ae79e78a..d7c911724435 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -642,33 +642,6 @@ void pcibios_fixup_bus(struct pci_bus *bus)
}
}
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- struct resource *res;
- u16 cmd, oldcmd;
- int i;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- oldcmd = cmd;
-
- pci_dev_for_each_resource(dev, res, i) {
- /* Only set up the requested stuff */
- if (!(mask & (1<<i)))
- continue;
-
- if (res->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
-
- if (cmd != oldcmd) {
- pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
/* Makes compiler happy */
static volatile int pcic_timer_dummy;
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 1d4def0db841..49781bee7905 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -39,6 +39,7 @@ config UML
select HAVE_ARCH_TRACEHOOK
select HAVE_SYSCALL_TRACEPOINTS
select THREAD_INFO_IN_TASK
+ select SPARSE_IRQ
config MMU
bool
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index 277cea3d30eb..8006a5bd578c 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -199,4 +199,7 @@ static int ssl_non_raw_setup(char *str)
return 1;
}
__setup("ssl-non-raw", ssl_non_raw_setup);
-__channel_help(ssl_non_raw_setup, "set serial lines to non-raw mode");
+__uml_help(ssl_non_raw_setup,
+"ssl-non-raw\n"
+" Set serial lines to non-raw mode.\n\n"
+);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f2b2feeeb455..37455e74d314 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -370,7 +370,7 @@ __uml_help(ubd_setup,
" useful when a unique number should be given to the device. Note when\n"
" specifying a label, the filename2 must be also presented. It can be\n"
" an empty string, in which case the backing file is not used:\n"
-" ubd0=File,,Serial\n"
+" ubd0=File,,Serial\n\n"
);
static int udb_setup(char *str)
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 9bbbddfe866b..25d9258fa592 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1721,7 +1721,7 @@ static int __init vector_setup(char *str)
__setup("vec", vector_setup);
__uml_help(vector_setup,
"vec[0-9]+:<option>=<value>,<option>=<value>\n"
-" Configure a vector io network device.\n\n"
+" Configure a vector io network device.\n\n"
);
late_initcall(vector_init);
diff --git a/arch/um/drivers/virtio_pcidev.c b/arch/um/drivers/virtio_pcidev.c
index e9e23cc3f357..f9b4b6f7582c 100644
--- a/arch/um/drivers/virtio_pcidev.c
+++ b/arch/um/drivers/virtio_pcidev.c
@@ -598,6 +598,11 @@ static void virtio_pcidev_virtio_remove(struct virtio_device *vdev)
kfree(dev);
}
+static void virtio_pcidev_virtio_shutdown(struct virtio_device *vdev)
+{
+ /* nothing to do, we just don't want queue shutdown */
+}
+
static struct virtio_device_id id_table[] = {
{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -609,6 +614,7 @@ static struct virtio_driver virtio_pcidev_virtio_driver = {
.id_table = id_table,
.probe = virtio_pcidev_virtio_probe,
.remove = virtio_pcidev_virtio_remove,
+ .shutdown = virtio_pcidev_virtio_shutdown,
};
static int __init virtio_pcidev_init(void)
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index 0bbb24868557..c727e56ba116 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -13,20 +13,9 @@
#include <asm/mm_hooks.h>
#include <asm/mmu.h>
-#define activate_mm activate_mm
-static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
-{
-}
-
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
- unsigned cpu = smp_processor_id();
-
- if (prev != next) {
- cpumask_clear_cpu(cpu, mm_cpumask(prev));
- cpumask_set_cpu(cpu, mm_cpumask(next));
- }
}
#define init_new_context init_new_context
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 8a789c17acd8..7854d51b6639 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -71,7 +71,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long entry,
struct cpuinfo_um {
unsigned long loops_per_jiffy;
- int ipi_pipe[2];
int cache_alignment;
union {
__u32 x86_capability[NCAPINTS + NBUGINTS];
@@ -81,8 +80,6 @@ struct cpuinfo_um {
extern struct cpuinfo_um boot_cpu_data;
-#define cpu_data(cpu) boot_cpu_data
-#define current_cpu_data boot_cpu_data
#define cache_line_size() (boot_cpu_data.cache_alignment)
#define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index 2f9bfd99460a..7c7e17bce403 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -23,8 +23,9 @@
#define STUB_START stub_start
#define STUB_CODE STUB_START
#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE)
-#define STUB_DATA_PAGES 2 /* must be a power of two */
-#define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE)
+#define STUB_DATA_PAGES 2
+#define STUB_SIZE ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE)
+#define STUB_END (STUB_START + STUB_SIZE)
#ifndef __ASSEMBLER__
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index c261a77a32f6..27db38e95df9 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -53,8 +53,7 @@ struct stub_syscall {
};
struct stub_data {
- unsigned long offset;
- long err, child_err;
+ long err;
int syscall_data_len;
/* 128 leaves enough room for additional fields in the struct */
diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c
index 15c342426489..47cd3d869fb2 100644
--- a/arch/um/kernel/dtb.c
+++ b/arch/um/kernel/dtb.c
@@ -38,5 +38,5 @@ static int __init uml_dtb_setup(char *line, int *add)
__uml_setup("dtb=", uml_dtb_setup,
"dtb=<file>\n"
-" Boot the kernel with the devicetree blob from the specified file.\n"
+" Boot the kernel with the devicetree blob from the specified file.\n\n"
);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 0dfaf96bb7da..d69d137a0334 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -691,6 +691,11 @@ void __init init_IRQ(void)
os_setup_epoll();
}
+int __init arch_probe_nr_irqs(void)
+{
+ return NR_IRQS;
+}
+
void sigchld_handler(int sig, struct siginfo *unused_si,
struct uml_pt_regs *regs, void *mc)
{
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index ae0fa2173778..17da0a870650 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -986,26 +986,26 @@ static int setup_time_travel(char *str)
__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
-"This option just enables basic time travel mode, in which the clock/timers\n"
-"inside the UML instance skip forward when there's nothing to do, rather than\n"
-"waiting for real time to elapse. However, instance CPU speed is limited by\n"
-"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
-"clock (but quicker when there's nothing to do).\n"
+" This option just enables basic time travel mode, in which the clock/timers\n"
+" inside the UML instance skip forward when there's nothing to do, rather than\n"
+" waiting for real time to elapse. However, instance CPU speed is limited by\n"
+" the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
+" clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
-"This enables time travel mode with infinite processing power, in which there\n"
-"are no wall clock timers, and any CPU processing happens - as seen from the\n"
-"guest - instantly. This can be useful for accurate simulation regardless of\n"
-"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
-"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
+" This enables time travel mode with infinite processing power, in which there\n"
+" are no wall clock timers, and any CPU processing happens - as seen from the\n"
+" guest - instantly. This can be useful for accurate simulation regardless of\n"
+" debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
+" easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
-"This enables time travel mode similar to =inf-cpu, except the system will\n"
-"use the given socket to coordinate with a central scheduler, in order to\n"
-"have more than one system simultaneously be on simulated time. The virtio\n"
-"driver code in UML knows about this so you can also simulate networks and\n"
-"devices using it, assuming the device has the right capabilities.\n"
-"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
+" This enables time travel mode similar to =inf-cpu, except the system will\n"
+" use the given socket to coordinate with a central scheduler, in order to\n"
+" have more than one system simultaneously be on simulated time. The virtio\n"
+" driver code in UML knows about this so you can also simulate networks and\n"
+" devices using it, assuming the device has the right capabilities.\n"
+" The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n");
static int setup_time_travel_start(char *str)
{
@@ -1022,8 +1022,9 @@ static int setup_time_travel_start(char *str)
__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<nanoseconds>\n"
-"Configure the UML instance's wall clock to start at this value rather than\n"
-"the host's wall clock at the time of UML boot.\n");
+" Configure the UML instance's wall clock to start at this value rather than\n"
+" the host's wall clock at the time of UML boot.\n\n");
+
static struct kobject *bc_time_kobject;
static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 2f5ee045bc7a..cfbbbf8500c3 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -54,12 +54,9 @@ static void __init add_arg(char *arg)
/*
* These fields are initialized at boot time and not changed.
- * XXX This structure is used only in the non-SMP case. Maybe this
- * should be moved to smp.c.
*/
struct cpuinfo_um boot_cpu_data = {
.loops_per_jiffy = 0,
- .ipi_pipe = { -1, -1 },
.cache_alignment = L1_CACHE_BYTES,
.x86_capability = { 0 }
};
@@ -331,9 +328,7 @@ int __init linux_main(int argc, char **argv, char **envp)
host_task_size = get_top_address(envp);
/* reserve a few pages for the stubs */
- stub_start = host_task_size - STUB_DATA_PAGES * PAGE_SIZE;
- /* another page for the code portion */
- stub_start -= PAGE_SIZE;
+ stub_start = host_task_size - STUB_SIZE;
host_task_size = stub_start;
/* Limit TASK_SIZE to what is addressable by the page table */
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 78f48fa9db8b..0bc10cd4cbed 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -895,7 +895,7 @@ __uml_setup("noreboot", noreboot_cmd_param,
"noreboot\n"
" Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n"
" This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n"
-" crashes in CI\n");
+" crashes in CI\n\n");
void reboot_skas(void)
{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9d034a987c6e..fa3b616af03a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -412,10 +412,6 @@ config HAVE_INTEL_TXT
def_bool y
depends on INTEL_IOMMU && ACPI
-config X86_64_SMP
- def_bool y
- depends on X86_64 && SMP
-
config ARCH_SUPPORTS_UPROBES
def_bool y
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index c827f694fb72..b1c59fb0a4c9 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -1,26 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
-config AS_AVX512
- def_bool $(as-instr,vpmovm2b %k1$(comma)%zmm5)
- help
- Supported by binutils >= 2.25 and LLVM integrated assembler
-
-config AS_GFNI
- def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
- help
- Supported by binutils >= 2.30 and LLVM integrated assembler
-
-config AS_VAES
- def_bool $(as-instr,vaesenc %ymm0$(comma)%ymm1$(comma)%ymm2)
- help
- Supported by binutils >= 2.30 and LLVM integrated assembler
-
-config AS_VPCLMULQDQ
- def_bool $(as-instr,vpclmulqdq \$0x10$(comma)%ymm0$(comma)%ymm1$(comma)%ymm2)
- help
- Supported by binutils >= 2.30 and LLVM integrated assembler
-
config AS_WRUSS
def_bool $(as-instr64,wrussq %rax$(comma)(%rbx))
help
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 8518ae214c9b..79e15971529d 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -27,7 +27,7 @@ static inline bool variable_test_bit(int nr, const void *addr)
bool v;
const u32 *p = addr;
- asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr));
+ asm("btl %2,%1" : "=@ccc" (v) : "m" (*p), "Ir" (nr));
return v;
}
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 60580836daf7..a3c58ebe3662 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -155,15 +155,15 @@ static inline void wrgs32(u32 v, addr_t addr)
static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
{
bool diff;
- asm volatile("fs repe cmpsb" CC_SET(nz)
- : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ asm volatile("fs repe cmpsb"
+ : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
{
bool diff;
- asm volatile("gs repe cmpsb" CC_SET(nz)
- : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ asm volatile("gs repe cmpsb"
+ : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index f35369bb14c5..b25c6a9303b7 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -32,8 +32,8 @@
int memcmp(const void *s1, const void *s2, size_t len)
{
bool diff;
- asm("repe cmpsb" CC_SET(nz)
- : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ asm("repe cmpsb"
+ : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index d9c6fc78cf33..48d3076b6053 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -306,7 +306,7 @@ config CRYPTO_ARIA_AESNI_AVX2_X86_64
config CRYPTO_ARIA_GFNI_AVX512_X86_64
tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)"
- depends on 64BIT && AS_GFNI
+ depends on 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_ALGAPI
select CRYPTO_ARIA
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index dfba7e5e88ea..2d30d5d36145 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -46,10 +46,8 @@ obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \
aes-gcm-aesni-x86_64.o \
- aes-xts-avx-x86_64.o
-ifeq ($(CONFIG_AS_VAES)$(CONFIG_AS_VPCLMULQDQ),yy)
-aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o
-endif
+ aes-xts-avx-x86_64.o \
+ aes-gcm-avx10-x86_64.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aes-ctr-avx-x86_64.S b/arch/x86/crypto/aes-ctr-avx-x86_64.S
index bbbfd80f5a50..2745918f68ee 100644
--- a/arch/x86/crypto/aes-ctr-avx-x86_64.S
+++ b/arch/x86/crypto/aes-ctr-avx-x86_64.S
@@ -552,7 +552,6 @@ SYM_TYPED_FUNC_START(aes_xctr_crypt_aesni_avx)
_aes_ctr_crypt 1
SYM_FUNC_END(aes_xctr_crypt_aesni_avx)
-#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set VL, 32
.set USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
@@ -570,4 +569,3 @@ SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512)
_aes_ctr_crypt 1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx512)
-#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ
diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index db79cdf81588..a30753a3e207 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -886,7 +886,6 @@ SYM_TYPED_FUNC_START(aes_xts_decrypt_aesni_avx)
_aes_xts_crypt 0
SYM_FUNC_END(aes_xts_decrypt_aesni_avx)
-#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set VL, 32
.set USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2)
@@ -904,4 +903,3 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx512)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx512)
_aes_xts_crypt 0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx512)
-#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 061b1ced93c5..d953ac470aae 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -828,10 +828,8 @@ static struct skcipher_alg skcipher_algs_##suffix[] = {{ \
}}
DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500);
-#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600);
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800);
-#endif
/* The common part of the x86_64 AES-GCM key struct */
struct aes_gcm_key {
@@ -912,17 +910,8 @@ struct aes_gcm_key_avx10 {
#define FLAG_RFC4106 BIT(0)
#define FLAG_ENC BIT(1)
#define FLAG_AVX BIT(2)
-#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
-# define FLAG_AVX10_256 BIT(3)
-# define FLAG_AVX10_512 BIT(4)
-#else
- /*
- * This should cause all calls to the AVX10 assembly functions to be
- * optimized out, avoiding the need to ifdef each call individually.
- */
-# define FLAG_AVX10_256 0
-# define FLAG_AVX10_512 0
-#endif
+#define FLAG_AVX10_256 BIT(3)
+#define FLAG_AVX10_512 BIT(4)
static inline struct aes_gcm_key *
aes_gcm_key_get(struct crypto_aead *tfm, int flags)
@@ -1519,7 +1508,6 @@ DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX,
"generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx",
AES_GCM_KEY_AESNI_SIZE, 500);
-#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
/* aes_gcm_algs_vaes_avx10_256 */
DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
"generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256",
@@ -1529,7 +1517,6 @@ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
"generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
AES_GCM_KEY_AVX10_SIZE, 800);
-#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
static int __init register_avx_algs(void)
{
@@ -1551,7 +1538,6 @@ static int __init register_avx_algs(void)
* Similarly, the assembler support was added at about the same time.
* For simplicity, just always check for VAES and VPCLMULQDQ together.
*/
-#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
if (!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_VAES) ||
!boot_cpu_has(X86_FEATURE_VPCLMULQDQ) ||
@@ -1592,7 +1578,7 @@ static int __init register_avx_algs(void)
ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512));
if (err)
return err;
-#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
+
return 0;
}
@@ -1607,12 +1593,10 @@ static void unregister_avx_algs(void)
{
unregister_skciphers(skcipher_algs_aesni_avx);
unregister_aeads(aes_gcm_algs_aesni_avx);
-#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
unregister_skciphers(skcipher_algs_vaes_avx2);
unregister_skciphers(skcipher_algs_vaes_avx512);
unregister_aeads(aes_gcm_algs_vaes_avx10_256);
unregister_aeads(aes_gcm_algs_vaes_avx10_512);
-#endif
}
#else /* CONFIG_X86_64 */
static struct aead_alg aes_gcm_algs_aesni[0];
diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S
index 9556dacd9841..932fb17308e7 100644
--- a/arch/x86/crypto/aria-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S
@@ -295,7 +295,6 @@
vpshufb t1, t0, t2; \
vpxor t2, x7, x7;
-#ifdef CONFIG_AS_GFNI
#define aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
@@ -318,8 +317,6 @@
vgf2p8affineinvqb $0, t2, x3, x3; \
vgf2p8affineinvqb $0, t2, x7, x7
-#endif /* CONFIG_AS_GFNI */
-
#define aria_sbox_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
@@ -561,7 +558,6 @@
y4, y5, y6, y7, \
mem_tmp, 8);
-#ifdef CONFIG_AS_GFNI
#define aria_fe_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
@@ -719,8 +715,6 @@
y4, y5, y6, y7, \
mem_tmp, 8);
-#endif /* CONFIG_AS_GFNI */
-
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
.section .rodata.cst16, "aM", @progbits, 16
.align 16
@@ -772,7 +766,6 @@
.Ltf_hi__x2__and__fwd_aff:
.octa 0x3F893781E95FE1576CDA64D2BA0CB204
-#ifdef CONFIG_AS_GFNI
/* AES affine: */
#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0)
.Ltf_aff_bitmatrix:
@@ -871,7 +864,6 @@
BV8(0, 0, 0, 0, 0, 1, 0, 0),
BV8(0, 0, 0, 0, 0, 0, 1, 0),
BV8(0, 0, 0, 0, 0, 0, 0, 1))
-#endif /* CONFIG_AS_GFNI */
/* 4-bit mask */
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
@@ -1140,7 +1132,6 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
RET;
SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way)
-#ifdef CONFIG_AS_GFNI
SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
/* input:
* %r9: rk
@@ -1359,4 +1350,3 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
FRAME_END
RET;
SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way)
-#endif /* CONFIG_AS_GFNI */
diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S
index c60fa2980630..ed53d4f46bd7 100644
--- a/arch/x86/crypto/aria-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S
@@ -302,7 +302,6 @@
vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \
vpxor t0, x7, x7;
-#ifdef CONFIG_AS_GFNI
#define aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
@@ -325,7 +324,6 @@
vgf2p8affineinvqb $0, t2, x3, x3; \
vgf2p8affineinvqb $0, t2, x7, x7
-#endif /* CONFIG_AS_GFNI */
#define aria_sbox_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
@@ -598,7 +596,7 @@
aria_load_state_8way(y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, 8);
-#ifdef CONFIG_AS_GFNI
+
#define aria_fe_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
@@ -752,7 +750,6 @@
aria_load_state_8way(y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, 8);
-#endif /* CONFIG_AS_GFNI */
.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
.align 32
@@ -806,7 +803,6 @@
.Ltf_hi__x2__and__fwd_aff:
.octa 0x3F893781E95FE1576CDA64D2BA0CB204
-#ifdef CONFIG_AS_GFNI
.section .rodata.cst8, "aM", @progbits, 8
.align 8
/* AES affine: */
@@ -868,8 +864,6 @@
BV8(0, 0, 0, 0, 0, 0, 1, 0),
BV8(0, 0, 0, 0, 0, 0, 0, 1))
-#endif /* CONFIG_AS_GFNI */
-
/* 4-bit mask */
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
.align 4
@@ -1219,7 +1213,6 @@ SYM_TYPED_FUNC_START(aria_aesni_avx2_ctr_crypt_32way)
RET;
SYM_FUNC_END(aria_aesni_avx2_ctr_crypt_32way)
-#ifdef CONFIG_AS_GFNI
SYM_FUNC_START_LOCAL(__aria_aesni_avx2_gfni_crypt_32way)
/* input:
* %r9: rk
@@ -1438,4 +1431,3 @@ SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_ctr_crypt_32way)
FRAME_END
RET;
SYM_FUNC_END(aria_aesni_avx2_gfni_ctr_crypt_32way)
-#endif /* CONFIG_AS_GFNI */
diff --git a/arch/x86/crypto/aria_aesni_avx2_glue.c b/arch/x86/crypto/aria_aesni_avx2_glue.c
index 007b250f774c..1487a49bfbac 100644
--- a/arch/x86/crypto/aria_aesni_avx2_glue.c
+++ b/arch/x86/crypto/aria_aesni_avx2_glue.c
@@ -26,7 +26,6 @@ asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_ctr_crypt_32way);
-#ifdef CONFIG_AS_GFNI
asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_encrypt_32way);
@@ -37,7 +36,6 @@ asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_ctr_crypt_32way);
-#endif /* CONFIG_AS_GFNI */
static struct aria_avx_ops aria_ops;
@@ -213,7 +211,7 @@ static int __init aria_avx2_init(void)
return -ENODEV;
}
- if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) {
+ if (boot_cpu_has(X86_FEATURE_GFNI)) {
aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c
index 4c88ef4eba82..e4e3d78915a5 100644
--- a/arch/x86/crypto/aria_aesni_avx_glue.c
+++ b/arch/x86/crypto/aria_aesni_avx_glue.c
@@ -26,7 +26,6 @@ asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way);
-#ifdef CONFIG_AS_GFNI
asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way);
@@ -37,7 +36,6 @@ asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way);
-#endif /* CONFIG_AS_GFNI */
static struct aria_avx_ops aria_ops;
@@ -199,7 +197,7 @@ static int __init aria_avx_init(void)
return -ENODEV;
}
- if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) {
+ if (boot_cpu_has(X86_FEATURE_GFNI)) {
aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 94519688b007..77e2d920a640 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -99,7 +99,7 @@ For 32-bit we have the following conventions - kernel is built with
.endif
.endm
-.macro CLEAR_REGS clear_bp=1
+.macro CLEAR_REGS clear_callee=1
/*
* Sanitize registers of values that a speculation attack might
* otherwise want to exploit. The lower registers are likely clobbered
@@ -113,20 +113,19 @@ For 32-bit we have the following conventions - kernel is built with
xorl %r9d, %r9d /* nospec r9 */
xorl %r10d, %r10d /* nospec r10 */
xorl %r11d, %r11d /* nospec r11 */
+ .if \clear_callee
xorl %ebx, %ebx /* nospec rbx */
- .if \clear_bp
xorl %ebp, %ebp /* nospec rbp */
- .endif
xorl %r12d, %r12d /* nospec r12 */
xorl %r13d, %r13d /* nospec r13 */
xorl %r14d, %r14d /* nospec r14 */
xorl %r15d, %r15d /* nospec r15 */
-
+ .endif
.endm
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1
PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint
- CLEAR_REGS clear_bp=\clear_bp
+ CLEAR_REGS clear_callee=\clear_callee
.endm
.macro POP_REGS pop_rdi=1
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index 907bd233c6c1..fafbd3e68cb8 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -97,8 +97,7 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
push %rdi /* fred_ss handed in by the caller */
push %rbp
pushf
- mov $__KERNEL_CS, %rax
- push %rax
+ push $__KERNEL_CS
/*
* Unlike the IDT event delivery, FRED _always_ pushes an error code
@@ -112,18 +111,37 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
push %rax /* Return RIP */
push $0 /* Error code, 0 for IRQ/NMI */
- PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0
+ PUSH_AND_CLEAR_REGS clear_callee=0 unwind_hint=0
+
movq %rsp, %rdi /* %rdi -> pt_regs */
+ /*
+ * At this point: {rdi, rsi, rdx, rcx, r8, r9}, {r10, r11}, {rax, rdx}
+ * are clobbered, which corresponds to: arguments, extra caller-saved
+ * and return. All registers a C function is allowed to clobber.
+ *
+ * Notably, the callee-saved registers: {rbx, r12, r13, r14, r15}
+ * are untouched, with the exception of rbp, which carries the stack
+ * frame and will be restored before exit.
+ *
+ * Further calling another C function will not alter this state.
+ */
call __fred_entry_from_kvm /* Call the C entry point */
- POP_REGS
- ERETS
-1:
+
/*
- * Objtool doesn't understand what ERETS does, this hint tells it that
- * yes, we'll reach here and with what stack state. A save/restore pair
- * isn't strictly needed, but it's the simplest form.
+ * When FRED, use ERETS to potentially clear NMIs, otherwise simply
+ * restore the stack pointer.
+ */
+ ALTERNATIVE "nop; nop; mov %rbp, %rsp", \
+ __stringify(add $C_PTREGS_SIZE, %rsp; ERETS), \
+ X86_FEATURE_FRED
+
+1: /*
+ * Objtool doesn't understand ERETS, and the cfi register state is
+ * different from initial_func_cfi due to PUSH_REGS. Tell it the state
+ * is similar to where UNWIND_HINT_SAVE is.
*/
UNWIND_HINT_RESTORE
+
pop %rbp
RET
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index afdbda2dd7b7..e890fd37e9c2 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -17,7 +17,6 @@
#include <asm/desc.h>
#include <asm/e820/api.h>
#include <asm/sev.h>
-#include <asm/ibt.h>
#include <asm/hypervisor.h>
#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
@@ -37,7 +36,45 @@
#include <linux/export.h>
void *hv_hypercall_pg;
+
+#ifdef CONFIG_X86_64
+static u64 __hv_hyperfail(u64 control, u64 param1, u64 param2)
+{
+ return U64_MAX;
+}
+
+DEFINE_STATIC_CALL(__hv_hypercall, __hv_hyperfail);
+
+u64 hv_std_hypercall(u64 control, u64 param1, u64 param2)
+{
+ u64 hv_status;
+
+ register u64 __r8 asm("r8") = param2;
+ asm volatile ("call " STATIC_CALL_TRAMP_STR(__hv_hypercall)
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ "+c" (control), "+d" (param1), "+r" (__r8)
+ : : "cc", "memory", "r9", "r10", "r11");
+
+ return hv_status;
+}
+
+typedef u64 (*hv_hypercall_f)(u64 control, u64 param1, u64 param2);
+
+static inline void hv_set_hypercall_pg(void *ptr)
+{
+ hv_hypercall_pg = ptr;
+
+ if (!ptr)
+ ptr = &__hv_hyperfail;
+ static_call_update(__hv_hypercall, (hv_hypercall_f)ptr);
+}
+#else
+static inline void hv_set_hypercall_pg(void *ptr)
+{
+ hv_hypercall_pg = ptr;
+}
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
+#endif
union hv_ghcb * __percpu *hv_ghcb_pg;
@@ -330,7 +367,7 @@ static int hv_suspend(void)
* pointer is restored on resume.
*/
hv_hypercall_pg_saved = hv_hypercall_pg;
- hv_hypercall_pg = NULL;
+ hv_set_hypercall_pg(NULL);
/* Disable the hypercall page in the hypervisor */
rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -356,7 +393,7 @@ static void hv_resume(void)
vmalloc_to_pfn(hv_hypercall_pg_saved);
wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
- hv_hypercall_pg = hv_hypercall_pg_saved;
+ hv_set_hypercall_pg(hv_hypercall_pg_saved);
hv_hypercall_pg_saved = NULL;
/*
@@ -476,8 +513,8 @@ void __init hyperv_init(void)
if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present)
goto skip_hypercall_pg_init;
- hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
- VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
+ hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, MODULES_VADDR,
+ MODULES_END, GFP_KERNEL, PAGE_KERNEL_ROX,
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
__builtin_return_address(0));
if (hv_hypercall_pg == NULL)
@@ -515,27 +552,9 @@ void __init hyperv_init(void)
wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
}
-skip_hypercall_pg_init:
- /*
- * Some versions of Hyper-V that provide IBT in guest VMs have a bug
- * in that there's no ENDBR64 instruction at the entry to the
- * hypercall page. Because hypercalls are invoked via an indirect call
- * to the hypercall page, all hypercall attempts fail when IBT is
- * enabled, and Linux panics. For such buggy versions, disable IBT.
- *
- * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall
- * page, so if future Linux kernel versions enable IBT for 32-bit
- * builds, additional hypercall page hackery will be required here
- * to provide an ENDBR32.
- */
-#ifdef CONFIG_X86_KERNEL_IBT
- if (cpu_feature_enabled(X86_FEATURE_IBT) &&
- *(u32 *)hv_hypercall_pg != gen_endbr()) {
- setup_clear_cpu_cap(X86_FEATURE_IBT);
- pr_warn("Disabling IBT because of Hyper-V bug\n");
- }
-#endif
+ hv_set_hypercall_pg(hv_hypercall_pg);
+skip_hypercall_pg_init:
/*
* hyperv_init() is called before LAPIC is initialized: see
* apic_intr_mode_init() -> x86_platform.apic_post_init() and
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 090f5ac9f492..c3ba12b1bc07 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/irq.h>
#include <linux/export.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <asm/mshyperv.h>
static int hv_map_interrupt(union hv_device_id device_id, bool level,
@@ -289,59 +290,99 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
(void)hv_unmap_msi_interrupt(dev, &old_entry);
}
-static void hv_msi_free_irq(struct irq_domain *domain,
- struct msi_domain_info *info, unsigned int virq)
-{
- struct irq_data *irqd = irq_get_irq_data(virq);
- struct msi_desc *desc;
-
- if (!irqd)
- return;
-
- desc = irq_data_get_msi_desc(irqd);
- if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
- return;
-
- hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
-}
-
/*
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
* which implement the MSI or MSI-X Capability Structure.
*/
static struct irq_chip hv_pci_msi_controller = {
.name = "HV-PCI-MSI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = hv_irq_compose_msi_msg,
- .irq_set_affinity = msi_domain_set_affinity,
- .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
};
-static struct msi_domain_ops pci_msi_domain_ops = {
- .msi_free = hv_msi_free_irq,
- .msi_prepare = pci_msi_prepare,
+static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+ struct irq_chip *chip = info->chip;
+
+ if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+ return false;
+
+ chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED;
+
+ info->ops->msi_prepare = pci_msi_prepare;
+
+ return true;
+}
+
+#define HV_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX)
+#define HV_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS)
+
+static struct msi_parent_ops hv_msi_parent_ops = {
+ .supported_flags = HV_MSI_FLAGS_SUPPORTED,
+ .required_flags = HV_MSI_FLAGS_REQUIRED,
+ .bus_select_token = DOMAIN_BUS_NEXUS,
+ .bus_select_mask = MATCH_PCI_MSI,
+ .chip_flags = MSI_CHIP_FLAG_SET_ACK,
+ .prefix = "HV-",
+ .init_dev_msi_info = hv_init_dev_msi_info,
};
-static struct msi_domain_info hv_pci_msi_domain_info = {
- .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_PCI_MSIX,
- .ops = &pci_msi_domain_ops,
- .chip = &hv_pci_msi_controller,
- .handler = handle_edge_irq,
- .handler_name = "edge",
+static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs,
+ void *arg)
+{
+ /*
+ * TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e. everything except
+ * entry_to_msi_msg() should be in here.
+ */
+
+ int ret;
+
+ ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
+ if (ret)
+ return ret;
+
+ for (int i = 0; i < nr_irqs; ++i) {
+ irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller, NULL,
+ handle_edge_irq, NULL, "edge");
+ }
+ return 0;
+}
+
+static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
+{
+ for (int i = 0; i < nr_irqs; ++i) {
+ struct irq_data *irqd = irq_domain_get_irq_data(d, virq);
+ struct msi_desc *desc;
+
+ desc = irq_data_get_msi_desc(irqd);
+ if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
+ continue;
+
+ hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
+ }
+ irq_domain_free_irqs_top(d, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops hv_msi_domain_ops = {
+ .select = msi_lib_irq_domain_select,
+ .alloc = hv_msi_domain_alloc,
+ .free = hv_msi_domain_free,
};
struct irq_domain * __init hv_create_pci_msi_domain(void)
{
struct irq_domain *d = NULL;
- struct fwnode_handle *fn;
- fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
- if (fn)
- d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);
+ struct irq_domain_info info = {
+ .fwnode = irq_domain_alloc_named_fwnode("HV-PCI-MSI"),
+ .ops = &hv_msi_domain_ops,
+ .parent = x86_vector_domain,
+ };
+
+ if (info.fwnode)
+ d = msi_create_parent_irq_domain(&info, &hv_msi_parent_ops);
/* No point in going further if we can't get an irq domain */
BUG_ON(!d);
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index ade6c665c97e..651771534cae 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -385,9 +385,23 @@ int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu)
return ret;
}
+u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
+{
+ u64 hv_status;
+
+ register u64 __r8 asm("r8") = param2;
+ asm volatile("vmmcall"
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ "+c" (control), "+d" (param1), "+r" (__r8)
+ : : "cc", "memory", "r9", "r10", "r11");
+
+ return hv_status;
+}
+
#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
+u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
#ifdef CONFIG_INTEL_TDX_GUEST
@@ -437,6 +451,7 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
#else
static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
#endif /* CONFIG_INTEL_TDX_GUEST */
#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
@@ -463,6 +478,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value)
}
/*
+ * Keep track of the PFN regions which were shared with the host. The access
+ * must be revoked upon kexec/kdump (see hv_ivm_clear_host_access()).
+ */
+struct hv_enc_pfn_region {
+ struct list_head list;
+ u64 pfn;
+ int count;
+};
+
+static LIST_HEAD(hv_list_enc);
+static DEFINE_RAW_SPINLOCK(hv_list_enc_lock);
+
+static int hv_list_enc_add(const u64 *pfn_list, int count)
+{
+ struct hv_enc_pfn_region *ent;
+ unsigned long flags;
+ u64 pfn;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ pfn = pfn_list[i];
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ /* Check if the PFN already exists in some region first */
+ list_for_each_entry(ent, &hv_list_enc, list) {
+ if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn))
+ /* Nothing to do - pfn is already in the list */
+ goto unlock_done;
+ }
+
+ /*
+ * Check if the PFN is adjacent to an existing region. Growing
+ * a region can make it adjacent to another one but merging is
+ * not (yet) implemented for simplicity. A PFN cannot be added
+ * to two regions to keep the logic in hv_list_enc_remove()
+ * correct.
+ */
+ list_for_each_entry(ent, &hv_list_enc, list) {
+ if (ent->pfn + ent->count == pfn) {
+ /* Grow existing region up */
+ ent->count++;
+ goto unlock_done;
+ } else if (pfn + 1 == ent->pfn) {
+ /* Grow existing region down */
+ ent->pfn--;
+ ent->count++;
+ goto unlock_done;
+ }
+ }
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+
+ /* No adjacent region found -- create a new one */
+ ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->pfn = pfn;
+ ent->count = 1;
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ list_add(&ent->list, &hv_list_enc);
+
+unlock_done:
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+ }
+
+ return 0;
+}
+
+static int hv_list_enc_remove(const u64 *pfn_list, int count)
+{
+ struct hv_enc_pfn_region *ent, *t;
+ struct hv_enc_pfn_region new_region;
+ unsigned long flags;
+ u64 pfn;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ pfn = pfn_list[i];
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ list_for_each_entry_safe(ent, t, &hv_list_enc, list) {
+ if (pfn == ent->pfn + ent->count - 1) {
+ /* Removing tail pfn */
+ ent->count--;
+ if (!ent->count) {
+ list_del(&ent->list);
+ kfree(ent);
+ }
+ goto unlock_done;
+ } else if (pfn == ent->pfn) {
+ /* Removing head pfn */
+ ent->count--;
+ ent->pfn++;
+ if (!ent->count) {
+ list_del(&ent->list);
+ kfree(ent);
+ }
+ goto unlock_done;
+ } else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) {
+ /*
+ * Removing a pfn in the middle. Cut off the tail
+ * of the existing region and create a template for
+ * the new one.
+ */
+ new_region.pfn = pfn + 1;
+ new_region.count = ent->count - (pfn - ent->pfn + 1);
+ ent->count = pfn - ent->pfn;
+ goto unlock_split;
+ }
+
+ }
+unlock_done:
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+ continue;
+
+unlock_split:
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+
+ ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->pfn = new_region.pfn;
+ ent->count = new_region.count;
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ list_add(&ent->list, &hv_list_enc);
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+ }
+
+ return 0;
+}
+
+/* Stop new private<->shared conversions */
+static void hv_vtom_kexec_begin(void)
+{
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ /*
+ * Crash kernel reaches here with interrupts disabled: can't wait for
+ * conversions to finish.
+ *
+ * If race happened, just report and proceed.
+ */
+ if (!set_memory_enc_stop_conversion())
+ pr_warn("Failed to stop shared<->private conversions\n");
+}
+
+static void hv_vtom_kexec_finish(void)
+{
+ struct hv_gpa_range_for_visibility *input;
+ struct hv_enc_pfn_region *ent;
+ unsigned long flags;
+ u64 hv_status;
+ int cur, i;
+
+ local_irq_save(flags);
+ input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ if (unlikely(!input))
+ goto out;
+
+ list_for_each_entry(ent, &hv_list_enc, list) {
+ for (i = 0, cur = 0; i < ent->count; i++) {
+ input->gpa_page_list[cur] = ent->pfn + i;
+ cur++;
+
+ if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) {
+ input->partition_id = HV_PARTITION_ID_SELF;
+ input->host_visibility = VMBUS_PAGE_NOT_VISIBLE;
+ input->reserved0 = 0;
+ input->reserved1 = 0;
+ hv_status = hv_do_rep_hypercall(
+ HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY,
+ cur, 0, input, NULL);
+ WARN_ON_ONCE(!hv_result_success(hv_status));
+ cur = 0;
+ }
+ }
+
+ }
+
+out:
+ local_irq_restore(flags);
+}
+
+/*
* hv_mark_gpa_visibility - Set pages visible to host via hvcall.
*
* In Isolation VM, all guest memory is encrypted from host and guest
@@ -475,6 +679,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
struct hv_gpa_range_for_visibility *input;
u64 hv_status;
unsigned long flags;
+ int ret;
/* no-op if partition isolation is not enabled */
if (!hv_is_isolation_supported())
@@ -486,6 +691,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
return -EINVAL;
}
+ if (visibility == VMBUS_PAGE_NOT_VISIBLE)
+ ret = hv_list_enc_remove(pfn, count);
+ else
+ ret = hv_list_enc_add(pfn, count);
+ if (ret)
+ return ret;
+
local_irq_save(flags);
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
@@ -506,8 +718,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
if (hv_result_success(hv_status))
return 0;
+
+ if (visibility == VMBUS_PAGE_NOT_VISIBLE)
+ ret = hv_list_enc_add(pfn, count);
else
- return -EFAULT;
+ ret = hv_list_enc_remove(pfn, count);
+ /*
+ * There's no good way to recover from -ENOMEM here, the accounting is
+ * wrong either way.
+ */
+ WARN_ON_ONCE(ret);
+
+ return -EFAULT;
}
/*
@@ -669,6 +891,8 @@ void __init hv_vtom_init(void)
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
+ x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin;
+ x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish;
/* Set WB as the default cache mode. */
guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 02bae8e0758b..4c305305871b 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -23,8 +23,7 @@ static inline bool __must_check rdrand_long(unsigned long *v)
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
asm volatile("rdrand %[out]"
- CC_SET(c)
- : CC_OUT(c) (ok), [out] "=r" (*v));
+ : "=@ccc" (ok), [out] "=r" (*v));
if (ok)
return true;
} while (--retry);
@@ -35,8 +34,7 @@ static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
asm volatile("rdseed %[out]"
- CC_SET(c)
- : CC_OUT(c) (ok), [out] "=r" (*v));
+ : "=@ccc" (ok), [out] "=r" (*v));
return ok;
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index f963848024a5..d5c8d3afe196 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -122,18 +122,6 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
}
#endif
-/*
- * Macros to generate condition code outputs from inline assembly,
- * The output operand must be type "bool".
- */
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
-# define CC_OUT(c) "=@cc" #c
-#else
-# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
-# define CC_OUT(c) [_cc_ ## c] "=qm"
-#endif
-
#ifdef __KERNEL__
# include <asm/extable_fixup_types.h>
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index a835f891164d..c2ce213f2b9b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -99,8 +99,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
{
bool negative;
asm_inline volatile(LOCK_PREFIX "xorb %2,%1"
- CC_SET(s)
- : CC_OUT(s) (negative), WBYTE_ADDR(addr)
+ : "=@ccs" (negative), WBYTE_ADDR(addr)
: "iq" ((char)mask) : "memory");
return negative;
}
@@ -149,8 +148,7 @@ arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
bool oldbit;
asm(__ASM_SIZE(bts) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -175,8 +173,7 @@ arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
bool oldbit;
asm volatile(__ASM_SIZE(btr) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -187,8 +184,7 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
bool oldbit;
asm volatile(__ASM_SIZE(btc) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: ADDR, "Ir" (nr) : "memory");
return oldbit;
@@ -211,8 +207,7 @@ static __always_inline bool constant_test_bit_acquire(long nr, const volatile un
bool oldbit;
asm volatile("testb %2,%1"
- CC_SET(nz)
- : CC_OUT(nz) (oldbit)
+ : "=@ccnz" (oldbit)
: "m" (((unsigned char *)addr)[nr >> 3]),
"i" (1 << (nr & 7))
:"memory");
@@ -225,8 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
bool oldbit;
asm volatile(__ASM_SIZE(bt) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
return oldbit;
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 20fcb8507ad1..880ca15073ed 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -5,14 +5,19 @@
#include <linux/stringify.h>
#include <linux/instrumentation.h>
#include <linux/objtool.h>
+#include <asm/asm.h>
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*/
-#define ASM_UD2 ".byte 0x0f, 0x0b"
+#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b)
#define INSN_UD2 0x0b0f
#define LEN_UD2 2
+#define ASM_UDB _ASM_BYTES(0xd6)
+#define INSN_UDB 0xd6
+#define LEN_UDB 1
+
/*
* In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
*/
@@ -26,7 +31,7 @@
#define BUG_UD2 0xfffe
#define BUG_UD1 0xfffd
#define BUG_UD1_UBSAN 0xfffc
-#define BUG_EA 0xffea
+#define BUG_UDB 0xffd6
#define BUG_LOCK 0xfff0
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 976b90a3d190..c40b9ebc1fb4 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -71,12 +71,10 @@
*
* __cfi_foo:
* endbr64
- * subl 0x12345678, %r10d
- * jz foo
- * ud2
- * nop
+ * subl 0x12345678, %eax
+ * jne.32,pn foo+3
* foo:
- * osp nop3 # was endbr64
+ * nopl -42(%rax) # was endbr64
* ... code here ...
* ret
*
@@ -86,9 +84,9 @@
* indirect caller:
* lea foo(%rip), %r11
* ...
- * movl $0x12345678, %r10d
- * subl $16, %r11
- * nop4
+ * movl $0x12345678, %eax
+ * lea -0x10(%r11), %r11
+ * nop5
* call *%r11
*
*/
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index b61f32c3459f..a88b06f1c35e 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -166,8 +166,7 @@ extern void __add_wrong_size(void)
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
@@ -178,8 +177,7 @@ extern void __add_wrong_size(void)
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
@@ -190,8 +188,7 @@ extern void __add_wrong_size(void)
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
@@ -202,8 +199,7 @@ extern void __add_wrong_size(void)
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 371f7906019e..1f80a62be969 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -46,8 +46,7 @@ static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new
bool ret; \
\
asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
- CC_SET(e) \
- : CC_OUT(e) (ret), \
+ : "=@ccz" (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
@@ -125,8 +124,7 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
_lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
- CC_SET(e) \
- : ALT_OUTPUT_SP(CC_OUT(e) (ret), \
+ : ALT_OUTPUT_SP("=@ccz" (ret), \
"+a" (o.low), "+d" (o.high)) \
: "b" (n.low), "c" (n.high), \
[ptr] "S" (_ptr) \
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 71d1e72ed879..5afea056fb20 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -66,8 +66,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
bool ret; \
\
asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
- CC_SET(e) \
- : CC_OUT(e) (ret), \
+ : "=@ccz" (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b2a562217d3f..4091a776e37a 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -444,6 +444,7 @@
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
@@ -497,6 +498,7 @@
#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */
+#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */
/*
* BUG word(s)
diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h
index 28d845257303..5e45d6424722 100644
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -59,10 +59,10 @@ static __always_inline __attribute_const__ u32 gen_endbr(void)
static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
{
/*
- * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
- * will be unique to (former) ENDBR sites.
+ * 4 byte NOP that isn't NOP4, such that it will be unique to (former)
+ * ENDBR sites. Additionally it carries UDB as immediate.
*/
- return 0x001f0f66; /* osp nopl (%rax) */
+ return 0xd6401f0f; /* nopl -42(%rax) */
}
static inline bool __is_endbr(u32 val)
@@ -70,10 +70,6 @@ static inline bool __is_endbr(u32 val)
if (val == gen_endbr_poison())
return true;
- /* See cfi_fineibt_bhi_preamble() */
- if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5)
- return true;
-
val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
return val == gen_endbr();
}
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index a4ec27c67988..abd637e54e94 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -460,17 +460,12 @@ __visible noinstr void func(struct pt_regs *regs, \
#endif
void idt_install_sysvec(unsigned int n, const void *function);
-
-#ifdef CONFIG_X86_FRED
void fred_install_sysvec(unsigned int vector, const idtentry_t function);
-#else
-static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { }
-#endif
#define sysvec_install(vector, function) { \
- if (cpu_feature_enabled(X86_FEATURE_FRED)) \
+ if (IS_ENABLED(CONFIG_X86_FRED)) \
fred_install_sysvec(vector, function); \
- else \
+ if (!cpu_feature_enabled(X86_FEATURE_FRED)) \
idt_install_sysvec(vector, asm_##function); \
}
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 62c3e4de3303..fdf178443f85 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -138,7 +138,7 @@ KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
KVM_X86_OP_OPTIONAL(migrate_timers)
-KVM_X86_OP(recalc_msr_intercepts)
+KVM_X86_OP(recalc_intercepts)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c56cc54d682a..48598d017d6f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -120,7 +120,7 @@
#define KVM_REQ_TLB_FLUSH_GUEST \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
-#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
+#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
@@ -142,7 +142,7 @@
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
- | X86_CR4_LAM_SUP))
+ | X86_CR4_LAM_SUP | X86_CR4_CET))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -267,6 +267,7 @@ enum x86_intercept_stage;
#define PFERR_RSVD_MASK BIT(3)
#define PFERR_FETCH_MASK BIT(4)
#define PFERR_PK_MASK BIT(5)
+#define PFERR_SS_MASK BIT(6)
#define PFERR_SGX_MASK BIT(15)
#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
@@ -545,10 +546,10 @@ struct kvm_pmc {
#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
KVM_MAX_NR_AMD_GP_COUNTERS)
-#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3
-#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0
-#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \
- KVM_MAX_NR_AMD_FIXED_COUTNERS)
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
+#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
+#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
+ KVM_MAX_NR_AMD_FIXED_COUNTERS)
struct kvm_pmu {
u8 version;
@@ -579,6 +580,9 @@ struct kvm_pmu {
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
+
u64 ds_area;
u64 pebs_enable;
u64 pebs_enable_rsvd;
@@ -771,6 +775,7 @@ enum kvm_only_cpuid_leafs {
CPUID_7_2_EDX,
CPUID_24_0_EBX,
CPUID_8000_0021_ECX,
+ CPUID_7_1_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -811,7 +816,6 @@ struct kvm_vcpu_arch {
bool at_instruction_boundary;
bool tpr_access_reporting;
bool xfd_no_write_intercept;
- u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
u64 perf_capabilities;
@@ -872,6 +876,8 @@ struct kvm_vcpu_arch {
u64 xcr0;
u64 guest_supported_xcr0;
+ u64 ia32_xss;
+ u64 guest_supported_xss;
struct kvm_pio_request pio;
void *pio_data;
@@ -926,6 +932,7 @@ struct kvm_vcpu_arch {
bool emulate_regs_need_sync_from_vcpu;
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
unsigned long cui_linear_rip;
+ int cui_rdmsr_imm_reg;
gpa_t time;
s8 pvclock_tsc_shift;
@@ -1348,6 +1355,30 @@ enum kvm_apicv_inhibit {
__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \
__APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
+struct kvm_possible_nx_huge_pages {
+ /*
+ * A list of kvm_mmu_page structs that, if zapped, could possibly be
+ * replaced by an NX huge page. A shadow page is on this list if its
+ * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+ * and there are no other conditions that prevent a huge page, e.g.
+ * the backing host page is huge, dirtly logging is not enabled for its
+ * memslot, etc... Note, zapping shadow pages on this list doesn't
+ * guarantee an NX huge page will be created in its stead, e.g. if the
+ * guest attempts to execute from the region then KVM obviously can't
+ * create an NX huge page (without hanging the guest).
+ */
+ struct list_head pages;
+ u64 nr_pages;
+};
+
+enum kvm_mmu_type {
+ KVM_SHADOW_MMU,
+#ifdef CONFIG_X86_64
+ KVM_TDP_MMU,
+#endif
+ KVM_NR_MMU_TYPES,
+};
+
struct kvm_arch {
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
@@ -1357,21 +1388,11 @@ struct kvm_arch {
u8 vm_type;
bool has_private_mem;
bool has_protected_state;
+ bool has_protected_eoi;
bool pre_fault_allowed;
struct hlist_head *mmu_page_hash;
struct list_head active_mmu_pages;
- /*
- * A list of kvm_mmu_page structs that, if zapped, could possibly be
- * replaced by an NX huge page. A shadow page is on this list if its
- * existence disallows an NX huge page (nx_huge_page_disallowed is set)
- * and there are no other conditions that prevent a huge page, e.g.
- * the backing host page is huge, dirtly logging is not enabled for its
- * memslot, etc... Note, zapping shadow pages on this list doesn't
- * guarantee an NX huge page will be created in its stead, e.g. if the
- * guest attempts to execute from the region then KVM obviously can't
- * create an NX huge page (without hanging the guest).
- */
- struct list_head possible_nx_huge_pages;
+ struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES];
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
struct kvm_page_track_notifier_head track_notifier_head;
#endif
@@ -1526,7 +1547,7 @@ struct kvm_arch {
* is held in read mode:
* - tdp_mmu_roots (above)
* - the link field of kvm_mmu_page structs used by the TDP MMU
- * - possible_nx_huge_pages;
+ * - possible_nx_huge_pages[KVM_TDP_MMU];
* - the possible_nx_huge_page_link field of kvm_mmu_page structs used
* by the TDP MMU
* Because the lock is only taken within the MMU lock, strictly
@@ -1908,7 +1929,7 @@ struct kvm_x86_ops {
int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
void (*migrate_timers)(struct kvm_vcpu *vcpu);
- void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu);
+ void (*recalc_intercepts)(struct kvm_vcpu *vcpu);
int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
@@ -2149,13 +2170,16 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
-int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
-int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
int kvm_emulate_invd(struct kvm_vcpu *vcpu);
int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
@@ -2187,6 +2211,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -2354,6 +2379,7 @@ int kvm_add_user_return_msr(u32 msr);
int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
void kvm_user_return_msr_update_cache(unsigned int index, u64 val);
+u64 kvm_get_user_return_msr(unsigned int slot);
static inline bool kvm_is_supported_user_return_msr(u32 msr)
{
@@ -2390,9 +2416,6 @@ void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
-bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
- struct kvm_vcpu **dest_vcpu);
-
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
{
/* We can only post Fixed and LowPrio IRQs */
diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h
index 08f1b57d3b62..23268a188e70 100644
--- a/arch/x86/include/asm/kvm_types.h
+++ b/arch/x86/include/asm/kvm_types.h
@@ -2,6 +2,16 @@
#ifndef _ASM_X86_KVM_TYPES_H
#define _ASM_X86_KVM_TYPES_H
+#if IS_MODULE(CONFIG_KVM_AMD) && IS_MODULE(CONFIG_KVM_INTEL)
+#define KVM_SUB_MODULES kvm-amd,kvm-intel
+#elif IS_MODULE(CONFIG_KVM_AMD)
+#define KVM_SUB_MODULES kvm-amd
+#elif IS_MODULE(CONFIG_KVM_INTEL)
+#define KVM_SUB_MODULES kvm-intel
+#else
+#undef KVM_SUB_MODULES
+#endif
+
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
#endif /* _ASM_X86_KVM_TYPES_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index abc4659f5809..605abd02158d 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,6 +6,7 @@
#include <linux/nmi.h>
#include <linux/msi.h>
#include <linux/io.h>
+#include <linux/static_call.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/msr.h>
@@ -39,16 +40,21 @@ static inline unsigned char hv_get_nmi_reason(void)
return 0;
}
-#if IS_ENABLED(CONFIG_HYPERV)
-extern bool hyperv_paravisor_present;
+extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2);
+#if IS_ENABLED(CONFIG_HYPERV)
extern void *hv_hypercall_pg;
extern union hv_ghcb * __percpu *hv_ghcb_pg;
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
-u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
+#endif
/*
* DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
@@ -65,37 +71,15 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
- u64 hv_status;
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
- return hv_tdx_hypercall(control, input_address, output_address);
-
- if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %[output_address], %%r8\n"
- "vmmcall"
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input_address)
- : [output_address] "r" (output_address)
- : "cc", "memory", "r8", "r9", "r10", "r11");
- return hv_status;
- }
-
- if (!hv_hypercall_pg)
- return U64_MAX;
-
- __asm__ __volatile__("mov %[output_address], %%r8\n"
- CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input_address)
- : [output_address] "r" (output_address),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "memory", "r8", "r9", "r10", "r11");
+ return static_call_mod(hv_hypercall)(control, input_address, output_address);
#else
u32 input_address_hi = upper_32_bits(input_address);
u32 input_address_lo = lower_32_bits(input_address);
u32 output_address_hi = upper_32_bits(output_address);
u32 output_address_lo = lower_32_bits(output_address);
+ u64 hv_status;
if (!hv_hypercall_pg)
return U64_MAX;
@@ -108,48 +92,30 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
"D"(output_address_hi), "S"(output_address_lo),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory");
-#endif /* !x86_64 */
return hv_status;
+#endif /* !x86_64 */
}
/* Fast hypercall with 8 bytes of input and no output */
static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
{
- u64 hv_status;
-
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
- return hv_tdx_hypercall(control, input1, 0);
-
- if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__(
- "vmmcall"
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- :: "cc", "r8", "r9", "r10", "r11");
- } else {
- __asm__ __volatile__(CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : THUNK_TARGET(hv_hypercall_pg)
- : "cc", "r8", "r9", "r10", "r11");
- }
+ return static_call_mod(hv_hypercall)(control, input1, 0);
#else
- {
- u32 input1_hi = upper_32_bits(input1);
- u32 input1_lo = lower_32_bits(input1);
-
- __asm__ __volatile__ (CALL_NOSPEC
- : "=A"(hv_status),
- "+c"(input1_lo),
- ASM_CALL_CONSTRAINT
- : "A" (control),
- "b" (input1_hi),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "edi", "esi");
- }
-#endif
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u64 hv_status;
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo),
+ ASM_CALL_CONSTRAINT
+ : "A" (control),
+ "b" (input1_hi),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc", "edi", "esi");
return hv_status;
+#endif
}
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
@@ -162,45 +128,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
/* Fast hypercall with 16 bytes of input */
static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
{
- u64 hv_status;
-
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
- return hv_tdx_hypercall(control, input1, input2);
-
- if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %[input2], %%r8\n"
- "vmmcall"
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : [input2] "r" (input2)
- : "cc", "r8", "r9", "r10", "r11");
- } else {
- __asm__ __volatile__("mov %[input2], %%r8\n"
- CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : [input2] "r" (input2),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "r8", "r9", "r10", "r11");
- }
+ return static_call_mod(hv_hypercall)(control, input1, input2);
#else
- {
- u32 input1_hi = upper_32_bits(input1);
- u32 input1_lo = lower_32_bits(input1);
- u32 input2_hi = upper_32_bits(input2);
- u32 input2_lo = lower_32_bits(input2);
-
- __asm__ __volatile__ (CALL_NOSPEC
- : "=A"(hv_status),
- "+c"(input1_lo), ASM_CALL_CONSTRAINT
- : "A" (control), "b" (input1_hi),
- "D"(input2_hi), "S"(input2_lo),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc");
- }
-#endif
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u32 input2_hi = upper_32_bits(input2);
+ u32 input2_lo = lower_32_bits(input2);
+ u64 hv_status;
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo), ASM_CALL_CONSTRAINT
+ : "A" (control), "b" (input1_hi),
+ "D"(input2_hi), "S"(input2_lo),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc");
return hv_status;
+#endif
}
static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 718a55d82fe4..9e1720d73244 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -315,9 +315,12 @@
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
+
+#define PERF_CAP_LBR_FMT 0x3f
#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
#define PERF_CAP_ARCH_REG BIT_ULL(7)
#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_FW_WRITES BIT_ULL(13)
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
@@ -747,6 +750,7 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
/* AMD Hardware Feedback Support MSRs */
#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index c69e269937c5..76b95bd1a405 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.0+ */
/* Generic MTRR (Memory Type Range Register) ioctls.
Copyright (C) 1997-1999 Richard Gooch
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
The postal address is:
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 6ca6516c7492..e4815e15dc9a 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -36,9 +36,7 @@ static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx)
static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
{
- /* "monitorx %eax, %ecx, %edx" */
- asm volatile(".byte 0x0f, 0x01, 0xfa"
- :: "a" (eax), "c" (ecx), "d"(edx));
+ asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx));
}
static __always_inline void __mwait(u32 eax, u32 ecx)
@@ -80,9 +78,7 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
{
/* No need for TSA buffer clearing on AMD */
- /* "mwaitx %eax, %ebx, %ecx" */
- asm volatile(".byte 0x0f, 0x01, 0xfb"
- :: "a" (eax), "b" (ebx), "c" (ecx));
+ asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx));
}
/*
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index b0d03b6c279b..332428caaed2 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -309,8 +309,7 @@ do { \
\
asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
__percpu_arg([var]) \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[oval] "+a" (pco_old__), \
[var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
@@ -367,8 +366,7 @@ do { \
asm_inline qual ( \
ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- CC_SET(z) \
- : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ : ALT_OUTPUT_SP("=@ccz" (success), \
[var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), "+d" (old__.high)) \
: "b" (new__.low), "c" (new__.high), \
@@ -436,8 +434,7 @@ do { \
asm_inline qual ( \
ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- CC_SET(z) \
- : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ : ALT_OUTPUT_SP("=@ccz" (success), \
[var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), "+d" (old__.high)) \
: "b" (new__.low), "c" (new__.high), \
@@ -585,8 +582,7 @@ do { \
bool oldbit; \
\
asm volatile("btl %[nr], " __percpu_arg([var]) \
- CC_SET(c) \
- : CC_OUT(c) (oldbit) \
+ : "=@ccc" (oldbit) \
: [var] "m" (__my_cpu_var(_var)), \
[nr] "rI" (_nr)); \
oldbit; \
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 3821ee3fae35..54c8fc430684 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -6,37 +6,15 @@
#define __CLOBBERS_MEM(clb...) "memory", ## clb
-#ifndef __GCC_ASM_FLAG_OUTPUTS__
-
-/* Use asm goto */
-
-#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
-({ \
- bool c = false; \
- asm goto (fullop "; j" #cc " %l[cc_label]" \
- : : [var] "m" (_var), ## __VA_ARGS__ \
- : clobbers : cc_label); \
- if (0) { \
-cc_label: c = true; \
- } \
- c; \
-})
-
-#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */
-
-/* Use flags output or a set instruction */
-
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c; \
- asm_inline volatile (fullop CC_SET(cc) \
- : [var] "+m" (_var), CC_OUT(cc) (c) \
+ asm_inline volatile (fullop \
+ : [var] "+m" (_var), "=@cc" #cc (c) \
: __VA_ARGS__ : clobbers); \
c; \
})
-#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */
-
#define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index f9046c4b9a2b..0e6c0940100f 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -491,8 +491,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
/* "pvalidate" mnemonic support in binutils 2.36 and newer */
asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
- CC_SET(c)
- : CC_OUT(c) (no_rmpupdate), "=a"(rc)
+ : "=@ccc"(no_rmpupdate), "=a"(rc)
: "a"(vaddr), "c"(rmp_psize), "d"(validate)
: "memory", "cc");
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index c72d46175374..5c03aaa89014 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -83,8 +83,7 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
static inline int __gen_sigismember(sigset_t *set, int _sig)
{
bool ret;
- asm("btl %2,%1" CC_SET(c)
- : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
+ asm("btl %2,%1" : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1));
return ret;
}
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index fde2bd7af19e..46aa2c9c1bda 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -75,9 +75,7 @@ static inline u32 rdpkru(void)
* "rdpkru" instruction. Places PKRU contents in to EAX,
* clears EDX and requires that ecx=0.
*/
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (pkru), "=d" (edx)
- : "c" (ecx));
+ asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx));
return pkru;
}
@@ -89,8 +87,7 @@ static inline void wrpkru(u32 pkru)
* "wrpkru" instruction. Loads contents in EAX to PKRU,
* requires that ecx = edx = 0.
*/
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (pkru), "c"(ecx), "d"(edx));
+ asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx));
}
#else
@@ -287,8 +284,7 @@ static inline int enqcmds(void __iomem *dst, const void *src)
* See movdir64b()'s comment on operand specification.
*/
asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
- CC_SET(z)
- : CC_OUT(z) (zf), "+m" (*__dst)
+ : "=@ccz" (zf), "+m" (*__dst)
: "m" (*__src), "a" (__dst), "d" (__src));
/* Submission failure is indicated via EFLAGS.ZF=1 */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index ffc27f676243..17f6c3fedeee 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -299,6 +299,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
+#define SVM_SEV_FEAT_SECURE_TSC BIT(9)
#define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63)
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 5337f1be18f6..f2d142a0a862 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -178,9 +178,9 @@ void int3_emulate_ret(struct pt_regs *regs)
}
static __always_inline
-void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+bool __emulate_cc(unsigned long flags, u8 cc)
{
- static const unsigned long jcc_mask[6] = {
+ static const unsigned long cc_mask[6] = {
[0] = X86_EFLAGS_OF,
[1] = X86_EFLAGS_CF,
[2] = X86_EFLAGS_ZF,
@@ -193,15 +193,21 @@ void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned lo
bool match;
if (cc < 0xc) {
- match = regs->flags & jcc_mask[cc >> 1];
+ match = flags & cc_mask[cc >> 1];
} else {
- match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
- ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+ match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+ ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
if (cc >= 0xe)
- match = match || (regs->flags & X86_EFLAGS_ZF);
+ match = match || (flags & X86_EFLAGS_ZF);
}
- if ((match && !invert) || (!match && invert))
+ return (match && !invert) || (!match && invert);
+}
+
+static __always_inline
+void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+{
+ if (__emulate_cc(regs->flags, cc))
ip += disp;
int3_emulate_jmp(regs, ip);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 3a7755c1a441..91a3fb8ae7ff 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -378,7 +378,7 @@ do { \
asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*_ptr), \
[old] "+a" (__old) \
: [new] ltype (__new) \
@@ -397,7 +397,7 @@ do { \
asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
"+A" (__old), \
[ptr] "+m" (*_ptr) \
: "b" ((u32)__new), \
@@ -417,11 +417,10 @@ do { \
__typeof__(*(_ptr)) __new = (_new); \
asm volatile("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
- CC_SET(z) \
"2:\n" \
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
%[errout]) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[errout] "+r" (__err), \
[ptr] "+m" (*_ptr), \
[old] "+a" (__old) \
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cca7d6641287..c85c50019523 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -106,6 +106,7 @@
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
+#define VM_EXIT_LOAD_CET_STATE 0x10000000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -119,6 +120,7 @@
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000
#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000
+#define VM_ENTRY_LOAD_CET_STATE 0x00100000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
@@ -132,6 +134,7 @@
#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
#define VMX_BASIC_INOUT BIT_ULL(54)
#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56)
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
@@ -369,6 +372,9 @@ enum vmcs_field {
GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
GUEST_SYSENTER_ESP = 0x00006824,
GUEST_SYSENTER_EIP = 0x00006826,
+ GUEST_S_CET = 0x00006828,
+ GUEST_SSP = 0x0000682a,
+ GUEST_INTR_SSP_TABLE = 0x0000682c,
HOST_CR0 = 0x00006c00,
HOST_CR3 = 0x00006c02,
HOST_CR4 = 0x00006c04,
@@ -381,6 +387,9 @@ enum vmcs_field {
HOST_IA32_SYSENTER_EIP = 0x00006c12,
HOST_RSP = 0x00006c14,
HOST_RIP = 0x00006c16,
+ HOST_S_CET = 0x00006c18,
+ HOST_SSP = 0x00006c1a,
+ HOST_INTR_SSP_TABLE = 0x00006c1c
};
/*
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0f15d683817d..d420c9c066d4 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -35,6 +35,11 @@
#define MC_VECTOR 18
#define XM_VECTOR 19
#define VE_VECTOR 20
+#define CP_VECTOR 21
+
+#define HV_VECTOR 28
+#define VC_VECTOR 29
+#define SX_VECTOR 30
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
@@ -411,6 +416,35 @@ struct kvm_xcrs {
__u64 padding[16];
};
+#define KVM_X86_REG_TYPE_MSR 2
+#define KVM_X86_REG_TYPE_KVM 3
+
+#define KVM_X86_KVM_REG_SIZE(reg) \
+({ \
+ reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \
+})
+
+#define KVM_X86_REG_TYPE_SIZE(type, reg) \
+({ \
+ __u64 type_size = (__u64)type << 32; \
+ \
+ type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \
+ type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \
+ 0; \
+ type_size; \
+})
+
+#define KVM_X86_REG_ID(type, index) \
+ (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index)
+
+#define KVM_X86_REG_MSR(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_KVM(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index)
+
+/* KVM-defined registers starting from 0 */
+#define KVM_REG_GUEST_SSP 0
+
#define KVM_SYNC_X86_REGS (1UL << 0)
#define KVM_SYNC_X86_SREGS (1UL << 1)
#define KVM_SYNC_X86_EVENTS (1UL << 2)
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index f0f4a4cf84a7..9792e329343e 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -94,6 +94,8 @@
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
#define EXIT_REASON_TDCALL 77
+#define EXIT_REASON_MSR_READ_IMM 84
+#define EXIT_REASON_MSR_WRITE_IMM 85
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -158,7 +160,9 @@
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
- { EXIT_REASON_TDCALL, "TDCALL" }
+ { EXIT_REASON_TDCALL, "TDCALL" }, \
+ { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
+ { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8698d66563ed..0281703da5e2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -89,7 +89,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
*/
flags->bm_control = 0;
}
- if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17) {
+ if (cpu_feature_enabled(X86_FEATURE_ZEN)) {
/*
* For all AMD Zen or newer CPUs that support C3, caches
* should not be flushed by software while entering C3
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 79ae9cb50019..8ee5ff547357 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -147,10 +147,10 @@ static void *its_init_thunk(void *thunk, int reg)
/*
* When ITS uses indirect branch thunk the fineibt_paranoid
* caller sequence doesn't fit in the caller site. So put the
- * remaining part of the sequence (<ea> + JNE) into the ITS
+ * remaining part of the sequence (UDB + JNE) into the ITS
* thunk.
*/
- bytes[i++] = 0xea; /* invalid instruction */
+ bytes[i++] = 0xd6; /* UDB */
bytes[i++] = 0x75; /* JNE */
bytes[i++] = 0xfd;
@@ -163,7 +163,7 @@ static void *its_init_thunk(void *thunk, int reg)
reg -= 8;
}
bytes[i++] = 0xff;
- bytes[i++] = 0xe0 + reg; /* jmp *reg */
+ bytes[i++] = 0xe0 + reg; /* JMP *reg */
bytes[i++] = 0xcc;
return thunk + offset;
@@ -713,20 +713,33 @@ static inline bool is_jcc32(struct insn *insn)
#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL)
/*
- * CALL/JMP *%\reg
+ * [CS]{,3} CALL/JMP *%\reg [INT3]*
*/
-static int emit_indirect(int op, int reg, u8 *bytes)
+static int emit_indirect(int op, int reg, u8 *bytes, int len)
{
+ int cs = 0, bp = 0;
int i = 0;
u8 modrm;
+ /*
+ * Set @len to the excess bytes after writing the instruction.
+ */
+ len -= 2 + (reg >= 8);
+ WARN_ON_ONCE(len < 0);
+
switch (op) {
case CALL_INSN_OPCODE:
modrm = 0x10; /* Reg = 2; CALL r/m */
+ /*
+ * Additional NOP is better than prefix decode penalty.
+ */
+ if (len <= 3)
+ cs = len;
break;
case JMP32_INSN_OPCODE:
modrm = 0x20; /* Reg = 4; JMP r/m */
+ bp = len;
break;
default:
@@ -734,6 +747,9 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return -1;
}
+ while (cs--)
+ bytes[i++] = 0x2e; /* CS-prefix */
+
if (reg >= 8) {
bytes[i++] = 0x41; /* REX.B prefix */
reg -= 8;
@@ -745,6 +761,9 @@ static int emit_indirect(int op, int reg, u8 *bytes)
bytes[i++] = 0xff; /* opcode */
bytes[i++] = modrm;
+ while (bp--)
+ bytes[i++] = 0xcc; /* INT3 */
+
return i;
}
@@ -918,20 +937,11 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
return emit_its_trampoline(addr, insn, reg, bytes);
#endif
- ret = emit_indirect(op, reg, bytes + i);
+ ret = emit_indirect(op, reg, bytes + i, insn->length - i);
if (ret < 0)
return ret;
i += ret;
- /*
- * The compiler is supposed to EMIT an INT3 after every unconditional
- * JMP instruction due to AMD BTC. However, if the compiler is too old
- * or MITIGATION_SLS isn't enabled, we still need an INT3 after
- * indirect JMPs even on Intel.
- */
- if (op == JMP32_INSN_OPCODE && i < insn->length)
- bytes[i++] = INT3_INSN_OPCODE;
-
for (; i < insn->length;)
bytes[i++] = BYTES_NOP1;
@@ -970,7 +980,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
case JMP32_INSN_OPCODE:
/* Check for cfi_paranoid + ITS */
dest = addr + insn.length + insn.immediate.value;
- if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) {
+ if (dest[-1] == 0xd6 && (dest[0] & 0xf0) == 0x70) {
WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
continue;
}
@@ -1177,6 +1187,7 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
#endif
enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
+static bool cfi_debug __ro_after_init;
#ifdef CONFIG_FINEIBT_BHI
bool cfi_bhi __ro_after_init = false;
@@ -1259,6 +1270,8 @@ static __init int cfi_parse_cmdline(char *str)
} else if (!strcmp(str, "off")) {
cfi_mode = CFI_OFF;
cfi_rand = false;
+ } else if (!strcmp(str, "debug")) {
+ cfi_debug = true;
} else if (!strcmp(str, "kcfi")) {
cfi_mode = CFI_KCFI;
} else if (!strcmp(str, "fineibt")) {
@@ -1266,26 +1279,26 @@ static __init int cfi_parse_cmdline(char *str)
} else if (!strcmp(str, "norand")) {
cfi_rand = false;
} else if (!strcmp(str, "warn")) {
- pr_alert("CFI mismatch non-fatal!\n");
+ pr_alert("CFI: mismatch non-fatal!\n");
cfi_warn = true;
} else if (!strcmp(str, "paranoid")) {
if (cfi_mode == CFI_FINEIBT) {
cfi_paranoid = true;
} else {
- pr_err("Ignoring paranoid; depends on fineibt.\n");
+ pr_err("CFI: ignoring paranoid; depends on fineibt.\n");
}
} else if (!strcmp(str, "bhi")) {
#ifdef CONFIG_FINEIBT_BHI
if (cfi_mode == CFI_FINEIBT) {
cfi_bhi = true;
} else {
- pr_err("Ignoring bhi; depends on fineibt.\n");
+ pr_err("CFI: ignoring bhi; depends on fineibt.\n");
}
#else
- pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n");
+ pr_err("CFI: ignoring bhi; depends on FINEIBT_BHI=y.\n");
#endif
} else {
- pr_err("Ignoring unknown cfi option (%s).", str);
+ pr_err("CFI: Ignoring unknown option (%s).", str);
}
str = next;
@@ -1300,9 +1313,9 @@ early_param("cfi", cfi_parse_cmdline);
*
* __cfi_\func: __cfi_\func:
* movl $0x12345678,%eax // 5 endbr64 // 4
- * nop subl $0x12345678,%r10d // 7
- * nop jne __cfi_\func+6 // 2
- * nop nop3 // 3
+ * nop subl $0x12345678,%eax // 5
+ * nop jne.d32,pn \func+3 // 7
+ * nop
* nop
* nop
* nop
@@ -1311,34 +1324,44 @@ early_param("cfi", cfi_parse_cmdline);
* nop
* nop
* nop
+ * \func: \func:
+ * endbr64 nopl -42(%rax)
*
*
* caller: caller:
- * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6
+ * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%eax // 5
* addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4
- * je 1f // 2 nop4 // 4
+ * je 1f // 2 nop5 // 5
* ud2 // 2
* 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6
*
+ *
+ * Notably, the FineIBT sequences are crafted such that branches are presumed
+ * non-taken. This is based on Agner Fog's optimization manual, which states:
+ *
+ * "Make conditional jumps most often not taken: The efficiency and throughput
+ * for not-taken branches is better than for taken branches on most
+ * processors. Therefore, it is good to place the most frequent branch first"
*/
/*
* <fineibt_preamble_start>:
* 0: f3 0f 1e fa endbr64
- * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d
- * b: 75 f9 jne 6 <fineibt_preamble_start+0x6>
- * d: 0f 1f 00 nopl (%rax)
+ * 4: 2d 78 56 34 12 sub $0x12345678, %eax
+ * 9: 2e 0f 85 03 00 00 00 jne,pn 13 <fineibt_preamble_start+0x13>
+ * 10: 0f 1f 40 d6 nopl -0x2a(%rax)
*
- * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as
- * (bad) on x86_64 and raises #UD.
+ * Note that the JNE target is the 0xD6 byte inside the NOPL, this decodes as
+ * UDB on x86_64 and raises #UD.
*/
asm( ".pushsection .rodata \n"
"fineibt_preamble_start: \n"
" endbr64 \n"
- " subl $0x12345678, %r10d \n"
+ " subl $0x12345678, %eax \n"
"fineibt_preamble_bhi: \n"
- " jne fineibt_preamble_start+6 \n"
- ASM_NOP3
+ " cs jne.d32 fineibt_preamble_start+0x13 \n"
+ "#fineibt_func: \n"
+ " nopl -42(%rax) \n"
"fineibt_preamble_end: \n"
".popsection\n"
);
@@ -1349,20 +1372,20 @@ extern u8 fineibt_preamble_end[];
#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
#define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start)
-#define fineibt_preamble_ud 6
-#define fineibt_preamble_hash 7
+#define fineibt_preamble_ud 0x13
+#define fineibt_preamble_hash 5
/*
* <fineibt_caller_start>:
- * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
- * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11
- * a: 0f 1f 40 00 nopl 0x0(%rax)
+ * 0: b8 78 56 34 12 mov $0x12345678, %eax
+ * 5: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * 9: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
*/
asm( ".pushsection .rodata \n"
"fineibt_caller_start: \n"
- " movl $0x12345678, %r10d \n"
+ " movl $0x12345678, %eax \n"
" lea -0x10(%r11), %r11 \n"
- ASM_NOP4
+ ASM_NOP5
"fineibt_caller_end: \n"
".popsection \n"
);
@@ -1371,7 +1394,7 @@ extern u8 fineibt_caller_start[];
extern u8 fineibt_caller_end[];
#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
-#define fineibt_caller_hash 2
+#define fineibt_caller_hash 1
#define fineibt_caller_jmp (fineibt_caller_size - 2)
@@ -1388,9 +1411,9 @@ extern u8 fineibt_caller_end[];
* of adding a load.
*
* <fineibt_paranoid_start>:
- * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
- * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
- * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11
+ * 0: b8 78 56 34 12 mov $0x12345678, %eax
+ * 5: 41 3b 43 f5 cmp -0x11(%r11), %eax
+ * 9: 2e 4d 8d 5b <f0> cs lea -0x10(%r11), %r11
* e: 75 fd jne d <fineibt_paranoid_start+0xd>
* 10: 41 ff d3 call *%r11
* 13: 90 nop
@@ -1402,13 +1425,13 @@ extern u8 fineibt_caller_end[];
*/
asm( ".pushsection .rodata \n"
"fineibt_paranoid_start: \n"
- " movl $0x12345678, %r10d \n"
- " cmpl -9(%r11), %r10d \n"
- " lea -0x10(%r11), %r11 \n"
+ " mov $0x12345678, %eax \n"
+ " cmpl -11(%r11), %eax \n"
+ " cs lea -0x10(%r11), %r11 \n"
+ "#fineibt_caller_size: \n"
" jne fineibt_paranoid_start+0xd \n"
"fineibt_paranoid_ind: \n"
- " call *%r11 \n"
- " nop \n"
+ " cs call *%r11 \n"
"fineibt_paranoid_end: \n"
".popsection \n"
);
@@ -1520,51 +1543,67 @@ static int cfi_rand_preamble(s32 *start, s32 *end)
return 0;
}
+/*
+ * Inline the bhi-arity 1 case:
+ *
+ * __cfi_foo:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 2d 78 56 34 12 sub $0x12345678, %eax
+ * 9: 49 0f 45 fa cmovne %rax, %rdi
+ * d: 2e 75 03 jne,pn foo+0x3
+ *
+ * foo:
+ * 10: 0f 1f 40 <d6> nopl -42(%rax)
+ *
+ * Notably, this scheme is incompatible with permissive CFI
+ * because the CMOVcc is unconditional and RDI will have been
+ * clobbered.
+ */
+asm( ".pushsection .rodata \n"
+ "fineibt_bhi1_start: \n"
+ " cmovne %rax, %rdi \n"
+ " cs jne fineibt_bhi1_func + 0x3 \n"
+ "fineibt_bhi1_func: \n"
+ " nopl -42(%rax) \n"
+ "fineibt_bhi1_end: \n"
+ ".popsection \n"
+);
+
+extern u8 fineibt_bhi1_start[];
+extern u8 fineibt_bhi1_end[];
+
+#define fineibt_bhi1_size (fineibt_bhi1_end - fineibt_bhi1_start)
+
static void cfi_fineibt_bhi_preamble(void *addr, int arity)
{
+ u8 bytes[MAX_INSN_SIZE];
+
if (!arity)
return;
if (!cfi_warn && arity == 1) {
- /*
- * Crazy scheme to allow arity-1 inline:
- *
- * __cfi_foo:
- * 0: f3 0f 1e fa endbr64
- * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d
- * b: 49 0f 45 fa cmovne %r10, %rdi
- * f: 75 f5 jne __cfi_foo+6
- * 11: 0f 1f 00 nopl (%rax)
- *
- * Code that direct calls to foo()+0, decodes the tail end as:
- *
- * foo:
- * 0: f5 cmc
- * 1: 0f 1f 00 nopl (%rax)
- *
- * which clobbers CF, but does not affect anything ABI
- * wise.
- *
- * Notably, this scheme is incompatible with permissive CFI
- * because the CMOVcc is unconditional and RDI will have been
- * clobbered.
- */
- const u8 magic[9] = {
- 0x49, 0x0f, 0x45, 0xfa,
- 0x75, 0xf5,
- BYTES_NOP3,
- };
-
- text_poke_early(addr + fineibt_preamble_bhi, magic, 9);
-
+ text_poke_early(addr + fineibt_preamble_bhi,
+ fineibt_bhi1_start, fineibt_bhi1_size);
return;
}
- text_poke_early(addr + fineibt_preamble_bhi,
- text_gen_insn(CALL_INSN_OPCODE,
- addr + fineibt_preamble_bhi,
- __bhi_args[arity]),
- CALL_INSN_SIZE);
+ /*
+ * Replace the bytes at fineibt_preamble_bhi with a CALL instruction
+ * that lines up exactly with the end of the preamble, such that the
+ * return address will be foo+0.
+ *
+ * __cfi_foo:
+ * 0: f3 0f 1e fa endbr64
+ * 4: 2d 78 56 34 12 sub $0x12345678, %eax
+ * 9: 2e 2e e8 DD DD DD DD cs cs call __bhi_args[arity]
+ */
+ bytes[0] = 0x2e;
+ bytes[1] = 0x2e;
+ __text_gen_insn(bytes + 2, CALL_INSN_OPCODE,
+ addr + fineibt_preamble_bhi + 2,
+ __bhi_args[arity], CALL_INSN_SIZE);
+
+ text_poke_early(addr + fineibt_preamble_bhi, bytes, 7);
}
static int cfi_rewrite_preamble(s32 *start, s32 *end)
@@ -1655,8 +1694,6 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
- BUG_ON(fineibt_paranoid_size != 20);
-
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
struct insn insn;
@@ -1696,8 +1733,9 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
emit_paranoid_trampoline(addr + fineibt_caller_size,
&insn, 11, bytes + fineibt_caller_size);
} else {
- ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
- if (WARN_ON_ONCE(ret != 3))
+ int len = fineibt_paranoid_size - fineibt_paranoid_ind;
+ ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind, len);
+ if (WARN_ON_ONCE(ret != len))
continue;
}
@@ -1707,13 +1745,20 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
return 0;
}
+#define pr_cfi_debug(X...) if (cfi_debug) pr_info(X)
+
+#define FINEIBT_WARN(_f, _v) \
+ WARN_ONCE((_f) != (_v), "FineIBT: " #_f " %ld != %d\n", _f, _v)
+
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
int ret;
- if (WARN_ONCE(fineibt_preamble_size != 16,
- "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
+ if (FINEIBT_WARN(fineibt_preamble_size, 20) ||
+ FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) ||
+ FINEIBT_WARN(fineibt_caller_size, 14) ||
+ FINEIBT_WARN(fineibt_paranoid_size, 20))
return;
if (cfi_mode == CFI_AUTO) {
@@ -1734,6 +1779,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
* rewrite them. This disables all CFI. If this succeeds but any of the
* later stages fails, we're without CFI.
*/
+ pr_cfi_debug("CFI: disabling all indirect call checking\n");
ret = cfi_disable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
@@ -1744,43 +1790,53 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
cfi_bpf_hash = cfi_rehash(cfi_bpf_hash);
cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
}
+ pr_cfi_debug("CFI: cfi_seed: 0x%08x\n", cfi_seed);
+ pr_cfi_debug("CFI: rehashing all preambles\n");
ret = cfi_rand_preamble(start_cfi, end_cfi);
if (ret)
goto err;
+ pr_cfi_debug("CFI: rehashing all indirect calls\n");
ret = cfi_rand_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
+ } else {
+ pr_cfi_debug("CFI: rehashing disabled\n");
}
switch (cfi_mode) {
case CFI_OFF:
if (builtin)
- pr_info("Disabling CFI\n");
+ pr_info("CFI: disabled\n");
return;
case CFI_KCFI:
+ pr_cfi_debug("CFI: re-enabling all indirect call checking\n");
ret = cfi_enable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
if (builtin)
- pr_info("Using kCFI\n");
+ pr_info("CFI: Using %sretpoline kCFI\n",
+ cfi_rand ? "rehashed " : "");
return;
case CFI_FINEIBT:
+ pr_cfi_debug("CFI: adding FineIBT to all preambles\n");
/* place the FineIBT preamble at func()-16 */
ret = cfi_rewrite_preamble(start_cfi, end_cfi);
if (ret)
goto err;
/* rewrite the callers to target func()-16 */
+ pr_cfi_debug("CFI: rewriting indirect call sites to use FineIBT\n");
ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
/* now that nobody targets func()+0, remove ENDBR there */
+ pr_cfi_debug("CFI: removing old endbr insns\n");
cfi_rewrite_endbr(start_cfi, end_cfi);
if (builtin) {
@@ -1823,11 +1879,11 @@ static void poison_cfi(void *addr)
/*
* __cfi_\func:
- * osp nopl (%rax)
- * subl $0, %r10d
- * jz 1f
- * ud2
- * 1: nop
+ * nopl -42(%rax)
+ * sub $0, %eax
+ * jne \func+3
+ * \func:
+ * nopl -42(%rax)
*/
poison_endbr(addr);
poison_hash(addr + fineibt_preamble_hash);
@@ -1853,12 +1909,14 @@ static void poison_cfi(void *addr)
}
}
+#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
+
/*
- * When regs->ip points to a 0xEA byte in the FineIBT preamble,
+ * When regs->ip points to a 0xD6 byte in the FineIBT preamble,
* return true and fill out target and type.
*
* We check the preamble by checking for the ENDBR instruction relative to the
- * 0xEA instruction.
+ * UDB instruction.
*/
static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type)
{
@@ -1868,10 +1926,10 @@ static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target,
if (!exact_endbr((void *)addr))
return false;
- *target = addr + fineibt_preamble_size;
+ *target = addr + fineibt_prefix_size;
__get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
- *type = (u32)regs->r10 + hash;
+ *type = (u32)regs->ax + hash;
/*
* Since regs->ip points to the middle of an instruction; it cannot
@@ -1909,12 +1967,12 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32
__get_kernel_nofault(&addr, regs->sp, unsigned long, Efault);
*target = addr;
- addr -= fineibt_preamble_size;
+ addr -= fineibt_prefix_size;
if (!exact_endbr((void *)addr))
return false;
__get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
- *type = (u32)regs->r10 + hash;
+ *type = (u32)regs->ax + hash;
/*
* The UD2 sites are constructed with a RET immediately following,
@@ -1931,7 +1989,7 @@ static bool is_paranoid_thunk(unsigned long addr)
u32 thunk;
__get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault);
- return (thunk & 0x00FFFFFF) == 0xfd75ea;
+ return (thunk & 0x00FFFFFF) == 0xfd75d6;
Efault:
return false;
@@ -1939,8 +1997,7 @@ Efault:
/*
* regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
- * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS
- * thunk.
+ * sequence, or to UDB + Jcc.d8 for cfi_paranoid + ITS thunk.
*/
static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
{
@@ -1950,8 +2007,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target,
return false;
if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) {
- *target = regs->r11 + fineibt_preamble_size;
- *type = regs->r10;
+ *target = regs->r11 + fineibt_prefix_size;
+ *type = regs->ax;
/*
* Since the trapping instruction is the exact, but LOCK prefixed,
@@ -1963,14 +2020,14 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target,
/*
* The cfi_paranoid + ITS thunk combination results in:
*
- * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
- * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
- * a: 4d 8d 5b f0 lea -0x10(%r11), %r11
+ * 0: b8 78 56 34 12 mov $0x12345678, %eax
+ * 5: 41 3b 43 f7 cmp -11(%r11), %eax
+ * a: 2e 3d 8d 5b f0 cs lea -0x10(%r11), %r11
* e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11
*
* Where the paranoid_thunk looks like:
*
- * 1d: <ea> (bad)
+ * 1d: <d6> udb
* __x86_indirect_paranoid_thunk_r11:
* 1e: 75 fd jne 1d
* __x86_indirect_its_thunk_r11:
@@ -1979,8 +2036,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target,
*
*/
if (is_paranoid_thunk(regs->ip)) {
- *target = regs->r11 + fineibt_preamble_size;
- *type = regs->r10;
+ *target = regs->r11 + fineibt_prefix_size;
+ *type = regs->ax;
regs->ip = *target;
return true;
@@ -2005,6 +2062,8 @@ bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
+ if (IS_ENABLED(CONFIG_CFI) && builtin)
+ pr_info("CFI: Using standard kCFI\n");
}
#ifdef CONFIG_X86_KERNEL_IBT
@@ -2321,6 +2380,7 @@ void __init alternative_instructions(void)
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
__cfi_sites, __cfi_sites_end, true);
+ cfi_debug = false;
/*
* Rewrite the retpolines, must be done before alternatives since
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 6259b474073b..32ba599a51f8 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -102,6 +102,7 @@ static void __used common(void)
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+ OFFSET(C_PTREGS_SIZE, pt_regs, orig_ax);
/* TLB state for the entry code */
OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c
index 77086cf565ec..638eb5c933e0 100644
--- a/arch/x86/kernel/cfi.c
+++ b/arch/x86/kernel/cfi.c
@@ -27,7 +27,7 @@ static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target,
* for indirect call checks:
*
*   movl -<id>, %r10d ; 6 bytes
- * addl -4(%reg), %r10d ; 4 bytes
+ * addl -<pos>(%reg), %r10d; 4 bytes
* je .Ltmp1 ; 2 bytes
* ud2 ; <- regs->ip
* .Ltmp1:
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index c78f860419d6..c4febdbcfe4d 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -38,10 +38,6 @@
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
-/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
-bool hyperv_paravisor_present __ro_after_init;
-EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
-
#if IS_ENABLED(CONFIG_HYPERV)
static inline unsigned int hv_get_nested_msr(unsigned int reg)
{
@@ -288,8 +284,18 @@ static void __init x86_setup_ops_for_tsc_pg_clock(void)
old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
}
+
+#ifdef CONFIG_X86_64
+DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
+EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
+#define hypercall_update(hc) static_call_update(hv_hypercall, hc)
+#endif
#endif /* CONFIG_HYPERV */
+#ifndef hypercall_update
+#define hypercall_update(hc) (void)hc
+#endif
+
static uint32_t __init ms_hyperv_platform(void)
{
u32 eax;
@@ -484,14 +490,14 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.shared_gpa_boundary =
BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
- hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
-
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
static_branch_enable(&isolation_type_snp);
+ if (!ms_hyperv.paravisor_present)
+ hypercall_update(hv_snp_hypercall);
} else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
static_branch_enable(&isolation_type_tdx);
@@ -499,6 +505,7 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
if (!ms_hyperv.paravisor_present) {
+ hypercall_update(hv_tdx_hypercall);
/*
* Mark the Hyper-V TSC page feature as disabled
* in a TDX VM without paravisor so that the
@@ -565,6 +572,11 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
#endif
+ /*
+ * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. Root
+ * partition doesn't need to write to synthetic MSR to enable invariant
+ * TSC feature. It sees what the hardware provides.
+ */
if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
/*
* Writing to synthetic MSR 0x40000118 updates/changes the
@@ -636,8 +648,12 @@ static void __init ms_hyperv_init_platform(void)
* TSC should be marked as unstable only after Hyper-V
* clocksource has been initialized. This ensures that the
* stability of the sched_clock is not altered.
+ *
+ * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. No
+ * need to check for it.
*/
- if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
+ if (!hv_root_partition() &&
+ !(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
mark_tsc_unstable("running on Hyper-V");
hardlockup_detector_disable();
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 18cf79d6e2c5..763534d77f59 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.0+
/*
* MTRR (Memory Type Range Register) cleanup
*
* Copyright (C) 2009 Yinghai Lu
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/init.h>
#include <linux/pci.h>
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index ecbda0341a8a..4b3d492afe17 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: LGPL-2.0+
/* Generic MTRR (Memory Type Range Register) driver.
Copyright (C) 1997-2000 Richard Gooch
Copyright (c) 2002 Patrick Mochel
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
The postal address is:
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 4cee6213d667..caa4dc885c21 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_MSR_IMM, CPUID_ECX, 5, 0x00000007, 1 },
{ X86_FEATURE_APX, CPUID_EDX, 21, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index 99004b02e2ed..42a088a337c5 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -68,7 +68,7 @@ static inline bool encls_failed(int ret)
({ \
int ret; \
asm volatile( \
- "1: .byte 0x0f, 0x01, 0xcf;\n\t" \
+ "1: encls\n" \
"2:\n" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \
: "=a"(ret) \
@@ -111,8 +111,8 @@ static inline bool encls_failed(int ret)
({ \
int ret; \
asm volatile( \
- "1: .byte 0x0f, 0x01, 0xcf;\n\t" \
- " xor %%eax,%%eax;\n" \
+ "1: encls\n\t" \
+ "xor %%eax,%%eax\n" \
"2:\n" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \
: "=a"(ret), "=b"(rbx_out) \
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f79c5edc0b89..6ab9eac64670 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -97,9 +97,11 @@ void __init native_init_IRQ(void)
/* Execute any quirks before the call gates are initialised: */
x86_init.irqs.pre_vector_init();
- if (cpu_feature_enabled(X86_FEATURE_FRED))
+ /* FRED's IRQ path may be used even if FRED isn't fully enabled. */
+ if (IS_ENABLED(CONFIG_X86_FRED))
fred_complete_exception_setup();
- else
+
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
idt_setup_apic_and_irq_gates();
lapic_assign_system_vectors();
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 15088d14904f..201137b98fb8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -479,6 +479,10 @@ void __nocfi machine_kexec(struct kimage *image)
__ftrace_enabled_restore(save_ftrace_enabled);
}
+/*
+ * Handover to the next kernel, no CFI concern.
+ */
+ANNOTATE_NOCFI_SYM(machine_kexec);
/* arch-dependent functionality related to kexec file-based syscall */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 36354b470590..6b22611e69cc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -97,7 +97,7 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
* Check for UD1 or UD2, accounting for Address Size Override Prefixes.
* If it's a UD1, further decode to determine its use:
*
- * FineIBT: ea (bad)
+ * FineIBT: d6 udb
* FineIBT: f0 75 f9 lock jne . - 6
* UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax
* UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax
@@ -130,9 +130,9 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
WARN_ON_ONCE(!lock);
return BUG_LOCK;
- case 0xea:
+ case 0xd6:
*len = addr - start;
- return BUG_EA;
+ return BUG_UDB;
case OPCODE_ESCAPE:
break;
@@ -341,7 +341,7 @@ static noinstr bool handle_bug(struct pt_regs *regs)
}
fallthrough;
- case BUG_EA:
+ case BUG_UDB:
case BUG_LOCK:
if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
handled = true;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4e43923656d0..278f08194ec8 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -40,7 +40,7 @@ config KVM_X86
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_NO_POLL
- select KVM_XFER_TO_GUEST_WORK
+ select VIRT_XFER_TO_GUEST_WORK
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select HAVE_KVM_PM_NOTIFIER if PM
@@ -96,6 +96,7 @@ config KVM_SW_PROTECTED_VM
config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL
+ select X86_FRED if X86_64
help
Provides support for KVM on processors equipped with Intel's VT
extensions, a.k.a. Virtual Machine Extensions (VMX).
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e2836a255b16..52524e0ca97f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -34,7 +34,7 @@
* aligned to sizeof(unsigned long) because it's not accessed via bitops.
*/
u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_cpu_caps);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_caps);
struct cpuid_xstate_sizes {
u32 eax;
@@ -131,7 +131,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2(
return NULL;
}
-EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry2);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_cpuid_entry2);
static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
{
@@ -263,6 +263,17 @@ static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu)
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
+static u64 cpuid_get_supported_xss(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry_index(vcpu, 0xd, 1);
+ if (!best)
+ return 0;
+
+ return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss;
+}
+
static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry2 *entry,
unsigned int x86_feature,
@@ -305,7 +316,8 @@ static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
best = kvm_find_cpuid_entry_index(vcpu, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
- best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+ best->ebx = xstate_required_size(vcpu->arch.xcr0 |
+ vcpu->arch.ia32_xss, true);
}
static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
@@ -424,6 +436,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
}
vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu);
+ vcpu->arch.guest_supported_xss = cpuid_get_supported_xss(vcpu);
vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu);
@@ -448,6 +461,8 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* adjustments to the reserved GPA bits.
*/
kvm_mmu_after_set_cpuid(vcpu);
+
+ kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
}
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
@@ -931,6 +946,7 @@ void kvm_set_cpu_caps(void)
VENDOR_F(WAITPKG),
F(SGX_LC),
F(BUS_LOCK_DETECT),
+ X86_64_F(SHSTK),
);
/*
@@ -940,6 +956,14 @@ void kvm_set_cpu_caps(void)
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
kvm_cpu_cap_clear(X86_FEATURE_PKU);
+ /*
+ * Shadow Stacks aren't implemented in the Shadow MMU. Shadow Stack
+ * accesses require "magic" Writable=0,Dirty=1 protection, which KVM
+ * doesn't know how to emulate or map.
+ */
+ if (!tdp_enabled)
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+
kvm_cpu_cap_init(CPUID_7_EDX,
F(AVX512_4VNNIW),
F(AVX512_4FMAPS),
@@ -957,8 +981,19 @@ void kvm_set_cpu_caps(void)
F(AMX_INT8),
F(AMX_BF16),
F(FLUSH_L1D),
+ F(IBT),
);
+ /*
+ * Disable support for IBT and SHSTK if KVM is configured to emulate
+ * accesses to reserved GPAs, as KVM's emulator doesn't support IBT or
+ * SHSTK, nor does KVM handle Shadow Stack #PFs (see above).
+ */
+ if (allow_smaller_maxphyaddr) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ }
+
if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) &&
boot_cpu_has(X86_FEATURE_AMD_IBPB) &&
boot_cpu_has(X86_FEATURE_AMD_IBRS))
@@ -985,6 +1020,10 @@ void kvm_set_cpu_caps(void)
F(LAM),
);
+ kvm_cpu_cap_init(CPUID_7_1_ECX,
+ SCATTERED_F(MSR_IMM),
+ );
+
kvm_cpu_cap_init(CPUID_7_1_EDX,
F(AVX_VNNI_INT8),
F(AVX_NE_CONVERT),
@@ -1222,7 +1261,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_RDPID);
}
}
-EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cpu_caps);
#undef F
#undef SCATTERED_F
@@ -1411,9 +1450,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
cpuid_entry_override(entry, CPUID_7_1_EAX);
+ cpuid_entry_override(entry, CPUID_7_1_ECX);
cpuid_entry_override(entry, CPUID_7_1_EDX);
entry->ebx = 0;
- entry->ecx = 0;
}
if (max_idx >= 2) {
entry = do_host_cpuid(array, function, 2);
@@ -1820,7 +1859,8 @@ static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func,
int r;
if (func == CENTAUR_CPUID_SIGNATURE &&
- boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
+ boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
return 0;
r = do_cpuid_func(array, func, type);
@@ -2001,7 +2041,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
if (function == 7 && index == 0) {
u64 data;
if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) &&
- !__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
+ !kvm_msr_read(vcpu, MSR_IA32_TSX_CTRL, &data) &&
(data & TSX_CTRL_CPUID_CLEAR))
*ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
} else if (function == 0x80000007) {
@@ -2045,7 +2085,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
used_max_basic);
return exact;
}
-EXPORT_SYMBOL_GPL(kvm_cpuid);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpuid);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
@@ -2063,4 +2103,4 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
kvm_rdx_write(vcpu, edx);
return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1349e278cd2a..4e3da5b497b8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -26,6 +26,7 @@
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>
#include <asm/ibt.h>
+#include <asm/text-patching.h>
#include "x86.h"
#include "tss.h"
@@ -166,7 +167,6 @@
#define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
-#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
#define NoWrite ((u64)1 << 45) /* No writeback */
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
@@ -178,6 +178,7 @@
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
+#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -202,7 +203,6 @@ struct opcode {
const struct escape *esc;
const struct instr_dual *idual;
const struct mode_dual *mdual;
- void (*fastop)(struct fastop *fake);
} u;
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
@@ -266,186 +266,130 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
X86_EFLAGS_PF|X86_EFLAGS_CF)
#ifdef CONFIG_X86_64
-#define ON64(x) x
+#define ON64(x...) x
#else
-#define ON64(x)
+#define ON64(x...)
#endif
-/*
- * fastop functions have a special calling convention:
- *
- * dst: rax (in/out)
- * src: rdx (in/out)
- * src2: rcx (in)
- * flags: rflags (in/out)
- * ex: rsi (in:fastop pointer, out:zero if exception)
- *
- * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
- * different operand sizes can be reached by calculation, rather than a jump
- * table (which would be bigger than the code).
- *
- * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
- * and 1 for the straight line speculation INT3, leaves 7 bytes for the
- * body of the function. Currently none is larger than 4.
- */
-static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
-
-#define FASTOP_SIZE 16
-
-#define __FOP_FUNC(name) \
- ".align " __stringify(FASTOP_SIZE) " \n\t" \
- ".type " name ", @function \n\t" \
- name ":\n\t" \
- ASM_ENDBR \
- IBT_NOSEAL(name)
-
-#define FOP_FUNC(name) \
- __FOP_FUNC(#name)
-
-#define __FOP_RET(name) \
- "11: " ASM_RET \
- ".size " name ", .-" name "\n\t"
-
-#define FOP_RET(name) \
- __FOP_RET(#name)
-
-#define __FOP_START(op, align) \
- extern void em_##op(struct fastop *fake); \
- asm(".pushsection .text, \"ax\" \n\t" \
- ".global em_" #op " \n\t" \
- ".align " __stringify(align) " \n\t" \
- "em_" #op ":\n\t"
-
-#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
-
-#define FOP_END \
- ".popsection")
-
-#define __FOPNOP(name) \
- __FOP_FUNC(name) \
- __FOP_RET(name)
-
-#define FOPNOP() \
- __FOPNOP(__stringify(__UNIQUE_ID(nop)))
-
-#define FOP1E(op, dst) \
- __FOP_FUNC(#op "_" #dst) \
- "10: " #op " %" #dst " \n\t" \
- __FOP_RET(#op "_" #dst)
-
-#define FOP1EEX(op, dst) \
- FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
-
-#define FASTOP1(op) \
- FOP_START(op) \
- FOP1E(op##b, al) \
- FOP1E(op##w, ax) \
- FOP1E(op##l, eax) \
- ON64(FOP1E(op##q, rax)) \
- FOP_END
-
-/* 1-operand, using src2 (for MUL/DIV r/m) */
-#define FASTOP1SRC2(op, name) \
- FOP_START(name) \
- FOP1E(op, cl) \
- FOP1E(op, cx) \
- FOP1E(op, ecx) \
- ON64(FOP1E(op, rcx)) \
- FOP_END
-
-/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
-#define FASTOP1SRC2EX(op, name) \
- FOP_START(name) \
- FOP1EEX(op, cl) \
- FOP1EEX(op, cx) \
- FOP1EEX(op, ecx) \
- ON64(FOP1EEX(op, rcx)) \
- FOP_END
-
-#define FOP2E(op, dst, src) \
- __FOP_FUNC(#op "_" #dst "_" #src) \
- #op " %" #src ", %" #dst " \n\t" \
- __FOP_RET(#op "_" #dst "_" #src)
-
-#define FASTOP2(op) \
- FOP_START(op) \
- FOP2E(op##b, al, dl) \
- FOP2E(op##w, ax, dx) \
- FOP2E(op##l, eax, edx) \
- ON64(FOP2E(op##q, rax, rdx)) \
- FOP_END
-
-/* 2 operand, word only */
-#define FASTOP2W(op) \
- FOP_START(op) \
- FOPNOP() \
- FOP2E(op##w, ax, dx) \
- FOP2E(op##l, eax, edx) \
- ON64(FOP2E(op##q, rax, rdx)) \
- FOP_END
-
-/* 2 operand, src is CL */
-#define FASTOP2CL(op) \
- FOP_START(op) \
- FOP2E(op##b, al, cl) \
- FOP2E(op##w, ax, cl) \
- FOP2E(op##l, eax, cl) \
- ON64(FOP2E(op##q, rax, cl)) \
- FOP_END
-
-/* 2 operand, src and dest are reversed */
-#define FASTOP2R(op, name) \
- FOP_START(name) \
- FOP2E(op##b, dl, al) \
- FOP2E(op##w, dx, ax) \
- FOP2E(op##l, edx, eax) \
- ON64(FOP2E(op##q, rdx, rax)) \
- FOP_END
-
-#define FOP3E(op, dst, src, src2) \
- __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
- #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
- __FOP_RET(#op "_" #dst "_" #src "_" #src2)
-
-/* 3-operand, word-only, src2=cl */
-#define FASTOP3WCL(op) \
- FOP_START(op) \
- FOPNOP() \
- FOP3E(op##w, ax, dx, cl) \
- FOP3E(op##l, eax, edx, cl) \
- ON64(FOP3E(op##q, rax, rdx, cl)) \
- FOP_END
-
-/* Special case for SETcc - 1 instruction per cc */
-#define FOP_SETCC(op) \
- FOP_FUNC(op) \
- #op " %al \n\t" \
- FOP_RET(op)
-
-FOP_START(setcc)
-FOP_SETCC(seto)
-FOP_SETCC(setno)
-FOP_SETCC(setc)
-FOP_SETCC(setnc)
-FOP_SETCC(setz)
-FOP_SETCC(setnz)
-FOP_SETCC(setbe)
-FOP_SETCC(setnbe)
-FOP_SETCC(sets)
-FOP_SETCC(setns)
-FOP_SETCC(setp)
-FOP_SETCC(setnp)
-FOP_SETCC(setl)
-FOP_SETCC(setnl)
-FOP_SETCC(setle)
-FOP_SETCC(setnle)
-FOP_END;
-
-FOP_START(salc)
-FOP_FUNC(salc)
-"pushf; sbb %al, %al; popf \n\t"
-FOP_RET(salc)
-FOP_END;
+#define EM_ASM_START(op) \
+static int em_##op(struct x86_emulate_ctxt *ctxt) \
+{ \
+ unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \
+ int bytes = 1, ok = 1; \
+ if (!(ctxt->d & ByteOp)) \
+ bytes = ctxt->dst.bytes; \
+ switch (bytes) {
+
+#define __EM_ASM(str) \
+ asm("push %[flags]; popf \n\t" \
+ "10: " str \
+ "pushf; pop %[flags] \n\t" \
+ "11: \n\t" \
+ : "+a" (ctxt->dst.val), \
+ "+d" (ctxt->src.val), \
+ [flags] "+D" (flags), \
+ "+S" (ok) \
+ : "c" (ctxt->src2.val))
+
+#define __EM_ASM_1(op, dst) \
+ __EM_ASM(#op " %%" #dst " \n\t")
+
+#define __EM_ASM_1_EX(op, dst) \
+ __EM_ASM(#op " %%" #dst " \n\t" \
+ _ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi))
+
+#define __EM_ASM_2(op, dst, src) \
+ __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
+
+#define __EM_ASM_3(op, dst, src, src2) \
+ __EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t")
+
+#define EM_ASM_END \
+ } \
+ ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \
+ return !ok ? emulate_de(ctxt) : X86EMUL_CONTINUE; \
+}
+
+/* 1-operand, using "a" (dst) */
+#define EM_ASM_1(op) \
+ EM_ASM_START(op) \
+ case 1: __EM_ASM_1(op##b, al); break; \
+ case 2: __EM_ASM_1(op##w, ax); break; \
+ case 4: __EM_ASM_1(op##l, eax); break; \
+ ON64(case 8: __EM_ASM_1(op##q, rax); break;) \
+ EM_ASM_END
+
+/* 1-operand, using "c" (src2) */
+#define EM_ASM_1SRC2(op, name) \
+ EM_ASM_START(name) \
+ case 1: __EM_ASM_1(op##b, cl); break; \
+ case 2: __EM_ASM_1(op##w, cx); break; \
+ case 4: __EM_ASM_1(op##l, ecx); break; \
+ ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \
+ EM_ASM_END
+
+/* 1-operand, using "c" (src2) with exception */
+#define EM_ASM_1SRC2EX(op, name) \
+ EM_ASM_START(name) \
+ case 1: __EM_ASM_1_EX(op##b, cl); break; \
+ case 2: __EM_ASM_1_EX(op##w, cx); break; \
+ case 4: __EM_ASM_1_EX(op##l, ecx); break; \
+ ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \
+ EM_ASM_END
+
+/* 2-operand, using "a" (dst), "d" (src) */
+#define EM_ASM_2(op) \
+ EM_ASM_START(op) \
+ case 1: __EM_ASM_2(op##b, al, dl); break; \
+ case 2: __EM_ASM_2(op##w, ax, dx); break; \
+ case 4: __EM_ASM_2(op##l, eax, edx); break; \
+ ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \
+ EM_ASM_END
+
+/* 2-operand, reversed */
+#define EM_ASM_2R(op, name) \
+ EM_ASM_START(name) \
+ case 1: __EM_ASM_2(op##b, dl, al); break; \
+ case 2: __EM_ASM_2(op##w, dx, ax); break; \
+ case 4: __EM_ASM_2(op##l, edx, eax); break; \
+ ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \
+ EM_ASM_END
+
+/* 2-operand, word only (no byte op) */
+#define EM_ASM_2W(op) \
+ EM_ASM_START(op) \
+ case 1: break; \
+ case 2: __EM_ASM_2(op##w, ax, dx); break; \
+ case 4: __EM_ASM_2(op##l, eax, edx); break; \
+ ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \
+ EM_ASM_END
+
+/* 2-operand, using "a" (dst) and CL (src2) */
+#define EM_ASM_2CL(op) \
+ EM_ASM_START(op) \
+ case 1: __EM_ASM_2(op##b, al, cl); break; \
+ case 2: __EM_ASM_2(op##w, ax, cl); break; \
+ case 4: __EM_ASM_2(op##l, eax, cl); break; \
+ ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \
+ EM_ASM_END
+
+/* 3-operand, using "a" (dst), "d" (src) and CL (src2) */
+#define EM_ASM_3WCL(op) \
+ EM_ASM_START(op) \
+ case 1: break; \
+ case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \
+ case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \
+ ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); break;) \
+ EM_ASM_END
+
+static int em_salc(struct x86_emulate_ctxt *ctxt)
+{
+ /*
+ * Set AL 0xFF if CF is set, or 0x00 when clear.
+ */
+ ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF);
+ return X86EMUL_CONTINUE;
+}
/*
* XXX: inoutclob user must know where the argument is being expanded.
@@ -1006,56 +950,55 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
return rc;
}
-FASTOP2(add);
-FASTOP2(or);
-FASTOP2(adc);
-FASTOP2(sbb);
-FASTOP2(and);
-FASTOP2(sub);
-FASTOP2(xor);
-FASTOP2(cmp);
-FASTOP2(test);
-
-FASTOP1SRC2(mul, mul_ex);
-FASTOP1SRC2(imul, imul_ex);
-FASTOP1SRC2EX(div, div_ex);
-FASTOP1SRC2EX(idiv, idiv_ex);
-
-FASTOP3WCL(shld);
-FASTOP3WCL(shrd);
-
-FASTOP2W(imul);
-
-FASTOP1(not);
-FASTOP1(neg);
-FASTOP1(inc);
-FASTOP1(dec);
-
-FASTOP2CL(rol);
-FASTOP2CL(ror);
-FASTOP2CL(rcl);
-FASTOP2CL(rcr);
-FASTOP2CL(shl);
-FASTOP2CL(shr);
-FASTOP2CL(sar);
-
-FASTOP2W(bsf);
-FASTOP2W(bsr);
-FASTOP2W(bt);
-FASTOP2W(bts);
-FASTOP2W(btr);
-FASTOP2W(btc);
-
-FASTOP2(xadd);
-
-FASTOP2R(cmp, cmp_r);
+EM_ASM_2(add);
+EM_ASM_2(or);
+EM_ASM_2(adc);
+EM_ASM_2(sbb);
+EM_ASM_2(and);
+EM_ASM_2(sub);
+EM_ASM_2(xor);
+EM_ASM_2(cmp);
+EM_ASM_2(test);
+EM_ASM_2(xadd);
+
+EM_ASM_1SRC2(mul, mul_ex);
+EM_ASM_1SRC2(imul, imul_ex);
+EM_ASM_1SRC2EX(div, div_ex);
+EM_ASM_1SRC2EX(idiv, idiv_ex);
+
+EM_ASM_3WCL(shld);
+EM_ASM_3WCL(shrd);
+
+EM_ASM_2W(imul);
+
+EM_ASM_1(not);
+EM_ASM_1(neg);
+EM_ASM_1(inc);
+EM_ASM_1(dec);
+
+EM_ASM_2CL(rol);
+EM_ASM_2CL(ror);
+EM_ASM_2CL(rcl);
+EM_ASM_2CL(rcr);
+EM_ASM_2CL(shl);
+EM_ASM_2CL(shr);
+EM_ASM_2CL(sar);
+
+EM_ASM_2W(bsf);
+EM_ASM_2W(bsr);
+EM_ASM_2W(bt);
+EM_ASM_2W(bts);
+EM_ASM_2W(btr);
+EM_ASM_2W(btc);
+
+EM_ASM_2R(cmp, cmp_r);
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
/* If src is zero, do not writeback, but update flags */
if (ctxt->src.val == 0)
ctxt->dst.type = OP_NONE;
- return fastop(ctxt, em_bsf);
+ return em_bsf(ctxt);
}
static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
@@ -1063,18 +1006,12 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
/* If src is zero, do not writeback, but update flags */
if (ctxt->src.val == 0)
ctxt->dst.type = OP_NONE;
- return fastop(ctxt, em_bsr);
+ return em_bsr(ctxt);
}
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
- u8 rc;
- void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
-
- flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
- asm("push %[flags]; popf; " CALL_NOSPEC
- : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags));
- return rc;
+ return __emulate_cc(flags, condition & 0xf);
}
static void fetch_register_operand(struct operand *op)
@@ -1553,6 +1490,37 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
return linear_write_system(ctxt, addr, desc, sizeof(*desc));
}
+static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl)
+{
+ const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET;
+ u64 efer = 0, cet = 0, ssp = 0;
+
+ if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET))
+ return false;
+
+ if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer))
+ return true;
+
+ /* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */
+ if (!(efer & EFER_LMA))
+ return false;
+
+ if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet))
+ return true;
+
+ if (!(cet & CET_SHSTK_EN))
+ return false;
+
+ if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp))
+ return true;
+
+ /*
+ * On transfer from 64-bit mode to compatibility mode, SSP[63:32] must
+ * be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode.
+ */
+ return ssp >> 32;
+}
+
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
u16 selector, int seg, u8 cpl,
enum x86_transfer_type transfer,
@@ -1693,6 +1661,10 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (efer & EFER_LMA)
goto exception;
}
+ if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) {
+ err_code = 0;
+ goto exception;
+ }
/* CS(RPL) <- CPL */
selector = (selector & 0xfffc) | cpl;
@@ -2289,7 +2261,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
ctxt->src.orig_val = ctxt->src.val;
ctxt->src.val = ctxt->dst.orig_val;
- fastop(ctxt, em_cmp);
+ em_cmp(ctxt);
if (ctxt->eflags & X86_EFLAGS_ZF) {
/* Success: write back to memory; no update of EAX */
@@ -3054,7 +3026,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
ctxt->src.type = OP_IMM;
ctxt->src.val = 0;
ctxt->src.bytes = 1;
- fastop(ctxt, em_or);
+ em_or(ctxt);
ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
if (cf)
ctxt->eflags |= X86_EFLAGS_CF;
@@ -3080,7 +3052,7 @@ static int em_aam(struct x86_emulate_ctxt *ctxt)
ctxt->src.type = OP_IMM;
ctxt->src.val = 0;
ctxt->src.bytes = 1;
- fastop(ctxt, em_or);
+ em_or(ctxt);
return X86EMUL_CONTINUE;
}
@@ -3098,7 +3070,7 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
ctxt->src.type = OP_IMM;
ctxt->src.val = 0;
ctxt->src.bytes = 1;
- fastop(ctxt, em_or);
+ em_or(ctxt);
return X86EMUL_CONTINUE;
}
@@ -3189,7 +3161,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)
static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
ctxt->dst.val = ctxt->src2.val;
- return fastop(ctxt, em_imul);
+ return em_imul(ctxt);
}
static int em_cwd(struct x86_emulate_ctxt *ctxt)
@@ -3968,7 +3940,6 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
-#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \
{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
@@ -3983,9 +3954,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define I2bvIP(_f, _e, _i, _p) \
IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
-#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
- F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
- F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
+#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \
+ I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
+ I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
static const struct opcode group7_rm0[] = {
N,
@@ -4023,14 +3994,14 @@ static const struct opcode group7_rm7[] = {
};
static const struct opcode group1[] = {
- F(Lock, em_add),
- F(Lock | PageTable, em_or),
- F(Lock, em_adc),
- F(Lock, em_sbb),
- F(Lock | PageTable, em_and),
- F(Lock, em_sub),
- F(Lock, em_xor),
- F(NoWrite, em_cmp),
+ I(Lock, em_add),
+ I(Lock | PageTable, em_or),
+ I(Lock, em_adc),
+ I(Lock, em_sbb),
+ I(Lock | PageTable, em_and),
+ I(Lock, em_sub),
+ I(Lock, em_xor),
+ I(NoWrite, em_cmp),
};
static const struct opcode group1A[] = {
@@ -4038,38 +4009,38 @@ static const struct opcode group1A[] = {
};
static const struct opcode group2[] = {
- F(DstMem | ModRM, em_rol),
- F(DstMem | ModRM, em_ror),
- F(DstMem | ModRM, em_rcl),
- F(DstMem | ModRM, em_rcr),
- F(DstMem | ModRM, em_shl),
- F(DstMem | ModRM, em_shr),
- F(DstMem | ModRM, em_shl),
- F(DstMem | ModRM, em_sar),
+ I(DstMem | ModRM, em_rol),
+ I(DstMem | ModRM, em_ror),
+ I(DstMem | ModRM, em_rcl),
+ I(DstMem | ModRM, em_rcr),
+ I(DstMem | ModRM, em_shl),
+ I(DstMem | ModRM, em_shr),
+ I(DstMem | ModRM, em_shl),
+ I(DstMem | ModRM, em_sar),
};
static const struct opcode group3[] = {
- F(DstMem | SrcImm | NoWrite, em_test),
- F(DstMem | SrcImm | NoWrite, em_test),
- F(DstMem | SrcNone | Lock, em_not),
- F(DstMem | SrcNone | Lock, em_neg),
- F(DstXacc | Src2Mem, em_mul_ex),
- F(DstXacc | Src2Mem, em_imul_ex),
- F(DstXacc | Src2Mem, em_div_ex),
- F(DstXacc | Src2Mem, em_idiv_ex),
+ I(DstMem | SrcImm | NoWrite, em_test),
+ I(DstMem | SrcImm | NoWrite, em_test),
+ I(DstMem | SrcNone | Lock, em_not),
+ I(DstMem | SrcNone | Lock, em_neg),
+ I(DstXacc | Src2Mem, em_mul_ex),
+ I(DstXacc | Src2Mem, em_imul_ex),
+ I(DstXacc | Src2Mem, em_div_ex),
+ I(DstXacc | Src2Mem, em_idiv_ex),
};
static const struct opcode group4[] = {
- F(ByteOp | DstMem | SrcNone | Lock, em_inc),
- F(ByteOp | DstMem | SrcNone | Lock, em_dec),
+ I(ByteOp | DstMem | SrcNone | Lock, em_inc),
+ I(ByteOp | DstMem | SrcNone | Lock, em_dec),
N, N, N, N, N, N,
};
static const struct opcode group5[] = {
- F(DstMem | SrcNone | Lock, em_inc),
- F(DstMem | SrcNone | Lock, em_dec),
- I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
- I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
+ I(DstMem | SrcNone | Lock, em_inc),
+ I(DstMem | SrcNone | Lock, em_dec),
+ I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs),
+ I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far),
I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
@@ -4103,10 +4074,10 @@ static const struct group_dual group7 = { {
static const struct opcode group8[] = {
N, N, N, N,
- F(DstMem | SrcImmByte | NoWrite, em_bt),
- F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
- F(DstMem | SrcImmByte | Lock, em_btr),
- F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
+ I(DstMem | SrcImmByte | NoWrite, em_bt),
+ I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
+ I(DstMem | SrcImmByte | Lock, em_btr),
+ I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};
/*
@@ -4243,31 +4214,31 @@ static const struct instr_dual instr_dual_8d = {
static const struct opcode opcode_table[256] = {
/* 0x00 - 0x07 */
- F6ALU(Lock, em_add),
+ I6ALU(Lock, em_add),
I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
/* 0x08 - 0x0F */
- F6ALU(Lock | PageTable, em_or),
+ I6ALU(Lock | PageTable, em_or),
I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
N,
/* 0x10 - 0x17 */
- F6ALU(Lock, em_adc),
+ I6ALU(Lock, em_adc),
I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
/* 0x18 - 0x1F */
- F6ALU(Lock, em_sbb),
+ I6ALU(Lock, em_sbb),
I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
/* 0x20 - 0x27 */
- F6ALU(Lock | PageTable, em_and), N, N,
+ I6ALU(Lock | PageTable, em_and), N, N,
/* 0x28 - 0x2F */
- F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
+ I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
/* 0x30 - 0x37 */
- F6ALU(Lock, em_xor), N, N,
+ I6ALU(Lock, em_xor), N, N,
/* 0x38 - 0x3F */
- F6ALU(NoWrite, em_cmp), N, N,
+ I6ALU(NoWrite, em_cmp), N, N,
/* 0x40 - 0x4F */
- X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
+ X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)),
/* 0x50 - 0x57 */
X8(I(SrcReg | Stack, em_push)),
/* 0x58 - 0x5F */
@@ -4291,7 +4262,7 @@ static const struct opcode opcode_table[256] = {
G(DstMem | SrcImm, group1),
G(ByteOp | DstMem | SrcImm | No64, group1),
G(DstMem | SrcImmByte, group1),
- F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
+ I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
/* 0x88 - 0x8F */
I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
@@ -4304,7 +4275,7 @@ static const struct opcode opcode_table[256] = {
DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
/* 0x98 - 0x9F */
D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
- I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
+ I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N,
II(ImplicitOps | Stack, em_pushf, pushf),
II(ImplicitOps | Stack, em_popf, popf),
I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
@@ -4312,37 +4283,37 @@ static const struct opcode opcode_table[256] = {
I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
- F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
+ I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
/* 0xA8 - 0xAF */
- F2bv(DstAcc | SrcImm | NoWrite, em_test),
+ I2bv(DstAcc | SrcImm | NoWrite, em_test),
I2bv(SrcAcc | DstDI | Mov | String, em_mov),
I2bv(SrcSI | DstAcc | Mov | String, em_mov),
- F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
+ I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
/* 0xB0 - 0xB7 */
X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
/* 0xB8 - 0xBF */
X8(I(DstReg | SrcImm64 | Mov, em_mov)),
/* 0xC0 - 0xC7 */
G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
- I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
- I(ImplicitOps | NearBranch | IsBranch, em_ret),
+ I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm),
+ I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret),
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
G(ByteOp, group11), G(0, group11),
/* 0xC8 - 0xCF */
- I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
- I(Stack | IsBranch, em_leave),
- I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
- I(ImplicitOps | IsBranch, em_ret_far),
- D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
+ I(Stack | SrcImmU16 | Src2ImmByte, em_enter),
+ I(Stack, em_leave),
+ I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
+ I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
+ D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
D(ImplicitOps | No64 | IsBranch),
- II(ImplicitOps | IsBranch, em_iret, iret),
+ II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
/* 0xD0 - 0xD7 */
G(Src2One | ByteOp, group2), G(Src2One, group2),
G(Src2CL | ByteOp, group2), G(Src2CL, group2),
I(DstAcc | SrcImmUByte | No64, em_aam),
I(DstAcc | SrcImmUByte | No64, em_aad),
- F(DstAcc | ByteOp | No64, em_salc),
+ I(DstAcc | ByteOp | No64, em_salc),
I(DstAcc | SrcXLat | ByteOp, em_mov),
/* 0xD8 - 0xDF */
N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
@@ -4352,7 +4323,7 @@ static const struct opcode opcode_table[256] = {
I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
/* 0xE8 - 0xEF */
- I(SrcImm | NearBranch | IsBranch, em_call),
+ I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
D(SrcImm | ImplicitOps | NearBranch | IsBranch),
I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
@@ -4371,7 +4342,7 @@ static const struct opcode opcode_table[256] = {
static const struct opcode twobyte_table[256] = {
/* 0x00 - 0x0F */
G(0, group6), GD(0, &group7), N, N,
- N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
+ N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall),
II(ImplicitOps | Priv, em_clts, clts), N,
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
@@ -4402,8 +4373,8 @@ static const struct opcode twobyte_table[256] = {
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
II(ImplicitOps | Priv, em_rdmsr, rdmsr),
IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
- I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
- I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
+ I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter),
+ I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
N, N,
N, N, N, N, N, N, N, N,
/* 0x40 - 0x4F */
@@ -4427,32 +4398,32 @@ static const struct opcode twobyte_table[256] = {
/* 0xA0 - 0xA7 */
I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
II(ImplicitOps, em_cpuid, cpuid),
- F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
- F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
- F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
+ I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
+ I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
+ I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
/* 0xA8 - 0xAF */
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
- F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
- F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
- F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
- GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
+ I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
+ I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
+ I(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
+ GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul),
/* 0xB0 - 0xB7 */
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
- F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
+ I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xB8 - 0xBF */
N, N,
G(BitOp, group8),
- F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
+ I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
I(DstReg | SrcMem | ModRM, em_bsf_c),
I(DstReg | SrcMem | ModRM, em_bsr_c),
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xC0 - 0xC7 */
- F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
+ I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
N, ID(0, &instr_dual_0f_c3),
N, N, N, GD(0, &group9),
/* 0xC8 - 0xCF */
@@ -4514,6 +4485,60 @@ static const struct opcode opcode_map_0f_38[256] = {
#undef I2bvIP
#undef I6ALU
+static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt)
+{
+ return ctxt->d & ShadowStack;
+}
+
+static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
+{
+ u64 flags = ctxt->d;
+
+ if (!(flags & IsBranch))
+ return false;
+
+ /*
+ * All far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are
+ * indirect and thus affect IBT state. All far RETs (including SYSEXIT
+ * and IRET) are protected via Shadow Stacks and thus don't affect IBT
+ * state. IRET #GPs when returning to virtual-8086 and IBT or SHSTK is
+ * enabled, but that should be handled by IRET emulation (in the very
+ * unlikely scenario that KVM adds support for fully emulating IRET).
+ */
+ if (!(flags & NearBranch))
+ return ctxt->execute != em_iret &&
+ ctxt->execute != em_ret_far &&
+ ctxt->execute != em_ret_far_imm &&
+ ctxt->execute != em_sysexit;
+
+ switch (flags & SrcMask) {
+ case SrcReg:
+ case SrcMem:
+ case SrcMem16:
+ case SrcMem32:
+ return true;
+ case SrcMemFAddr:
+ case SrcImmFAddr:
+ /* Far branches should be handled above. */
+ WARN_ON_ONCE(1);
+ return true;
+ case SrcNone:
+ case SrcImm:
+ case SrcImmByte:
+ /*
+ * Note, ImmU16 is used only for the stack adjustment operand on ENTER
+ * and RET instructions. ENTER isn't a branch and RET FAR is handled
+ * by the NearBranch check above. RET itself isn't an indirect branch.
+ */
+ case SrcImmU16:
+ return false;
+ default:
+ WARN_ONCE(1, "Unexpected Src operand '%llx' on branch",
+ flags & SrcMask);
+ return false;
+ }
+}
+
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
{
unsigned size;
@@ -4943,6 +4968,40 @@ done_prefixes:
ctxt->execute = opcode.u.execute;
+ /*
+ * Reject emulation if KVM might need to emulate shadow stack updates
+ * and/or indirect branch tracking enforcement, which the emulator
+ * doesn't support.
+ */
+ if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) &&
+ ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
+ u64 u_cet = 0, s_cet = 0;
+
+ /*
+ * Check both User and Supervisor on far transfers as inter-
+ * privilege level transfers are impacted by CET at the target
+ * privilege level, and that is not known at this time. The
+ * expectation is that the guest will not require emulation of
+ * any CET-affected instructions at any privilege level.
+ */
+ if (!(ctxt->d & NearBranch))
+ u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
+ else if (ctxt->ops->cpl(ctxt) == 3)
+ u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
+ else
+ s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
+
+ if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
+ (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
+ return EMULATION_FAILED;
+
+ if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt))
+ return EMULATION_FAILED;
+
+ if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt))
+ return EMULATION_FAILED;
+ }
+
if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
likely(!(ctxt->d & EmulateOnUD)))
return EMULATION_FAILED;
@@ -5074,24 +5133,6 @@ static void fetch_possible_mmx_operand(struct operand *op)
kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
}
-static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
-{
- ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
-
- if (!(ctxt->d & ByteOp))
- fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
-
- asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
- : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
- : "c"(ctxt->src2.val));
-
- ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
- if (!fop) /* exception is returned in fop variable */
- return emulate_de(ctxt);
- return X86EMUL_CONTINUE;
-}
-
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
/* Clear fields that are set conditionally but read without a guard. */
@@ -5107,12 +5148,11 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt)
ctxt->mem_read.end = 0;
}
-int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
{
const struct x86_emulate_ops *ops = ctxt->ops;
int rc = X86EMUL_CONTINUE;
int saved_dst_type = ctxt->dst.type;
- bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt);
ctxt->mem_read.pos = 0;
@@ -5160,7 +5200,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
fetch_possible_mmx_operand(&ctxt->dst);
}
- if (unlikely(is_guest_mode) && ctxt->intercept) {
+ if (unlikely(check_intercepts) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5189,7 +5229,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
+ if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5243,7 +5283,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
+ if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
@@ -5256,10 +5296,7 @@ special_insn:
ctxt->eflags &= ~X86_EFLAGS_RF;
if (ctxt->execute) {
- if (ctxt->d & Fastop)
- rc = fastop(ctxt, ctxt->fop);
- else
- rc = ctxt->execute(ctxt);
+ rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
goto writeback;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 72b19a88a776..38595ecb990d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -923,7 +923,7 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
return false;
return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}
-EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_hv_assist_page_enabled);
int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
{
@@ -935,7 +935,7 @@ int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
&hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page));
}
-EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_hv_get_assist_page);
static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
@@ -1168,15 +1168,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
- mutex_lock(&hv->hv_lock);
+ guard(mutex)(&hv->hv_lock);
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
hv->hv_tsc_page_status == HV_TSC_PAGE_SET ||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
- goto out_unlock;
+ return;
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
- goto out_unlock;
+ return;
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
/*
@@ -1192,7 +1192,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
goto out_err;
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
- goto out_unlock;
+ return;
}
/*
@@ -1228,12 +1228,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
goto out_err;
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
- goto out_unlock;
+ return;
out_err:
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
-out_unlock:
- mutex_unlock(&hv->hv_lock);
}
void kvm_hv_request_tsc_page_update(struct kvm *kvm)
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 2b5d389bca5f..2c2783296aed 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2001 MandrakeSoft S.A.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
@@ -8,20 +9,6 @@
* http://www.linux-mandrake.com/
* http://www.mandrakesoft.com/
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Yunhong Jiang <yunhong.jiang@intel.com>
* Yaozu (Eddie) Dong <eddie.dong@intel.com>
* Based on Xen 3.1 code.
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 16da89259011..7cc8950005b6 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -103,7 +103,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
}
-EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_has_injectable_intr);
/*
* check if there is pending interrupt without
@@ -119,7 +119,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
}
-EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_has_interrupt);
/*
* Read pending interrupt(from non-APIC source)
@@ -148,7 +148,7 @@ int kvm_cpu_get_extint(struct kvm_vcpu *v)
WARN_ON_ONCE(!irqchip_split(v->kvm));
return get_userspace_extint(v);
}
-EXPORT_SYMBOL_GPL(kvm_cpu_get_extint);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_get_extint);
/*
* Read pending interrupt vector and intack.
@@ -195,63 +195,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
return irqchip_in_kernel(kvm);
}
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, struct dest_map *dest_map)
-{
- int r = -1;
- struct kvm_vcpu *vcpu, *lowest = NULL;
- unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
- unsigned int dest_vcpus = 0;
-
- if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
- return r;
-
- if (irq->dest_mode == APIC_DEST_PHYSICAL &&
- irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
- pr_info("apic: phys broadcast and lowest prio\n");
- irq->delivery_mode = APIC_DM_FIXED;
- }
-
- memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!kvm_apic_present(vcpu))
- continue;
-
- if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
- irq->dest_id, irq->dest_mode))
- continue;
-
- if (!kvm_lowest_prio_delivery(irq)) {
- if (r < 0)
- r = 0;
- r += kvm_apic_set_irq(vcpu, irq, dest_map);
- } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
- if (!kvm_vector_hashing_enabled()) {
- if (!lowest)
- lowest = vcpu;
- else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
- lowest = vcpu;
- } else {
- __set_bit(i, dest_vcpu_bitmap);
- dest_vcpus++;
- }
- }
- }
-
- if (dest_vcpus != 0) {
- int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
- dest_vcpu_bitmap, KVM_MAX_VCPUS);
-
- lowest = kvm_get_vcpu(kvm, idx);
- }
-
- if (lowest)
- r = kvm_apic_set_irq(lowest, irq, dest_map);
-
- return r;
-}
-
static void kvm_msi_to_lapic_irq(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
@@ -411,34 +354,6 @@ int kvm_set_routing_entry(struct kvm *kvm,
return 0;
}
-bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
- struct kvm_vcpu **dest_vcpu)
-{
- int r = 0;
- unsigned long i;
- struct kvm_vcpu *vcpu;
-
- if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
- return true;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!kvm_apic_present(vcpu))
- continue;
-
- if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
- irq->dest_id, irq->dest_mode))
- continue;
-
- if (++r == 2)
- return false;
-
- *dest_vcpu = vcpu;
- }
-
- return r == 1;
-}
-EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
-
void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
u8 vector, unsigned long *ioapic_handled_vectors)
{
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 5e62c1f79ce6..34f4a78a7a01 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -121,8 +121,4 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq,
- struct dest_map *dest_map);
-
#endif
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 36a8786db291..8ddb01191d6f 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -7,7 +7,8 @@
#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP)
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE \
+ | X86_CR4_CET)
#define X86_CR0_PDPTR_BITS (X86_CR0_CD | X86_CR0_NW | X86_CR0_PG)
#define X86_CR4_TLBFLUSH_BITS (X86_CR4_PGE | X86_CR4_PCIDE | X86_CR4_PAE | X86_CR4_SMEP)
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index c1df5acfacaf..7b5ddb787a25 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -235,7 +235,6 @@ struct x86_emulate_ops {
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
bool (*is_smm)(struct x86_emulate_ctxt *ctxt);
- bool (*is_guest_mode)(struct x86_emulate_ctxt *ctxt);
int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
@@ -521,7 +520,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_RESTART 1
#define EMULATION_INTERCEPTED 2
void init_decode_cache(struct x86_emulate_ctxt *ctxt);
-int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int idt_index, int reason,
bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c
index ded0bd688c65..ee53e75a60cb 100644
--- a/arch/x86/kvm/kvm_onhyperv.c
+++ b/arch/x86/kvm/kvm_onhyperv.c
@@ -101,13 +101,13 @@ int hv_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, gfn_t nr_pages)
return __hv_flush_remote_tlbs_range(kvm, &range);
}
-EXPORT_SYMBOL_GPL(hv_flush_remote_tlbs_range);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(hv_flush_remote_tlbs_range);
int hv_flush_remote_tlbs(struct kvm *kvm)
{
return __hv_flush_remote_tlbs_range(kvm, NULL);
}
-EXPORT_SYMBOL_GPL(hv_flush_remote_tlbs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(hv_flush_remote_tlbs);
void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
{
@@ -121,4 +121,4 @@ void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
spin_unlock(&kvm_arch->hv_root_tdp_lock);
}
}
-EXPORT_SYMBOL_GPL(hv_track_root_tdp);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(hv_track_root_tdp);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5fc437341e03..0ae7f913d782 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -74,6 +74,10 @@ module_param(lapic_timer_advance, bool, 0444);
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+
+static bool __read_mostly vector_hashing_enabled = true;
+module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444);
+
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
@@ -102,7 +106,7 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
}
__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
-EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_has_noapic_vcpu);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
@@ -130,7 +134,7 @@ static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
}
-bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
+static bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
{
return kvm_x86_ops.set_hv_timer
&& !(kvm_mwait_in_guest(vcpu->kvm) ||
@@ -642,7 +646,7 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr)
return ((max_updated_irr != -1) &&
(max_updated_irr == *max_irr));
}
-EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_apic_update_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr)
{
@@ -653,7 +657,7 @@ bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr
apic->irr_pending = true;
return irr_updated;
}
-EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_irr);
static inline int apic_search_irr(struct kvm_lapic *apic)
{
@@ -693,7 +697,7 @@ void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
{
apic_clear_irr(vec, vcpu->arch.apic);
}
-EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_clear_irr);
static void *apic_vector_to_isr(int vec, struct kvm_lapic *apic)
{
@@ -775,7 +779,7 @@ void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
}
-EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_hwapic_isr);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
@@ -786,7 +790,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
*/
return apic_find_highest_irr(vcpu->arch.apic);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_find_highest_irr);
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode,
@@ -950,7 +954,7 @@ void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
{
apic_update_ppr(vcpu->arch.apic);
}
-EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_ppr);
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
@@ -1061,21 +1065,14 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
return false;
}
}
-EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_match_dest);
-int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
- const unsigned long *bitmap, u32 bitmap_size)
+static int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
+ const unsigned long *bitmap, u32 bitmap_size)
{
- u32 mod;
- int i, idx = -1;
-
- mod = vector % dest_vcpus;
-
- for (i = 0; i <= mod; i++) {
- idx = find_next_bit(bitmap, bitmap_size, idx + 1);
- BUG_ON(idx == bitmap_size);
- }
+ int idx = find_nth_bit(bitmap, bitmap_size, vector % dest_vcpus);
+ BUG_ON(idx >= bitmap_size);
return idx;
}
@@ -1106,6 +1103,16 @@ static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
return false;
}
+static bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
+{
+ return (irq->delivery_mode == APIC_DM_LOWEST || irq->msi_redir_hint);
+}
+
+static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+{
+ return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
+}
+
/* Return true if the interrupt can be handled by using *bitmap as index mask
* for valid destinations in *dst array.
* Return false if kvm_apic_map_get_dest_lapic did nothing useful.
@@ -1149,7 +1156,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
if (!kvm_lowest_prio_delivery(irq))
return true;
- if (!kvm_vector_hashing_enabled()) {
+ if (!vector_hashing_enabled) {
lowest = -1;
for_each_set_bit(i, bitmap, 16) {
if (!(*dst)[i])
@@ -1230,8 +1237,9 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
* interrupt.
* - Otherwise, use remapped mode to inject the interrupt.
*/
-bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
- struct kvm_vcpu **dest_vcpu)
+static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
+ struct kvm_lapic_irq *irq,
+ struct kvm_vcpu **dest_vcpu)
{
struct kvm_apic_map *map;
unsigned long bitmap;
@@ -1258,6 +1266,91 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
return ret;
}
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ struct kvm_vcpu **dest_vcpu)
+{
+ int r = 0;
+ unsigned long i;
+ struct kvm_vcpu *vcpu;
+
+ if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+ return true;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+
+ if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+ irq->dest_id, irq->dest_mode))
+ continue;
+
+ if (++r == 2)
+ return false;
+
+ *dest_vcpu = vcpu;
+ }
+
+ return r == 1;
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu);
+
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq, struct dest_map *dest_map)
+{
+ int r = -1;
+ struct kvm_vcpu *vcpu, *lowest = NULL;
+ unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int dest_vcpus = 0;
+
+ if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+ return r;
+
+ if (irq->dest_mode == APIC_DEST_PHYSICAL &&
+ irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
+ pr_info("apic: phys broadcast and lowest prio\n");
+ irq->delivery_mode = APIC_DM_FIXED;
+ }
+
+ memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+
+ if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
+ irq->dest_id, irq->dest_mode))
+ continue;
+
+ if (!kvm_lowest_prio_delivery(irq)) {
+ if (r < 0)
+ r = 0;
+ r += kvm_apic_set_irq(vcpu, irq, dest_map);
+ } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
+ if (!vector_hashing_enabled) {
+ if (!lowest)
+ lowest = vcpu;
+ else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+ lowest = vcpu;
+ } else {
+ __set_bit(i, dest_vcpu_bitmap);
+ dest_vcpus++;
+ }
+ }
+ }
+
+ if (dest_vcpus != 0) {
+ int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
+ dest_vcpu_bitmap, KVM_MAX_VCPUS);
+
+ lowest = kvm_get_vcpu(kvm, idx);
+ }
+
+ if (lowest)
+ r = kvm_apic_set_irq(lowest, irq, dest_map);
+
+ return r;
+}
+
/*
* Add a pending IRQ into lapic.
* Return 1 if successfully added and 0 if discarded.
@@ -1401,11 +1494,6 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
rcu_read_unlock();
}
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
-{
- return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
-}
-
static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
@@ -1481,32 +1569,38 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
kvm_ioapic_send_eoi(apic, vector);
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_eoi_accelerated);
-void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
+static void kvm_icr_to_lapic_irq(struct kvm_lapic *apic, u32 icr_low,
+ u32 icr_high, struct kvm_lapic_irq *irq)
{
- struct kvm_lapic_irq irq;
-
/* KVM has no delay and should always clear the BUSY/PENDING flag. */
WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
- irq.vector = icr_low & APIC_VECTOR_MASK;
- irq.delivery_mode = icr_low & APIC_MODE_MASK;
- irq.dest_mode = icr_low & APIC_DEST_MASK;
- irq.level = (icr_low & APIC_INT_ASSERT) != 0;
- irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
- irq.shorthand = icr_low & APIC_SHORT_MASK;
- irq.msi_redir_hint = false;
+ irq->vector = icr_low & APIC_VECTOR_MASK;
+ irq->delivery_mode = icr_low & APIC_MODE_MASK;
+ irq->dest_mode = icr_low & APIC_DEST_MASK;
+ irq->level = (icr_low & APIC_INT_ASSERT) != 0;
+ irq->trig_mode = icr_low & APIC_INT_LEVELTRIG;
+ irq->shorthand = icr_low & APIC_SHORT_MASK;
+ irq->msi_redir_hint = false;
if (apic_x2apic_mode(apic))
- irq.dest_id = icr_high;
+ irq->dest_id = icr_high;
else
- irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
+ irq->dest_id = GET_XAPIC_DEST_FIELD(icr_high);
+}
+
+void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
+{
+ struct kvm_lapic_irq irq;
+
+ kvm_icr_to_lapic_irq(apic, icr_low, icr_high, &irq);
trace_kvm_apic_ipi(icr_low, irq.dest_id);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
-EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi);
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
@@ -1623,7 +1717,7 @@ u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
return valid_reg_mask;
}
-EXPORT_SYMBOL_GPL(kvm_lapic_readable_reg_mask);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_readable_reg_mask);
static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
@@ -1864,7 +1958,7 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
lapic_timer_int_injected(vcpu))
__kvm_wait_lapic_expire(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_wait_lapic_expire);
static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
{
@@ -2178,7 +2272,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
out:
preempt_enable();
}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_expired_hv_timer);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
@@ -2431,11 +2525,11 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_set_eoi);
#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
-int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast)
{
if (data & X2APIC_ICR_RESERVED_BITS)
return 1;
@@ -2450,7 +2544,20 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
*/
data &= ~APIC_ICR_BUSY;
- kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+ if (fast) {
+ struct kvm_lapic_irq irq;
+ int ignored;
+
+ kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq);
+
+ if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq,
+ &ignored, NULL))
+ return -EWOULDBLOCK;
+
+ trace_kvm_apic_ipi((u32)data, irq.dest_id);
+ } else {
+ kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+ }
if (kvm_x86_ops.x2apic_icr_is_split) {
kvm_lapic_set_reg(apic, APIC_ICR, data);
kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
@@ -2461,6 +2568,16 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
return 0;
}
+static int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+{
+ return __kvm_x2apic_icr_write(apic, data, false);
+}
+
+int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data)
+{
+ return __kvm_x2apic_icr_write(apic, data, true);
+}
+
static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
{
if (kvm_x86_ops.x2apic_icr_is_split)
@@ -2491,7 +2608,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
else
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
}
-EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_write_nodecode);
void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
@@ -2629,7 +2746,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
kvm_recalculate_apic_map(vcpu->kvm);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_apic_set_base);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_base);
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
@@ -2661,26 +2778,23 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
int kvm_alloc_apic_access_page(struct kvm *kvm)
{
void __user *hva;
- int ret = 0;
- mutex_lock(&kvm->slots_lock);
+ guard(mutex)(&kvm->slots_lock);
+
if (kvm->arch.apic_access_memslot_enabled ||
kvm->arch.apic_access_memslot_inhibited)
- goto out;
+ return 0;
hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
- if (IS_ERR(hva)) {
- ret = PTR_ERR(hva);
- goto out;
- }
+ if (IS_ERR(hva))
+ return PTR_ERR(hva);
kvm->arch.apic_access_memslot_enabled = true;
-out:
- mutex_unlock(&kvm->slots_lock);
- return ret;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_alloc_apic_access_page);
void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu)
{
@@ -2944,7 +3058,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
__apic_update_ppr(apic, &ppr);
return apic_has_interrupt_for_ppr(apic, ppr);
}
-EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_has_interrupt);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
@@ -3003,7 +3117,7 @@ void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector)
}
}
-EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_ack_interrupt);
static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s, bool set)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 72de14527698..282b9b7da98c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -105,7 +105,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu);
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int shorthand, unsigned int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec);
bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr);
@@ -119,6 +118,9 @@ void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq,
+ struct dest_map *dest_map);
void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
@@ -137,7 +139,7 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
-int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data);
+int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data);
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
@@ -222,12 +224,6 @@ static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
!kvm_x86_call(apic_init_signal_blocked)(vcpu);
}
-static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
-{
- return (irq->delivery_mode == APIC_DM_LOWEST ||
- irq->msi_redir_hint);
-}
-
static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
{
return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
@@ -240,16 +236,13 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
unsigned long *vcpu_bitmap);
-bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
- struct kvm_vcpu **dest_vcpu);
-int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
- const unsigned long *bitmap, u32 bitmap_size);
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ struct kvm_vcpu **dest_vcpu);
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
-bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu);
static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
{
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b4b6860ab971..f63074048ec6 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -212,7 +212,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
fault = (mmu->permissions[index] >> pte_access) & 1;
- WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
+ WARN_ON_ONCE(pfec & (PFERR_PK_MASK | PFERR_SS_MASK | PFERR_RSVD_MASK));
if (unlikely(mmu->pkru_mask)) {
u32 pkru_bits, offset;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 56c80588efa0..667d66cf76d5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -110,7 +110,7 @@ static bool __ro_after_init tdp_mmu_allowed;
#ifdef CONFIG_X86_64
bool __read_mostly tdp_mmu_enabled = true;
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444);
-EXPORT_SYMBOL_GPL(tdp_mmu_enabled);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(tdp_mmu_enabled);
#endif
static int max_huge_page_level __read_mostly;
@@ -776,7 +776,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K);
}
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ enum kvm_mmu_type mmu_type)
{
/*
* If it's possible to replace the shadow page with an NX huge page,
@@ -790,8 +791,9 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
return;
++kvm->stat.nx_lpage_splits;
+ ++kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
list_add_tail(&sp->possible_nx_huge_page_link,
- &kvm->arch.possible_nx_huge_pages);
+ &kvm->arch.possible_nx_huge_pages[mmu_type].pages);
}
static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -800,7 +802,7 @@ static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
sp->nx_huge_page_disallowed = true;
if (nx_huge_page_possible)
- track_possible_nx_huge_page(kvm, sp);
+ track_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
}
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -819,12 +821,14 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ enum kvm_mmu_type mmu_type)
{
if (list_empty(&sp->possible_nx_huge_page_link))
return;
--kvm->stat.nx_lpage_splits;
+ --kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
list_del_init(&sp->possible_nx_huge_page_link);
}
@@ -832,7 +836,7 @@ static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
sp->nx_huge_page_disallowed = false;
- untrack_possible_nx_huge_page(kvm, sp);
+ untrack_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
}
static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu,
@@ -3861,7 +3865,7 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
write_unlock(&kvm->mmu_lock);
}
}
-EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_roots);
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
{
@@ -3888,7 +3892,7 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
kvm_mmu_free_roots(kvm, mmu, roots_to_free);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_guest_mode_roots);
static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
u8 level)
@@ -4663,10 +4667,16 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
/*
* Retry the page fault if the gfn hit a memslot that is being deleted
* or moved. This ensures any existing SPTEs for the old memslot will
- * be zapped before KVM inserts a new MMIO SPTE for the gfn.
+ * be zapped before KVM inserts a new MMIO SPTE for the gfn. Punt the
+ * error to userspace if this is a prefault, as KVM's prefaulting ABI
+ * doesn't provide the same forward progress guarantees as KVM_RUN.
*/
- if (slot->flags & KVM_MEMSLOT_INVALID)
+ if (slot->flags & KVM_MEMSLOT_INVALID) {
+ if (fault->prefetch)
+ return -EAGAIN;
+
return RET_PF_RETRY;
+ }
if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) {
/*
@@ -4866,7 +4876,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
return r;
}
-EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_page_fault);
#ifdef CONFIG_X86_64
static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
@@ -4956,7 +4966,7 @@ int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level
return -EIO;
}
}
-EXPORT_SYMBOL_GPL(kvm_tdp_map_page);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_map_page);
long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
struct kvm_pre_fault_memory *range)
@@ -5152,7 +5162,7 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
__clear_sp_write_flooding_count(sp);
}
}
-EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_new_pgd);
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
unsigned int access)
@@ -5798,7 +5808,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
shadow_mmu_init_context(vcpu, context, cpu_role, root_role);
kvm_mmu_new_pgd(vcpu, nested_cr3);
}
-EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_npt_mmu);
static union kvm_cpu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
@@ -5852,7 +5862,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
kvm_mmu_new_pgd(vcpu, new_eptp);
}
-EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_ept_mmu);
static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
union kvm_cpu_role cpu_role)
@@ -5917,7 +5927,7 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu)
else
init_kvm_softmmu(vcpu, cpu_role);
}
-EXPORT_SYMBOL_GPL(kvm_init_mmu);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_mmu);
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
@@ -5953,7 +5963,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
kvm_init_mmu(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_reset_context);
int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
@@ -5987,7 +5997,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
out:
return r;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_load);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
@@ -6049,7 +6059,7 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu);
__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_free_obsolete_roots);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_obsolete_roots);
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
int *bytes)
@@ -6375,7 +6385,7 @@ emulate:
return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_page_fault);
void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg)
{
@@ -6391,7 +6401,7 @@ void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg)
pr_cont(", spte[%d] = 0x%llx", level, sptes[level]);
pr_cont("\n");
}
-EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_print_sptes);
static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 addr, hpa_t root_hpa)
@@ -6457,7 +6467,7 @@ void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
__kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->prev_roots[i].hpa);
}
}
-EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_addr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_invalidate_addr);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
@@ -6474,7 +6484,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
kvm_mmu_invalidate_addr(vcpu, vcpu->arch.walk_mmu, gva, KVM_MMU_ROOTS_ALL);
++vcpu->stat.invlpg;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_invlpg);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
@@ -6527,7 +6537,7 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
else
max_huge_page_level = PG_LEVEL_2M;
}
-EXPORT_SYMBOL_GPL(kvm_configure_mmu);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_configure_mmu);
static void free_mmu_pages(struct kvm_mmu *mmu)
{
@@ -6751,11 +6761,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
int kvm_mmu_init_vm(struct kvm *kvm)
{
- int r;
+ int r, i;
kvm->arch.shadow_mmio_value = shadow_mmio_value;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
- INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
+ for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+ INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages[i].pages);
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
if (tdp_mmu_enabled) {
@@ -7193,7 +7204,7 @@ restart:
return need_tlb_flush;
}
-EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_zap_gfn_range);
static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot)
@@ -7596,19 +7607,64 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
return err;
}
-static void kvm_recover_nx_huge_pages(struct kvm *kvm)
+static unsigned long nx_huge_pages_to_zap(struct kvm *kvm,
+ enum kvm_mmu_type mmu_type)
+{
+ unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages);
+ unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+ return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
+}
+
+static bool kvm_mmu_sp_dirty_logging_enabled(struct kvm *kvm,
+ struct kvm_mmu_page *sp)
{
- unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
struct kvm_memory_slot *slot;
- int rcu_idx;
+
+ /*
+ * Skip the memslot lookup if dirty tracking can't possibly be enabled,
+ * as memslot lookups are relatively expensive.
+ *
+ * If a memslot update is in progress, reading an incorrect value of
+ * kvm->nr_memslots_dirty_logging is not a problem: if it is becoming
+ * zero, KVM will do an unnecessary memslot lookup; if it is becoming
+ * nonzero, the page will be zapped unnecessarily. Either way, this
+ * only affects efficiency in racy situations, and not correctness.
+ */
+ if (!atomic_read(&kvm->nr_memslots_dirty_logging))
+ return false;
+
+ slot = __gfn_to_memslot(kvm_memslots_for_spte_role(kvm, sp->role), sp->gfn);
+ if (WARN_ON_ONCE(!slot))
+ return false;
+
+ return kvm_slot_dirty_track_enabled(slot);
+}
+
+static void kvm_recover_nx_huge_pages(struct kvm *kvm,
+ const enum kvm_mmu_type mmu_type)
+{
+#ifdef CONFIG_X86_64
+ const bool is_tdp_mmu = mmu_type == KVM_TDP_MMU;
+ spinlock_t *tdp_mmu_pages_lock = &kvm->arch.tdp_mmu_pages_lock;
+#else
+ const bool is_tdp_mmu = false;
+ spinlock_t *tdp_mmu_pages_lock = NULL;
+#endif
+ unsigned long to_zap = nx_huge_pages_to_zap(kvm, mmu_type);
+ struct list_head *nx_huge_pages;
struct kvm_mmu_page *sp;
- unsigned int ratio;
LIST_HEAD(invalid_list);
bool flush = false;
- ulong to_zap;
+ int rcu_idx;
+
+ nx_huge_pages = &kvm->arch.possible_nx_huge_pages[mmu_type].pages;
rcu_idx = srcu_read_lock(&kvm->srcu);
- write_lock(&kvm->mmu_lock);
+ if (is_tdp_mmu)
+ read_lock(&kvm->mmu_lock);
+ else
+ write_lock(&kvm->mmu_lock);
/*
* Zapping TDP MMU shadow pages, including the remote TLB flush, must
@@ -7617,11 +7673,15 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
*/
rcu_read_lock();
- ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
- to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
for ( ; to_zap; --to_zap) {
- if (list_empty(&kvm->arch.possible_nx_huge_pages))
+ if (is_tdp_mmu)
+ spin_lock(tdp_mmu_pages_lock);
+
+ if (list_empty(nx_huge_pages)) {
+ if (is_tdp_mmu)
+ spin_unlock(tdp_mmu_pages_lock);
break;
+ }
/*
* We use a separate list instead of just using active_mmu_pages
@@ -7630,56 +7690,44 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
* the total number of shadow pages. And because the TDP MMU
* doesn't use active_mmu_pages.
*/
- sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
+ sp = list_first_entry(nx_huge_pages,
struct kvm_mmu_page,
possible_nx_huge_page_link);
WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
WARN_ON_ONCE(!sp->role.direct);
+ unaccount_nx_huge_page(kvm, sp);
+
+ if (is_tdp_mmu)
+ spin_unlock(tdp_mmu_pages_lock);
+
/*
- * Unaccount and do not attempt to recover any NX Huge Pages
- * that are being dirty tracked, as they would just be faulted
- * back in as 4KiB pages. The NX Huge Pages in this slot will be
- * recovered, along with all the other huge pages in the slot,
- * when dirty logging is disabled.
- *
- * Since gfn_to_memslot() is relatively expensive, it helps to
- * skip it if it the test cannot possibly return true. On the
- * other hand, if any memslot has logging enabled, chances are
- * good that all of them do, in which case unaccount_nx_huge_page()
- * is much cheaper than zapping the page.
- *
- * If a memslot update is in progress, reading an incorrect value
- * of kvm->nr_memslots_dirty_logging is not a problem: if it is
- * becoming zero, gfn_to_memslot() will be done unnecessarily; if
- * it is becoming nonzero, the page will be zapped unnecessarily.
- * Either way, this only affects efficiency in racy situations,
- * and not correctness.
+ * Do not attempt to recover any NX Huge Pages that are being
+ * dirty tracked, as they would just be faulted back in as 4KiB
+ * pages. The NX Huge Pages in this slot will be recovered,
+ * along with all the other huge pages in the slot, when dirty
+ * logging is disabled.
*/
- slot = NULL;
- if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
- struct kvm_memslots *slots;
+ if (!kvm_mmu_sp_dirty_logging_enabled(kvm, sp)) {
+ if (is_tdp_mmu)
+ flush |= kvm_tdp_mmu_zap_possible_nx_huge_page(kvm, sp);
+ else
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- slots = kvm_memslots_for_spte_role(kvm, sp->role);
- slot = __gfn_to_memslot(slots, sp->gfn);
- WARN_ON_ONCE(!slot);
}
- if (slot && kvm_slot_dirty_track_enabled(slot))
- unaccount_nx_huge_page(kvm, sp);
- else if (is_tdp_mmu_page(sp))
- flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
- else
- kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
WARN_ON_ONCE(sp->nx_huge_page_disallowed);
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
rcu_read_unlock();
- cond_resched_rwlock_write(&kvm->mmu_lock);
- flush = false;
+ if (is_tdp_mmu)
+ cond_resched_rwlock_read(&kvm->mmu_lock);
+ else
+ cond_resched_rwlock_write(&kvm->mmu_lock);
+ flush = false;
rcu_read_lock();
}
}
@@ -7687,7 +7735,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
rcu_read_unlock();
- write_unlock(&kvm->mmu_lock);
+ if (is_tdp_mmu)
+ read_unlock(&kvm->mmu_lock);
+ else
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, rcu_idx);
}
@@ -7698,9 +7749,10 @@ static void kvm_nx_huge_page_recovery_worker_kill(void *data)
static bool kvm_nx_huge_page_recovery_worker(void *data)
{
struct kvm *kvm = data;
+ long remaining_time;
bool enabled;
uint period;
- long remaining_time;
+ int i;
enabled = calc_nx_huge_pages_recovery_period(&period);
if (!enabled)
@@ -7715,7 +7767,8 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
}
__set_current_state(TASK_RUNNING);
- kvm_recover_nx_huge_pages(kvm);
+ for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+ kvm_recover_nx_huge_pages(kvm, i);
kvm->arch.nx_huge_page_last = get_jiffies_64();
return true;
}
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index b776be783a2f..ed5c01df21ba 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -416,7 +416,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault,
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ enum kvm_mmu_type mmu_type);
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ enum kvm_mmu_type mmu_type);
#endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index f35a830ce469..764e3015d021 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -51,6 +51,9 @@
{ PFERR_PRESENT_MASK, "P" }, \
{ PFERR_WRITE_MASK, "W" }, \
{ PFERR_USER_MASK, "U" }, \
+ { PFERR_PK_MASK, "PK" }, \
+ { PFERR_SS_MASK, "SS" }, \
+ { PFERR_SGX_MASK, "SGX" }, \
{ PFERR_RSVD_MASK, "RSVD" }, \
{ PFERR_FETCH_MASK, "F" }
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index df31039b5d63..37647afde7d3 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -22,7 +22,7 @@
bool __read_mostly enable_mmio_caching = true;
static bool __ro_after_init allow_mmio_caching;
module_param_named(mmio_caching, enable_mmio_caching, bool, 0444);
-EXPORT_SYMBOL_GPL(enable_mmio_caching);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_mmio_caching);
bool __read_mostly kvm_ad_enabled;
@@ -470,13 +470,13 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
shadow_mmio_mask = mmio_mask;
shadow_mmio_access_mask = access_mask;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_mmio_spte_mask);
void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value)
{
kvm->arch.shadow_mmio_value = mmio_value;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_value);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_mmio_spte_value);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
{
@@ -487,7 +487,7 @@ void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
shadow_me_value = me_value;
shadow_me_mask = me_mask;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_me_spte_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
{
@@ -513,7 +513,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_ept_masks);
void kvm_mmu_reset_all_pte_masks(void)
{
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 740cb06accdb..c5734ca5c17d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -355,7 +355,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
sp->nx_huge_page_disallowed = false;
- untrack_possible_nx_huge_page(kvm, sp);
+ untrack_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
@@ -925,23 +925,52 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
rcu_read_unlock();
}
-bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+bool kvm_tdp_mmu_zap_possible_nx_huge_page(struct kvm *kvm,
+ struct kvm_mmu_page *sp)
{
- u64 old_spte;
+ struct tdp_iter iter = {
+ .old_spte = sp->ptep ? kvm_tdp_mmu_read_spte(sp->ptep) : 0,
+ .sptep = sp->ptep,
+ .level = sp->role.level + 1,
+ .gfn = sp->gfn,
+ .as_id = kvm_mmu_page_as_id(sp),
+ };
+
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
+ if (WARN_ON_ONCE(!is_tdp_mmu_page(sp)))
+ return false;
/*
- * This helper intentionally doesn't allow zapping a root shadow page,
- * which doesn't have a parent page table and thus no associated entry.
+ * Root shadow pages don't have a parent page table and thus no
+ * associated entry, but they can never be possible NX huge pages.
*/
if (WARN_ON_ONCE(!sp->ptep))
return false;
- old_spte = kvm_tdp_mmu_read_spte(sp->ptep);
- if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
+ /*
+ * Since mmu_lock is held in read mode, it's possible another task has
+ * already modified the SPTE. Zap the SPTE if and only if the SPTE
+ * points at the SP's page table, as checking shadow-present isn't
+ * sufficient, e.g. the SPTE could be replaced by a leaf SPTE, or even
+ * another SP. Note, spte_to_child_pt() also checks that the SPTE is
+ * shadow-present, i.e. guards against zapping a frozen SPTE.
+ */
+ if ((tdp_ptep_t)sp->spt != spte_to_child_pt(iter.old_spte, iter.level))
return false;
- tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte,
- SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1);
+ /*
+ * If a different task modified the SPTE, then it should be impossible
+ * for the SPTE to still be used for the to-be-zapped SP. Non-leaf
+ * SPTEs don't have Dirty bits, KVM always sets the Accessed bit when
+ * creating non-leaf SPTEs, and all other bits are immutable for non-
+ * leaf SPTEs, i.e. the only legal operations for non-leaf SPTEs are
+ * zapping and replacement.
+ */
+ if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE)) {
+ WARN_ON_ONCE((tdp_ptep_t)sp->spt == spte_to_child_pt(iter.old_spte, iter.level));
+ return false;
+ }
return true;
}
@@ -1303,7 +1332,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
fault->req_level >= iter.level) {
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
if (sp->nx_huge_page_disallowed)
- track_possible_nx_huge_page(kvm, sp);
+ track_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
}
@@ -1953,7 +1982,7 @@ bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa)
spte = sptes[leaf];
return is_shadow_present_pte(spte) && is_last_spte(spte, leaf);
}
-EXPORT_SYMBOL_GPL(kvm_tdp_mmu_gpa_is_mapped);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_mmu_gpa_is_mapped);
/*
* Returns the last level spte pointer of the shadow page walk for the given
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 52acf99d40a0..bd62977c9199 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -64,7 +64,8 @@ static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu,
}
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
-bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
+bool kvm_tdp_mmu_zap_possible_nx_huge_page(struct kvm *kvm,
+ struct kvm_mmu_page *sp);
void kvm_tdp_mmu_zap_all(struct kvm *kvm);
void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm,
enum kvm_tdp_mmu_root_types root_types);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 75e9cfc689f8..40ac4cb44ed2 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -26,11 +26,18 @@
/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
+/* Unadultered PMU capabilities of the host, i.e. of hardware. */
+static struct x86_pmu_capability __read_mostly kvm_host_pmu;
+
+/* KVM's PMU capabilities, i.e. the intersection of KVM and hardware support. */
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
-EXPORT_SYMBOL_GPL(kvm_pmu_cap);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_cap);
-struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
-EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+struct kvm_pmu_emulated_event_selectors {
+ u64 INSTRUCTIONS_RETIRED;
+ u64 BRANCH_INSTRUCTIONS_RETIRED;
+};
+static struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
/* Precise Distribution of Instructions Retired (PDIR) */
static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
@@ -96,6 +103,54 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
+void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
+{
+ bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
+ int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
+
+ perf_get_x86_pmu_capability(&kvm_host_pmu);
+
+ /*
+ * Hybrid PMUs don't play nice with virtualization without careful
+ * configuration by userspace, and KVM's APIs for reporting supported
+ * vPMU features do not account for hybrid PMUs. Disable vPMU support
+ * for hybrid PMUs until KVM gains a way to let userspace opt-in.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+ enable_pmu = false;
+
+ if (enable_pmu) {
+ /*
+ * WARN if perf did NOT disable hardware PMU if the number of
+ * architecturally required GP counters aren't present, i.e. if
+ * there are a non-zero number of counters, but fewer than what
+ * is architecturally required.
+ */
+ if (!kvm_host_pmu.num_counters_gp ||
+ WARN_ON_ONCE(kvm_host_pmu.num_counters_gp < min_nr_gp_ctrs))
+ enable_pmu = false;
+ else if (is_intel && !kvm_host_pmu.version)
+ enable_pmu = false;
+ }
+
+ if (!enable_pmu) {
+ memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
+ return;
+ }
+
+ memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
+ kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
+ kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
+ pmu_ops->MAX_NR_GP_COUNTERS);
+ kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
+ KVM_MAX_NR_FIXED_COUNTERS);
+
+ kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
+ perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
+ kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
+ perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+}
+
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -318,7 +373,7 @@ void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
pmc->counter &= pmc_bitmask(pmc);
pmc_update_sample_period(pmc);
}
-EXPORT_SYMBOL_GPL(pmc_write_counter);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(pmc_write_counter);
static int filter_cmp(const void *pa, const void *pb, u64 mask)
{
@@ -426,7 +481,7 @@ static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
return true;
}
-static bool check_pmu_event_filter(struct kvm_pmc *pmc)
+static bool pmc_is_event_allowed(struct kvm_pmc *pmc)
{
struct kvm_x86_pmu_event_filter *filter;
struct kvm *kvm = pmc->vcpu->kvm;
@@ -441,12 +496,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return is_fixed_event_allowed(filter, pmc->idx);
}
-static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
-{
- return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
- check_pmu_event_filter(pmc);
-}
-
static int reprogram_counter(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -457,7 +506,8 @@ static int reprogram_counter(struct kvm_pmc *pmc)
emulate_overflow = pmc_pause_counter(pmc);
- if (!pmc_event_is_allowed(pmc))
+ if (!pmc_is_globally_enabled(pmc) || !pmc_is_locally_enabled(pmc) ||
+ !pmc_is_event_allowed(pmc))
return 0;
if (emulate_overflow)
@@ -492,6 +542,47 @@ static int reprogram_counter(struct kvm_pmc *pmc)
eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
+static bool pmc_is_event_match(struct kvm_pmc *pmc, u64 eventsel)
+{
+ /*
+ * Ignore checks for edge detect (all events currently emulated by KVM
+ * are always rising edges), pin control (unsupported by modern CPUs),
+ * and counter mask and its invert flag (KVM doesn't emulate multiple
+ * events in a single clock cycle).
+ *
+ * Note, the uppermost nibble of AMD's mask overlaps Intel's IN_TX (bit
+ * 32) and IN_TXCP (bit 33), as well as two reserved bits (bits 35:34).
+ * Checking the "in HLE/RTM transaction" flags is correct as the vCPU
+ * can't be in a transaction if KVM is emulating an instruction.
+ *
+ * Checking the reserved bits might be wrong if they are defined in the
+ * future, but so could ignoring them, so do the simple thing for now.
+ */
+ return !((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB);
+}
+
+void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc)
+{
+ bitmap_clear(pmu->pmc_counting_instructions, pmc->idx, 1);
+ bitmap_clear(pmu->pmc_counting_branches, pmc->idx, 1);
+
+ /*
+ * Do NOT consult the PMU event filters, as the filters must be checked
+ * at the time of emulation to ensure KVM uses fresh information, e.g.
+ * omitting a PMC from a bitmap could result in a missed event if the
+ * filter is changed to allow counting the event.
+ */
+ if (!pmc_is_locally_enabled(pmc))
+ return;
+
+ if (pmc_is_event_match(pmc, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED))
+ bitmap_set(pmu->pmc_counting_instructions, pmc->idx, 1);
+
+ if (pmc_is_event_match(pmc, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED))
+ bitmap_set(pmu->pmc_counting_branches, pmc->idx, 1);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_recalc_pmc_emulation);
+
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
@@ -527,6 +618,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
*/
if (unlikely(pmu->need_cleanup))
kvm_pmu_cleanup(vcpu);
+
+ kvm_for_each_pmc(pmu, pmc, bit, bitmap)
+ kvm_pmu_recalc_pmc_emulation(pmu, pmc);
}
int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
@@ -650,6 +744,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = pmu->global_ctrl;
break;
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = 0;
break;
@@ -711,6 +806,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
break;
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
+ if (!msr_info->host_initiated)
+ pmu->global_status |= data & ~pmu->global_status_rsvd;
+ break;
default:
kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
return kvm_pmu_call(set_msr)(vcpu, msr_info);
@@ -789,6 +888,10 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
*/
if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
+
+ bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
+ bitmap_set(pmu->all_valid_pmc_idx, KVM_FIXED_PMC_BASE_IDX,
+ pmu->nr_arch_fixed_counters);
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
@@ -813,7 +916,7 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
pmu->pmc_in_use, X86_PMC_IDX_MAX);
kvm_for_each_pmc(pmu, pmc, i, bitmask) {
- if (pmc->perf_event && !pmc_speculative_in_use(pmc))
+ if (pmc->perf_event && !pmc_is_locally_enabled(pmc))
pmc_stop_counter(pmc);
}
@@ -860,44 +963,46 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
select_user;
}
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
+static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu,
+ const unsigned long *event_pmcs)
{
DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
- int i;
+ int i, idx;
BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
+ if (bitmap_empty(event_pmcs, X86_PMC_IDX_MAX))
+ return;
+
if (!kvm_pmu_has_perf_global_ctrl(pmu))
- bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
- else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+ bitmap_copy(bitmap, event_pmcs, X86_PMC_IDX_MAX);
+ else if (!bitmap_and(bitmap, event_pmcs,
(unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
return;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_for_each_pmc(pmu, pmc, i, bitmap) {
- /*
- * Ignore checks for edge detect (all events currently emulated
- * but KVM are always rising edges), pin control (unsupported
- * by modern CPUs), and counter mask and its invert flag (KVM
- * doesn't emulate multiple events in a single clock cycle).
- *
- * Note, the uppermost nibble of AMD's mask overlaps Intel's
- * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
- * bits (bits 35:34). Checking the "in HLE/RTM transaction"
- * flags is correct as the vCPU can't be in a transaction if
- * KVM is emulating an instruction. Checking the reserved bits
- * might be wrong if they are defined in the future, but so
- * could ignoring them, so do the simple thing for now.
- */
- if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
- !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
+ if (!pmc_is_event_allowed(pmc) || !cpl_is_matched(pmc))
continue;
kvm_pmu_incr_counter(pmc);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+}
+
+void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu)
+{
+ kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_instructions);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_instruction_retired);
+
+void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu)
+{
+ kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_branches);
}
-EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_branch_retired);
static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter)
{
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 103604c4b33b..5c3939e91f1d 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -23,11 +23,6 @@
#define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED
-struct kvm_pmu_emulated_event_selectors {
- u64 INSTRUCTIONS_RETIRED;
- u64 BRANCH_INSTRUCTIONS_RETIRED;
-};
-
struct kvm_pmu_ops {
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
@@ -165,7 +160,7 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
return NULL;
}
-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+static inline bool pmc_is_locally_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -178,57 +173,15 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
}
extern struct x86_pmu_capability kvm_pmu_cap;
-extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel;
-static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
-{
- bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
- int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
+void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops);
- /*
- * Hybrid PMUs don't play nice with virtualization without careful
- * configuration by userspace, and KVM's APIs for reporting supported
- * vPMU features do not account for hybrid PMUs. Disable vPMU support
- * for hybrid PMUs until KVM gains a way to let userspace opt-in.
- */
- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
- enable_pmu = false;
-
- if (enable_pmu) {
- perf_get_x86_pmu_capability(&kvm_pmu_cap);
-
- /*
- * WARN if perf did NOT disable hardware PMU if the number of
- * architecturally required GP counters aren't present, i.e. if
- * there are a non-zero number of counters, but fewer than what
- * is architecturally required.
- */
- if (!kvm_pmu_cap.num_counters_gp ||
- WARN_ON_ONCE(kvm_pmu_cap.num_counters_gp < min_nr_gp_ctrs))
- enable_pmu = false;
- else if (is_intel && !kvm_pmu_cap.version)
- enable_pmu = false;
- }
-
- if (!enable_pmu) {
- memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
- return;
- }
-
- kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
- kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
- pmu_ops->MAX_NR_GP_COUNTERS);
- kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
- KVM_MAX_NR_FIXED_COUNTERS);
-
- kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
- perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
- kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
- perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-}
+void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc);
static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
{
+ kvm_pmu_recalc_pmc_emulation(pmc_to_pmu(pmc), pmc);
+
set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
@@ -272,7 +225,8 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
+void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu);
+void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu);
bool is_vmware_backdoor_pmc(u32 pmc_idx);
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index c53b92379e6e..743ab25ba787 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -25,6 +25,9 @@
#define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
#define KVM_X86_FEATURE_SGX_EDECCSSA KVM_X86_FEATURE(CPUID_12_EAX, 11)
+/* Intel-defined sub-features, CPUID level 0x00000007:1 (ECX) */
+#define KVM_X86_FEATURE_MSR_IMM KVM_X86_FEATURE(CPUID_7_1_ECX, 5)
+
/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
@@ -87,6 +90,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
[CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
+ [CPUID_7_1_ECX] = { 7, 1, CPUID_ECX},
};
/*
@@ -128,6 +132,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
+ KVM_X86_TRANSLATE_FEATURE(MSR_IMM);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 9864c057187d..f623c5986119 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -131,7 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
kvm_mmu_reset_context(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_smm_changed);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_smm_changed);
void process_smi(struct kvm_vcpu *vcpu)
{
@@ -269,6 +269,10 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
+
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ kvm_msr_read(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, &smram->ssp))
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
#endif
@@ -529,7 +533,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
vcpu->arch.smbase = smstate->smbase;
- if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
+ if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;
rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);
@@ -558,6 +562,10 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
ctxt->interruptibility = (u8)smstate->int_shadow;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ kvm_msr_write(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, smstate->ssp))
+ return X86EMUL_UNHANDLEABLE;
+
return X86EMUL_CONTINUE;
}
#endif
@@ -620,7 +628,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
/* And finally go back to 32-bit mode. */
efer = 0;
- kvm_set_msr(vcpu, MSR_EFER, efer);
+ __kvm_emulate_msr_write(vcpu, MSR_EFER, efer);
}
#endif
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 551703fbe200..db3c88f16138 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -116,8 +116,8 @@ struct kvm_smram_state_64 {
u32 smbase;
u32 reserved4[5];
- /* ssp and svm_* fields below are not implemented by KVM */
u64 ssp;
+ /* svm_* fields below are not implemented by KVM */
u64 svm_guest_pat;
u64 svm_host_efer;
u64 svm_host_cr4;
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index a34c5c3b164e..f286b5706d7c 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -64,6 +64,34 @@
static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_IDX_MASK) == -1u);
+#define AVIC_AUTO_MODE -1
+
+static int avic_param_set(const char *val, const struct kernel_param *kp)
+{
+ if (val && sysfs_streq(val, "auto")) {
+ *(int *)kp->arg = AVIC_AUTO_MODE;
+ return 0;
+ }
+
+ return param_set_bint(val, kp);
+}
+
+static const struct kernel_param_ops avic_ops = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = avic_param_set,
+ .get = param_get_bool,
+};
+
+/*
+ * Enable / disable AVIC. In "auto" mode (default behavior), AVIC is enabled
+ * for Zen4+ CPUs with x2AVIC (and all other criteria for enablement are met).
+ */
+static int avic = AVIC_AUTO_MODE;
+module_param_cb(avic, &avic_ops, &avic, 0444);
+__MODULE_PARM_TYPE(avic, "bool");
+
+module_param(enable_ipiv, bool, 0444);
+
static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);
@@ -77,7 +105,58 @@ static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
-bool x2avic_enabled;
+static bool x2avic_enabled;
+
+
+static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
+ bool intercept)
+{
+ static const u32 x2avic_passthrough_msrs[] = {
+ X2APIC_MSR(APIC_ID),
+ X2APIC_MSR(APIC_LVR),
+ X2APIC_MSR(APIC_TASKPRI),
+ X2APIC_MSR(APIC_ARBPRI),
+ X2APIC_MSR(APIC_PROCPRI),
+ X2APIC_MSR(APIC_EOI),
+ X2APIC_MSR(APIC_RRR),
+ X2APIC_MSR(APIC_LDR),
+ X2APIC_MSR(APIC_DFR),
+ X2APIC_MSR(APIC_SPIV),
+ X2APIC_MSR(APIC_ISR),
+ X2APIC_MSR(APIC_TMR),
+ X2APIC_MSR(APIC_IRR),
+ X2APIC_MSR(APIC_ESR),
+ X2APIC_MSR(APIC_ICR),
+ X2APIC_MSR(APIC_ICR2),
+
+ /*
+ * Note! Always intercept LVTT, as TSC-deadline timer mode
+ * isn't virtualized by hardware, and the CPU will generate a
+ * #GP instead of a #VMEXIT.
+ */
+ X2APIC_MSR(APIC_LVTTHMR),
+ X2APIC_MSR(APIC_LVTPC),
+ X2APIC_MSR(APIC_LVT0),
+ X2APIC_MSR(APIC_LVT1),
+ X2APIC_MSR(APIC_LVTERR),
+ X2APIC_MSR(APIC_TMICT),
+ X2APIC_MSR(APIC_TMCCT),
+ X2APIC_MSR(APIC_TDCR),
+ };
+ int i;
+
+ if (intercept == svm->x2avic_msrs_intercepted)
+ return;
+
+ if (!x2avic_enabled)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
+ svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
+ MSR_TYPE_RW, intercept);
+
+ svm->x2avic_msrs_intercepted = intercept;
+}
static void avic_activate_vmcb(struct vcpu_svm *svm)
{
@@ -99,7 +178,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
vmcb->control.int_ctl |= X2APIC_MODE_MASK;
vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
/* Disabling MSR intercept for x2APIC registers */
- svm_set_x2apic_msr_interception(svm, false);
+ avic_set_x2apic_msr_interception(svm, false);
} else {
/*
* Flush the TLB, the guest may have inserted a non-APIC
@@ -110,7 +189,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
/* For xAVIC and hybrid-xAVIC modes */
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
/* Enabling MSR intercept for x2APIC registers */
- svm_set_x2apic_msr_interception(svm, true);
+ avic_set_x2apic_msr_interception(svm, true);
}
}
@@ -130,7 +209,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm)
return;
/* Enabling MSR intercept for x2APIC registers */
- svm_set_x2apic_msr_interception(svm, true);
+ avic_set_x2apic_msr_interception(svm, true);
}
/* Note:
@@ -1090,23 +1169,27 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
avic_vcpu_load(vcpu, vcpu->cpu);
}
-/*
- * Note:
- * - The module param avic enable both xAPIC and x2APIC mode.
- * - Hypervisor can support both xAVIC and x2AVIC in the same guest.
- * - The mode can be switched at run-time.
- */
-bool avic_hardware_setup(void)
+static bool __init avic_want_avic_enabled(void)
{
- if (!npt_enabled)
+ /*
+ * In "auto" mode, enable AVIC by default for Zen4+ if x2AVIC is
+ * supported (to avoid enabling partial support by default, and because
+ * x2AVIC should be supported by all Zen4+ CPUs). Explicitly check for
+ * family 0x19 and later (Zen5+), as the kernel's synthetic ZenX flags
+ * aren't inclusive of previous generations, i.e. the kernel will set
+ * at most one ZenX feature flag.
+ */
+ if (avic == AVIC_AUTO_MODE)
+ avic = boot_cpu_has(X86_FEATURE_X2AVIC) &&
+ (boot_cpu_data.x86 > 0x19 || cpu_feature_enabled(X86_FEATURE_ZEN4));
+
+ if (!avic || !npt_enabled)
return false;
/* AVIC is a prerequisite for x2AVIC. */
if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) {
- if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
- pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
- pr_warn(FW_BUG "Try enable AVIC using force_avic option");
- }
+ if (boot_cpu_has(X86_FEATURE_X2AVIC))
+ pr_warn(FW_BUG "Cannot enable x2AVIC, AVIC is unsupported\n");
return false;
}
@@ -1116,21 +1199,37 @@ bool avic_hardware_setup(void)
return false;
}
- if (boot_cpu_has(X86_FEATURE_AVIC)) {
- pr_info("AVIC enabled\n");
- } else if (force_avic) {
- /*
- * Some older systems does not advertise AVIC support.
- * See Revision Guide for specific AMD processor for more detail.
- */
- pr_warn("AVIC is not supported in CPUID but force enabled");
- pr_warn("Your system might crash and burn");
- }
+ /*
+ * Print a scary message if AVIC is force enabled to make it abundantly
+ * clear that ignoring CPUID could have repercussions. See Revision
+ * Guide for specific AMD processor for more details.
+ */
+ if (!boot_cpu_has(X86_FEATURE_AVIC))
+ pr_warn("AVIC unsupported in CPUID but force enabled, your system might crash and burn\n");
+
+ return true;
+}
+
+/*
+ * Note:
+ * - The module param avic enable both xAPIC and x2APIC mode.
+ * - Hypervisor can support both xAVIC and x2AVIC in the same guest.
+ * - The mode can be switched at run-time.
+ */
+bool __init avic_hardware_setup(void)
+{
+ avic = avic_want_avic_enabled();
+ if (!avic)
+ return false;
+
+ pr_info("AVIC enabled\n");
/* AVIC is a prerequisite for x2AVIC. */
x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
if (x2avic_enabled)
pr_info("x2AVIC enabled\n");
+ else
+ svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
/*
* Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b7fd2e869998..a6443feab252 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -636,6 +636,14 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
vmcb_mark_dirty(vmcb02, VMCB_DT);
}
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_CET)))) {
+ vmcb02->save.s_cet = vmcb12->save.s_cet;
+ vmcb02->save.isst_addr = vmcb12->save.isst_addr;
+ vmcb02->save.ssp = vmcb12->save.ssp;
+ vmcb_mark_dirty(vmcb02, VMCB_CET);
+ }
+
kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
svm_set_efer(vcpu, svm->nested.save.efer);
@@ -1044,6 +1052,12 @@ void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
to_save->rsp = from_save->rsp;
to_save->rip = from_save->rip;
to_save->cpl = 0;
+
+ if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
+ to_save->s_cet = from_save->s_cet;
+ to_save->isst_addr = from_save->isst_addr;
+ to_save->ssp = from_save->ssp;
+ }
}
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
@@ -1111,6 +1125,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->save.dr6 = svm->vcpu.arch.dr6;
vmcb12->save.cpl = vmcb02->save.cpl;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) {
+ vmcb12->save.s_cet = vmcb02->save.s_cet;
+ vmcb12->save.isst_addr = vmcb02->save.isst_addr;
+ vmcb12->save.ssp = vmcb02->save.ssp;
+ }
+
vmcb12->control.int_state = vmcb02->control.int_state;
vmcb12->control.exit_code = vmcb02->control.exit_code;
vmcb12->control.exit_code_hi = vmcb02->control.exit_code_hi;
@@ -1798,17 +1818,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
return -EINVAL;
- ret = -ENOMEM;
- ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
- save = kzalloc(sizeof(*save), GFP_KERNEL);
- if (!ctl || !save)
- goto out_free;
+ ctl = memdup_user(&user_vmcb->control, sizeof(*ctl));
+ if (IS_ERR(ctl))
+ return PTR_ERR(ctl);
- ret = -EFAULT;
- if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
- goto out_free;
- if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
- goto out_free;
+ save = memdup_user(&user_vmcb->save, sizeof(*save));
+ if (IS_ERR(save)) {
+ kfree(ctl);
+ return PTR_ERR(save);
+ }
ret = -EINVAL;
__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 288f7f2a46f2..bc062285fbf5 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -41,7 +41,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
unsigned int idx;
- if (!vcpu->kvm->arch.enable_pmu)
+ if (!pmu->version)
return NULL;
switch (msr) {
@@ -113,6 +113,7 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
return pmu->version > 1;
default:
if (msr > MSR_F15H_PERF_CTR5 &&
@@ -199,17 +200,16 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
kvm_pmu_cap.num_counters_gp);
if (pmu->version > 1) {
- pmu->global_ctrl_rsvd = ~((1ull << pmu->nr_arch_gp_counters) - 1);
+ pmu->global_ctrl_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1);
pmu->global_status_rsvd = pmu->global_ctrl_rsvd;
}
- pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
+ pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(48) - 1;
pmu->reserved_bits = 0xfffffff000280000ull;
pmu->raw_event_mask = AMD64_RAW_EVENT_MASK;
/* not applicable to AMD; but clean them to prevent any fall out */
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->nr_arch_fixed_counters = 0;
- bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
}
static void amd_pmu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5bac4d20aec0..0835c664fbfd 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -37,7 +37,6 @@
#include "trace.h"
#define GHCB_VERSION_MAX 2ULL
-#define GHCB_VERSION_DEFAULT 2ULL
#define GHCB_VERSION_MIN 1ULL
#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
@@ -59,6 +58,9 @@ static bool sev_es_debug_swap_enabled = true;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
static u64 sev_supported_vmsa_features;
+static unsigned int nr_ciphertext_hiding_asids;
+module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444);
+
#define AP_RESET_HOLD_NONE 0
#define AP_RESET_HOLD_NAE_EVENT 1
#define AP_RESET_HOLD_MSR_PROTO 2
@@ -85,6 +87,10 @@ static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
+static unsigned int max_sev_es_asid;
+static unsigned int min_sev_es_asid;
+static unsigned int max_snp_asid;
+static unsigned int min_snp_asid;
static unsigned long sev_me_mask;
static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
@@ -147,6 +153,14 @@ static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm)
return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP;
}
+static bool snp_is_secure_tsc_enabled(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+
+ return (sev->vmsa_features & SVM_SEV_FEAT_SECURE_TSC) &&
+ !WARN_ON_ONCE(!sev_snp_guest(kvm));
+}
+
/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
{
@@ -173,20 +187,34 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
misc_cg_uncharge(type, sev->misc_cg, 1);
}
-static int sev_asid_new(struct kvm_sev_info *sev)
+static int sev_asid_new(struct kvm_sev_info *sev, unsigned long vm_type)
{
/*
* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
- * Note: min ASID can end up larger than the max if basic SEV support is
- * effectively disabled by disallowing use of ASIDs for SEV guests.
*/
- unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
- unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
- unsigned int asid;
+ unsigned int min_asid, max_asid, asid;
bool retry = true;
int ret;
+ if (vm_type == KVM_X86_SNP_VM) {
+ min_asid = min_snp_asid;
+ max_asid = max_snp_asid;
+ } else if (sev->es_active) {
+ min_asid = min_sev_es_asid;
+ max_asid = max_sev_es_asid;
+ } else {
+ min_asid = min_sev_asid;
+ max_asid = max_sev_asid;
+ }
+
+ /*
+ * The min ASID can end up larger than the max if basic SEV support is
+ * effectively disabled by disallowing use of ASIDs for SEV guests.
+ * Similarly for SEV-ES guests the min ASID can end up larger than the
+ * max when ciphertext hiding is enabled, effectively disabling SEV-ES
+ * support.
+ */
if (min_asid > max_asid)
return -ENOTTY;
@@ -406,6 +434,7 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_platform_init_args init_args = {0};
bool es_active = vm_type != KVM_X86_SEV_VM;
+ bool snp_active = vm_type == KVM_X86_SNP_VM;
u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0;
int ret;
@@ -415,12 +444,26 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
if (data->flags)
return -EINVAL;
+ if (!snp_active)
+ valid_vmsa_features &= ~SVM_SEV_FEAT_SECURE_TSC;
+
if (data->vmsa_features & ~valid_vmsa_features)
return -EINVAL;
if (data->ghcb_version > GHCB_VERSION_MAX || (!es_active && data->ghcb_version))
return -EINVAL;
+ /*
+ * KVM supports the full range of mandatory features defined by version
+ * 2 of the GHCB protocol, so default to that for SEV-ES guests created
+ * via KVM_SEV_INIT2 (KVM_SEV_INIT forces version 1).
+ */
+ if (es_active && !data->ghcb_version)
+ data->ghcb_version = 2;
+
+ if (snp_active && data->ghcb_version < 2)
+ return -EINVAL;
+
if (unlikely(sev->active))
return -EINVAL;
@@ -429,18 +472,10 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
sev->vmsa_features = data->vmsa_features;
sev->ghcb_version = data->ghcb_version;
- /*
- * Currently KVM supports the full range of mandatory features defined
- * by version 2 of the GHCB protocol, so default to that for SEV-ES
- * guests created via KVM_SEV_INIT2.
- */
- if (sev->es_active && !sev->ghcb_version)
- sev->ghcb_version = GHCB_VERSION_DEFAULT;
-
- if (vm_type == KVM_X86_SNP_VM)
+ if (snp_active)
sev->vmsa_features |= SVM_SEV_FEAT_SNP_ACTIVE;
- ret = sev_asid_new(sev);
+ ret = sev_asid_new(sev, vm_type);
if (ret)
goto e_no_asid;
@@ -455,7 +490,7 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
}
/* This needs to happen after SEV/SNP firmware initialization. */
- if (vm_type == KVM_X86_SNP_VM) {
+ if (snp_active) {
ret = snp_guest_req_init(kvm);
if (ret)
goto e_free;
@@ -569,8 +604,6 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
- sev->policy = params.policy;
-
memset(&start, 0, sizeof(start));
dh_blob = NULL;
@@ -618,6 +651,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_session;
}
+ sev->policy = params.policy;
sev->handle = start.handle;
sev->fd = argp->sev_fd;
@@ -1968,7 +2002,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
dst_svm = to_svm(dst_vcpu);
- sev_init_vmcb(dst_svm);
+ sev_init_vmcb(dst_svm, false);
if (!dst->es_active)
continue;
@@ -2180,7 +2214,12 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO))
return -EINVAL;
- sev->policy = params.policy;
+ if (snp_is_secure_tsc_enabled(kvm)) {
+ if (WARN_ON_ONCE(!kvm->arch.default_tsc_khz))
+ return -EINVAL;
+
+ start.desired_tsc_khz = kvm->arch.default_tsc_khz;
+ }
sev->snp_context = snp_context_create(kvm, argp);
if (!sev->snp_context)
@@ -2188,6 +2227,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
start.gctx_paddr = __psp_pa(sev->snp_context);
start.policy = params.policy;
+
memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_START, &start, &argp->error);
if (rc) {
@@ -2196,6 +2236,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_context;
}
+ sev->policy = params.policy;
sev->fd = argp->sev_fd;
rc = snp_bind_asid(kvm, &argp->error);
if (rc) {
@@ -2329,7 +2370,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
pr_debug("%s: GFN start 0x%llx length 0x%llx type %d flags %d\n", __func__,
params.gfn_start, params.len, params.type, params.flags);
- if (!PAGE_ALIGNED(params.len) || params.flags ||
+ if (!params.len || !PAGE_ALIGNED(params.len) || params.flags ||
(params.type != KVM_SEV_SNP_PAGE_TYPE_NORMAL &&
params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO &&
params.type != KVM_SEV_SNP_PAGE_TYPE_UNMEASURED &&
@@ -3038,6 +3079,9 @@ void __init sev_hardware_setup(void)
if (min_sev_asid == 1)
goto out;
+ min_sev_es_asid = min_snp_asid = 1;
+ max_sev_es_asid = max_snp_asid = min_sev_asid - 1;
+
sev_es_asid_count = min_sev_asid - 1;
WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count));
sev_es_supported = true;
@@ -3046,10 +3090,32 @@ void __init sev_hardware_setup(void)
out:
if (sev_enabled) {
init_args.probe = true;
+
+ if (sev_is_snp_ciphertext_hiding_supported())
+ init_args.max_snp_asid = min(nr_ciphertext_hiding_asids,
+ min_sev_asid - 1);
+
if (sev_platform_init(&init_args))
sev_supported = sev_es_supported = sev_snp_supported = false;
else if (sev_snp_supported)
sev_snp_supported = is_sev_snp_initialized();
+
+ if (sev_snp_supported)
+ nr_ciphertext_hiding_asids = init_args.max_snp_asid;
+
+ /*
+ * If ciphertext hiding is enabled, the joint SEV-ES/SEV-SNP
+ * ASID range is partitioned into separate SEV-ES and SEV-SNP
+ * ASID ranges, with the SEV-SNP range being [1..max_snp_asid]
+ * and the SEV-ES range being (max_snp_asid..max_sev_es_asid].
+ * Note, SEV-ES may effectively be disabled if all ASIDs from
+ * the joint range are assigned to SEV-SNP.
+ */
+ if (nr_ciphertext_hiding_asids) {
+ max_snp_asid = nr_ciphertext_hiding_asids;
+ min_sev_es_asid = max_snp_asid + 1;
+ pr_info("SEV-SNP ciphertext hiding enabled\n");
+ }
}
if (boot_cpu_has(X86_FEATURE_SEV))
@@ -3060,12 +3126,14 @@ out:
min_sev_asid, max_sev_asid);
if (boot_cpu_has(X86_FEATURE_SEV_ES))
pr_info("SEV-ES %s (ASIDs %u - %u)\n",
- str_enabled_disabled(sev_es_supported),
- min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1);
+ sev_es_supported ? min_sev_es_asid <= max_sev_es_asid ? "enabled" :
+ "unusable" :
+ "disabled",
+ min_sev_es_asid, max_sev_es_asid);
if (boot_cpu_has(X86_FEATURE_SEV_SNP))
pr_info("SEV-SNP %s (ASIDs %u - %u)\n",
str_enabled_disabled(sev_snp_supported),
- min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1);
+ min_snp_asid, max_snp_asid);
sev_enabled = sev_supported;
sev_es_enabled = sev_es_supported;
@@ -3078,6 +3146,9 @@ out:
sev_supported_vmsa_features = 0;
if (sev_es_debug_swap_enabled)
sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+
+ if (sev_snp_enabled && tsc_khz && cpu_feature_enabled(X86_FEATURE_SNP_SECURE_TSC))
+ sev_supported_vmsa_features |= SVM_SEV_FEAT_SECURE_TSC;
}
void sev_hardware_unsetup(void)
@@ -3193,7 +3264,7 @@ skip_vmsa_free:
kvfree(svm->sev_es.ghcb_sa);
}
-static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+static u64 kvm_get_cached_sw_exit_code(struct vmcb_control_area *control)
{
return (((u64)control->exit_code_hi) << 32) | control->exit_code;
}
@@ -3219,7 +3290,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
*/
pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa);
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
- kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
+ kvm_get_cached_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm));
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
@@ -3272,26 +3343,27 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap));
memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap));
- vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb);
- vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb);
+ vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm);
+ vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm);
- svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb);
+ svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm);
- if (kvm_ghcb_xcr0_is_valid(svm)) {
- vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
- }
+ if (kvm_ghcb_xcr0_is_valid(svm))
+ __kvm_set_xcr(vcpu, 0, kvm_ghcb_get_xcr0(svm));
+
+ if (kvm_ghcb_xss_is_valid(svm))
+ __kvm_emulate_msr_write(vcpu, MSR_IA32_XSS, kvm_ghcb_get_xss(svm));
/* Copy the GHCB exit information into the VMCB fields */
- exit_code = ghcb_get_sw_exit_code(ghcb);
+ exit_code = kvm_ghcb_get_sw_exit_code(svm);
control->exit_code = lower_32_bits(exit_code);
control->exit_code_hi = upper_32_bits(exit_code);
- control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
- control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
- svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb);
+ control->exit_info_1 = kvm_ghcb_get_sw_exit_info_1(svm);
+ control->exit_info_2 = kvm_ghcb_get_sw_exit_info_2(svm);
+ svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm);
/* Clear the valid entries fields */
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
@@ -3308,7 +3380,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
* Retrieve the exit code now even though it may not be marked valid
* as it could help with debugging.
*/
- exit_code = kvm_ghcb_get_sw_exit_code(control);
+ exit_code = kvm_get_cached_sw_exit_code(control);
/* Only GHCB Usage code 0 is supported */
if (svm->sev_es.ghcb->ghcb_usage) {
@@ -3880,7 +3952,7 @@ next_range:
/*
* Invoked as part of svm_vcpu_reset() processing of an init event.
*/
-void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
+static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_memory_slot *slot;
@@ -3888,9 +3960,6 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
kvm_pfn_t pfn;
gfn_t gfn;
- if (!sev_snp_guest(vcpu->kvm))
- return;
-
guard(mutex)(&svm->sev_es.snp_vmsa_mutex);
if (!svm->sev_es.snp_ap_waiting_for_reset)
@@ -4316,7 +4385,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
svm_vmgexit_success(svm, 0);
- exit_code = kvm_ghcb_get_sw_exit_code(control);
+ exit_code = kvm_get_cached_sw_exit_code(control);
switch (exit_code) {
case SVM_VMGEXIT_MMIO_READ:
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
@@ -4448,6 +4517,9 @@ void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID));
+ svm_set_intercept_for_msr(vcpu, MSR_AMD64_GUEST_TSC_FREQ, MSR_TYPE_R,
+ !snp_is_secure_tsc_enabled(vcpu->kvm));
+
/*
* For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
* the host/guest supports its use.
@@ -4476,7 +4548,7 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
}
-static void sev_es_init_vmcb(struct vcpu_svm *svm)
+static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -4537,10 +4609,21 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
svm_clr_intercept(svm, INTERCEPT_XSETBV);
+
+ /*
+ * Set the GHCB MSR value as per the GHCB specification when emulating
+ * vCPU RESET for an SEV-ES guest.
+ */
+ if (!init_event)
+ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
+ GHCB_VERSION_MIN,
+ sev_enc_bit));
}
-void sev_init_vmcb(struct vcpu_svm *svm)
+void sev_init_vmcb(struct vcpu_svm *svm, bool init_event)
{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);
@@ -4550,24 +4633,36 @@ void sev_init_vmcb(struct vcpu_svm *svm)
*/
clr_exception_intercept(svm, GP_VECTOR);
- if (sev_es_guest(svm->vcpu.kvm))
- sev_es_init_vmcb(svm);
+ if (init_event && sev_snp_guest(vcpu->kvm))
+ sev_snp_init_protected_guest_state(vcpu);
+
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_init_vmcb(svm, init_event);
}
-void sev_es_vcpu_reset(struct vcpu_svm *svm)
+int sev_vcpu_create(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct page *vmsa_page;
+
+ mutex_init(&svm->sev_es.snp_vmsa_mutex);
+
+ if (!sev_es_guest(vcpu->kvm))
+ return 0;
/*
- * Set the GHCB MSR value as per the GHCB specification when emulating
- * vCPU RESET for an SEV-ES guest.
+ * SEV-ES guests require a separate (from the VMCB) VMSA page used to
+ * contain the encrypted register state of the guest.
*/
- set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
- GHCB_VERSION_MIN,
- sev_enc_bit));
+ vmsa_page = snp_safe_alloc_page();
+ if (!vmsa_page)
+ return -ENOMEM;
- mutex_init(&svm->sev_es.snp_vmsa_mutex);
+ svm->sev_es.vmsa = page_address(vmsa_page);
+
+ vcpu->arch.guest_tsc_protected = snp_is_secure_tsc_enabled(vcpu->kvm);
+
+ return 0;
}
void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
@@ -4618,6 +4713,16 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are
hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
}
+
+ /*
+ * TSC_AUX is always virtualized for SEV-ES guests when the feature is
+ * available, i.e. TSC_AUX is loaded on #VMEXIT from the host save area.
+ * Set the save area to the current hardware value, i.e. the current
+ * user return value, so that the correct value is restored on #VMEXIT.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_V_TSC_AUX) &&
+ !WARN_ON_ONCE(tsc_aux_uret_slot < 0))
+ hostsa->tsc_aux = kvm_get_user_return_msr(tsc_aux_uret_slot);
}
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3a9fe0a8b78c..153c12dbf3eb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -158,14 +158,6 @@ module_param(lbrv, int, 0444);
static int tsc_scaling = true;
module_param(tsc_scaling, int, 0444);
-/*
- * enable / disable AVIC. Because the defaults differ for APICv
- * support between VMX and SVM we cannot use module_param_named.
- */
-static bool avic;
-module_param(avic, bool, 0444);
-module_param(enable_ipiv, bool, 0444);
-
module_param(enable_device_posted_irqs, bool, 0444);
bool __read_mostly dump_invalid_vmcb;
@@ -195,7 +187,7 @@ static DEFINE_MUTEX(vmcb_dump_mutex);
* RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
* defer the restoration of TSC_AUX until the CPU returns to userspace.
*/
-static int tsc_aux_uret_slot __read_mostly = -1;
+int tsc_aux_uret_slot __ro_after_init = -1;
static int get_npt_level(void)
{
@@ -577,18 +569,6 @@ static int svm_enable_virtualization_cpu(void)
amd_pmu_enable_virt();
- /*
- * If TSC_AUX virtualization is supported, TSC_AUX becomes a swap type
- * "B" field (see sev_es_prepare_switch_to_guest()) for SEV-ES guests.
- * Since Linux does not change the value of TSC_AUX once set, prime the
- * TSC_AUX field now to avoid a RDMSR on every vCPU run.
- */
- if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
- u32 __maybe_unused msr_hi;
-
- rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi);
- }
-
return 0;
}
@@ -736,55 +716,6 @@ static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept);
}
-void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
-{
- static const u32 x2avic_passthrough_msrs[] = {
- X2APIC_MSR(APIC_ID),
- X2APIC_MSR(APIC_LVR),
- X2APIC_MSR(APIC_TASKPRI),
- X2APIC_MSR(APIC_ARBPRI),
- X2APIC_MSR(APIC_PROCPRI),
- X2APIC_MSR(APIC_EOI),
- X2APIC_MSR(APIC_RRR),
- X2APIC_MSR(APIC_LDR),
- X2APIC_MSR(APIC_DFR),
- X2APIC_MSR(APIC_SPIV),
- X2APIC_MSR(APIC_ISR),
- X2APIC_MSR(APIC_TMR),
- X2APIC_MSR(APIC_IRR),
- X2APIC_MSR(APIC_ESR),
- X2APIC_MSR(APIC_ICR),
- X2APIC_MSR(APIC_ICR2),
-
- /*
- * Note! Always intercept LVTT, as TSC-deadline timer mode
- * isn't virtualized by hardware, and the CPU will generate a
- * #GP instead of a #VMEXIT.
- */
- X2APIC_MSR(APIC_LVTTHMR),
- X2APIC_MSR(APIC_LVTPC),
- X2APIC_MSR(APIC_LVT0),
- X2APIC_MSR(APIC_LVT1),
- X2APIC_MSR(APIC_LVTERR),
- X2APIC_MSR(APIC_TMICT),
- X2APIC_MSR(APIC_TMCCT),
- X2APIC_MSR(APIC_TDCR),
- };
- int i;
-
- if (intercept == svm->x2avic_msrs_intercepted)
- return;
-
- if (!x2avic_enabled)
- return;
-
- for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
- svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
- MSR_TYPE_RW, intercept);
-
- svm->x2avic_msrs_intercepted = intercept;
-}
-
void svm_vcpu_free_msrpm(void *msrpm)
{
__free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
@@ -844,6 +775,17 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
}
+ if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
+ bool shstk_enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_U_CET, MSR_TYPE_RW, !shstk_enabled);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, !shstk_enabled);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_PL0_SSP, MSR_TYPE_RW, !shstk_enabled);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_PL1_SSP, MSR_TYPE_RW, !shstk_enabled);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_PL2_SSP, MSR_TYPE_RW, !shstk_enabled);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, !shstk_enabled);
+ }
+
if (sev_es_guest(vcpu->kvm))
sev_es_recalc_msr_intercepts(vcpu);
@@ -1077,13 +1019,13 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
}
}
-static void svm_recalc_intercepts_after_set_cpuid(struct kvm_vcpu *vcpu)
+static void svm_recalc_intercepts(struct kvm_vcpu *vcpu)
{
svm_recalc_instruction_intercepts(vcpu);
svm_recalc_msr_intercepts(vcpu);
}
-static void init_vmcb(struct kvm_vcpu *vcpu)
+static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -1221,11 +1163,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_BUSLOCK);
if (sev_guest(vcpu->kvm))
- sev_init_vmcb(svm);
+ sev_init_vmcb(svm, init_event);
svm_hv_init_vmcb(vmcb);
- svm_recalc_intercepts_after_set_cpuid(vcpu);
+ kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
vmcb_mark_all_dirty(vmcb);
@@ -1244,9 +1186,6 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
svm->nmi_masked = false;
svm->awaiting_iret_completion = false;
-
- if (sev_es_guest(vcpu->kvm))
- sev_es_vcpu_reset(svm);
}
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -1256,10 +1195,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
svm->spec_ctrl = 0;
svm->virt_spec_ctrl = 0;
- if (init_event)
- sev_snp_init_protected_guest_state(vcpu);
-
- init_vmcb(vcpu);
+ init_vmcb(vcpu, init_event);
if (!init_event)
__svm_vcpu_reset(vcpu);
@@ -1275,7 +1211,6 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
struct page *vmcb01_page;
- struct page *vmsa_page = NULL;
int err;
BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1286,24 +1221,18 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
if (!vmcb01_page)
goto out;
- if (sev_es_guest(vcpu->kvm)) {
- /*
- * SEV-ES guests require a separate VMSA page used to contain
- * the encrypted register state of the guest.
- */
- vmsa_page = snp_safe_alloc_page();
- if (!vmsa_page)
- goto error_free_vmcb_page;
- }
+ err = sev_vcpu_create(vcpu);
+ if (err)
+ goto error_free_vmcb_page;
err = avic_init_vcpu(svm);
if (err)
- goto error_free_vmsa_page;
+ goto error_free_sev;
svm->msrpm = svm_vcpu_alloc_msrpm();
if (!svm->msrpm) {
err = -ENOMEM;
- goto error_free_vmsa_page;
+ goto error_free_sev;
}
svm->x2avic_msrs_intercepted = true;
@@ -1312,16 +1241,12 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
svm_switch_vmcb(svm, &svm->vmcb01);
- if (vmsa_page)
- svm->sev_es.vmsa = page_address(vmsa_page);
-
svm->guest_state_loaded = false;
return 0;
-error_free_vmsa_page:
- if (vmsa_page)
- __free_page(vmsa_page);
+error_free_sev:
+ sev_free_vcpu(vcpu);
error_free_vmcb_page:
__free_page(vmcb01_page);
out:
@@ -1423,10 +1348,10 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
/*
- * TSC_AUX is always virtualized for SEV-ES guests when the feature is
- * available. The user return MSR support is not required in this case
- * because TSC_AUX is restored on #VMEXIT from the host save area
- * (which has been initialized in svm_enable_virtualization_cpu()).
+ * TSC_AUX is always virtualized (context switched by hardware) for
+ * SEV-ES guests when the feature is available. For non-SEV-ES guests,
+ * context switch TSC_AUX via the user_return MSR infrastructure (not
+ * all CPUs support TSC_AUX virtualization).
*/
if (likely(tsc_aux_uret_slot >= 0) &&
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
@@ -2727,8 +2652,8 @@ static int svm_get_feature_msr(u32 msr, u64 *data)
static bool sev_es_prevent_msr_access(struct kvm_vcpu *vcpu,
struct msr_data *msr_info)
{
- return sev_es_guest(vcpu->kvm) &&
- vcpu->arch.guest_state_protected &&
+ return sev_es_guest(vcpu->kvm) && vcpu->arch.guest_state_protected &&
+ msr_info->index != MSR_IA32_XSS &&
!msr_write_intercepted(vcpu, msr_info->index);
}
@@ -2784,6 +2709,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (guest_cpuid_is_intel_compatible(vcpu))
msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
break;
+ case MSR_IA32_S_CET:
+ msr_info->data = svm->vmcb->save.s_cet;
+ break;
+ case MSR_IA32_INT_SSP_TAB:
+ msr_info->data = svm->vmcb->save.isst_addr;
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ msr_info->data = svm->vmcb->save.ssp;
+ break;
case MSR_TSC_AUX:
msr_info->data = svm->tsc_aux;
break;
@@ -3016,13 +2950,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
svm->sysenter_esp_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0;
break;
+ case MSR_IA32_S_CET:
+ svm->vmcb->save.s_cet = data;
+ vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_CET);
+ break;
+ case MSR_IA32_INT_SSP_TAB:
+ svm->vmcb->save.isst_addr = data;
+ vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_CET);
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ svm->vmcb->save.ssp = data;
+ vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_CET);
+ break;
case MSR_TSC_AUX:
/*
* TSC_AUX is always virtualized for SEV-ES guests when the
* feature is available. The user return MSR support is not
* required in this case because TSC_AUX is restored on #VMEXIT
- * from the host save area (which has been initialized in
- * svm_enable_virtualization_cpu()).
+ * from the host save area.
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm))
break;
@@ -3407,6 +3352,10 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-15s %016llx %-13s %016llx\n",
"rsp:", save->rsp, "rax:", save->rax);
pr_err("%-15s %016llx %-13s %016llx\n",
+ "s_cet:", save->s_cet, "ssp:", save->ssp);
+ pr_err("%-15s %016llx\n",
+ "isst_addr:", save->isst_addr);
+ pr_err("%-15s %016llx %-13s %016llx\n",
"star:", save01->star, "lstar:", save01->lstar);
pr_err("%-15s %016llx %-13s %016llx\n",
"cstar:", save01->cstar, "sfmask:", save01->sfmask);
@@ -3431,6 +3380,13 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
"sev_features", vmsa->sev_features);
pr_err("%-15s %016llx %-13s %016llx\n",
+ "pl0_ssp:", vmsa->pl0_ssp, "pl1_ssp:", vmsa->pl1_ssp);
+ pr_err("%-15s %016llx %-13s %016llx\n",
+ "pl2_ssp:", vmsa->pl2_ssp, "pl3_ssp:", vmsa->pl3_ssp);
+ pr_err("%-15s %016llx\n",
+ "u_cet:", vmsa->u_cet);
+
+ pr_err("%-15s %016llx %-13s %016llx\n",
"rax:", vmsa->rax, "rbx:", vmsa->rbx);
pr_err("%-15s %016llx %-13s %016llx\n",
"rcx:", vmsa->rcx, "rdx:", vmsa->rdx);
@@ -4180,17 +4136,27 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+
+ /*
+ * Next RIP must be provided as IRQs are disabled, and accessing guest
+ * memory to decode the instruction might fault, i.e. might sleep.
+ */
+ if (!nrips || !control->next_rip)
+ return EXIT_FASTPATH_NONE;
if (is_guest_mode(vcpu))
return EXIT_FASTPATH_NONE;
- switch (svm->vmcb->control.exit_code) {
+ switch (control->exit_code) {
case SVM_EXIT_MSR:
- if (!svm->vmcb->control.exit_info_1)
+ if (!control->exit_info_1)
break;
- return handle_fastpath_set_msr_irqoff(vcpu);
+ return handle_fastpath_wrmsr(vcpu);
case SVM_EXIT_HLT:
return handle_fastpath_hlt(vcpu);
+ case SVM_EXIT_INVD:
+ return handle_fastpath_invd(vcpu);
default:
break;
}
@@ -4467,8 +4433,6 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
if (sev_guest(vcpu->kvm))
sev_vcpu_after_set_cpuid(svm);
-
- svm_recalc_intercepts_after_set_cpuid(vcpu);
}
static bool svm_has_wbinvd_exit(void)
@@ -5041,7 +5005,7 @@ static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
return page_address(page);
}
-static struct kvm_x86_ops svm_x86_ops __initdata = {
+struct kvm_x86_ops svm_x86_ops __initdata = {
.name = KBUILD_MODNAME,
.check_processor_compatibility = svm_check_processor_compat,
@@ -5170,7 +5134,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
- .recalc_msr_intercepts = svm_recalc_msr_intercepts,
+ .recalc_intercepts = svm_recalc_intercepts,
.complete_emulated_msr = svm_complete_emulated_msr,
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
@@ -5228,7 +5192,8 @@ static __init void svm_set_cpu_caps(void)
kvm_set_cpu_caps();
kvm_caps.supported_perf_cap = 0;
- kvm_caps.supported_xss = 0;
+
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
/* CPUID 0x80000001 and 0x8000000A (SVM features) */
if (nested) {
@@ -5300,8 +5265,12 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x8000001F (SME/SEV features) */
sev_set_cpu_caps();
- /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
+ /*
+ * Clear capabilities that are automatically configured by common code,
+ * but that require explicit SVM support (that isn't yet implemented).
+ */
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
+ kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
}
static __init int svm_hardware_setup(void)
@@ -5374,6 +5343,21 @@ static __init int svm_hardware_setup(void)
get_npt_level(), PG_LEVEL_1G);
pr_info("Nested Paging %s\n", str_enabled_disabled(npt_enabled));
+ /*
+ * It seems that on AMD processors PTE's accessed bit is
+ * being set by the CPU hardware before the NPF vmexit.
+ * This is not expected behaviour and our tests fail because
+ * of it.
+ * A workaround here is to disable support for
+ * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
+ * In this case userspace can know if there is support using
+ * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
+ * it
+ * If future AMD CPU models change the behaviour described above,
+ * this variable can be changed accordingly
+ */
+ allow_smaller_maxphyaddr = !npt_enabled;
+
/* Setup shadow_me_value and shadow_me_mask */
kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);
@@ -5408,15 +5392,12 @@ static __init int svm_hardware_setup(void)
goto err;
}
- enable_apicv = avic = avic && avic_hardware_setup();
-
+ enable_apicv = avic_hardware_setup();
if (!enable_apicv) {
enable_ipiv = false;
svm_x86_ops.vcpu_blocking = NULL;
svm_x86_ops.vcpu_unblocking = NULL;
svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
- } else if (!x2avic_enabled) {
- svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
}
if (vls) {
@@ -5453,21 +5434,6 @@ static __init int svm_hardware_setup(void)
svm_set_cpu_caps();
- /*
- * It seems that on AMD processors PTE's accessed bit is
- * being set by the CPU hardware before the NPF vmexit.
- * This is not expected behaviour and our tests fail because
- * of it.
- * A workaround here is to disable support for
- * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
- * In this case userspace can know if there is support using
- * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
- * it
- * If future AMD CPU models change the behaviour described above,
- * this variable can be changed accordingly
- */
- allow_smaller_maxphyaddr = !npt_enabled;
-
kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED;
return 0;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 70df7c6413cf..e4b04f435b3d 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -48,10 +48,13 @@ extern bool npt_enabled;
extern int nrips;
extern int vgif;
extern bool intercept_smi;
-extern bool x2avic_enabled;
extern bool vnmi;
extern int lbrv;
+extern int tsc_aux_uret_slot __ro_after_init;
+
+extern struct kvm_x86_ops svm_x86_ops __initdata;
+
/*
* Clean bits in VMCB.
* VMCB_ALL_CLEAN_MASK might also need to
@@ -74,6 +77,7 @@ enum {
* AVIC PHYSICAL_TABLE pointer,
* AVIC LOGICAL_TABLE pointer
*/
+ VMCB_CET, /* S_CET, SSP, ISST_ADDR */
VMCB_SW = 31, /* Reserved for hypervisor/software use */
};
@@ -82,7 +86,7 @@ enum {
(1U << VMCB_ASID) | (1U << VMCB_INTR) | \
(1U << VMCB_NPT) | (1U << VMCB_CR) | (1U << VMCB_DR) | \
(1U << VMCB_DT) | (1U << VMCB_SEG) | (1U << VMCB_CR2) | \
- (1U << VMCB_LBR) | (1U << VMCB_AVIC) | \
+ (1U << VMCB_LBR) | (1U << VMCB_AVIC) | (1U << VMCB_CET) | \
(1U << VMCB_SW))
/* TPR and CR2 are always written before VMRUN */
@@ -699,7 +703,6 @@ void svm_set_gif(struct vcpu_svm *svm, bool value);
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
int read, int write);
-void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool disable);
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
int trig_mode, int vec);
@@ -801,7 +804,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG) \
)
-bool avic_hardware_setup(void);
+bool __init avic_hardware_setup(void);
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -826,10 +829,9 @@ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
/* sev.c */
int pre_sev_run(struct vcpu_svm *svm, int cpu);
-void sev_init_vmcb(struct vcpu_svm *svm);
+void sev_init_vmcb(struct vcpu_svm *svm, bool init_event);
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
-void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa);
@@ -854,6 +856,7 @@ static inline struct page *snp_safe_alloc_page(void)
return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT);
}
+int sev_vcpu_create(struct kvm_vcpu *vcpu);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
void sev_vm_destroy(struct kvm *kvm);
void __init sev_set_cpu_caps(void);
@@ -863,7 +866,6 @@ int sev_cpu_init(struct svm_cpu_data *sd);
int sev_dev_get_attr(u32 group, u64 attr, u64 *val);
extern unsigned int max_sev_asid;
void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
-void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu);
int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
@@ -880,6 +882,7 @@ static inline struct page *snp_safe_alloc_page(void)
return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT);
}
+static inline int sev_vcpu_create(struct kvm_vcpu *vcpu) { return 0; }
static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {}
static inline void sev_vm_destroy(struct kvm *kvm) {}
static inline void __init sev_set_cpu_caps(void) {}
@@ -889,7 +892,6 @@ static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; }
static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; }
#define max_sev_asid 0
static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {}
-static inline void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) {}
static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
{
return 0;
@@ -914,16 +916,21 @@ void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted,
void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
#define DEFINE_KVM_GHCB_ACCESSORS(field) \
- static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \
- { \
- return test_bit(GHCB_BITMAP_IDX(field), \
- (unsigned long *)&svm->sev_es.valid_bitmap); \
- } \
- \
- static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \
- { \
- return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \
- } \
+static __always_inline u64 kvm_ghcb_get_##field(struct vcpu_svm *svm) \
+{ \
+ return READ_ONCE(svm->sev_es.ghcb->save.field); \
+} \
+ \
+static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \
+{ \
+ return test_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&svm->sev_es.valid_bitmap); \
+} \
+ \
+static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm) \
+{ \
+ return kvm_ghcb_##field##_is_valid(svm) ? kvm_ghcb_get_##field(svm) : 0; \
+}
DEFINE_KVM_GHCB_ACCESSORS(cpl)
DEFINE_KVM_GHCB_ACCESSORS(rax)
@@ -936,5 +943,6 @@ DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
DEFINE_KVM_GHCB_ACCESSORS(xcr0)
+DEFINE_KVM_GHCB_ACCESSORS(xss)
#endif
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
index 3971b3ea5d04..a8e78c0e5956 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.c
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -15,7 +15,7 @@
#include "kvm_onhyperv.h"
#include "svm_onhyperv.h"
-int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
+static int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
{
struct hv_vmcb_enlightenments *hve;
hpa_t partition_assist_page = hv_get_partition_assist_page(vcpu);
@@ -35,3 +35,29 @@ int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
return 0;
}
+__init void svm_hv_hardware_setup(void)
+{
+ if (npt_enabled &&
+ ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
+ pr_info(KBUILD_MODNAME ": Hyper-V enlightened NPT TLB flush enabled\n");
+ svm_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
+ svm_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
+ }
+
+ if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) {
+ int cpu;
+
+ pr_info(KBUILD_MODNAME ": Hyper-V Direct TLB Flush enabled\n");
+ for_each_online_cpu(cpu) {
+ struct hv_vp_assist_page *vp_ap =
+ hv_get_vp_assist_page(cpu);
+
+ if (!vp_ap)
+ continue;
+
+ vp_ap->nested_control.features.directhypercall = 1;
+ }
+ svm_x86_ops.enable_l2_tlb_flush =
+ svm_hv_enable_l2_tlb_flush;
+ }
+}
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index f85bc617ffe4..08f14e6f195c 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -13,9 +13,7 @@
#include "kvm_onhyperv.h"
#include "svm/hyperv.h"
-static struct kvm_x86_ops svm_x86_ops;
-
-int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
+__init void svm_hv_hardware_setup(void);
static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
{
@@ -40,33 +38,6 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
hve->hv_enlightenments_control.msr_bitmap = 1;
}
-static inline __init void svm_hv_hardware_setup(void)
-{
- if (npt_enabled &&
- ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
- pr_info(KBUILD_MODNAME ": Hyper-V enlightened NPT TLB flush enabled\n");
- svm_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
- svm_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
- }
-
- if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) {
- int cpu;
-
- pr_info(KBUILD_MODNAME ": Hyper-V Direct TLB Flush enabled\n");
- for_each_online_cpu(cpu) {
- struct hv_vp_assist_page *vp_ap =
- hv_get_vp_assist_page(cpu);
-
- if (!vp_ap)
- continue;
-
- vp_ap->nested_control.features.directhypercall = 1;
- }
- svm_x86_ops.enable_l2_tlb_flush =
- svm_hv_enable_l2_tlb_flush;
- }
-}
-
static inline void svm_hv_vmcb_dirty_nested_enlightenments(
struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 57d79fd31df0..e79bc9cb7162 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -461,8 +461,9 @@ TRACE_EVENT(kvm_inj_virq,
#define kvm_trace_sym_exc \
EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \
- EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \
- EXS(MF), EXS(AC), EXS(MC)
+ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), EXS(MF), \
+ EXS(AC), EXS(MC), EXS(XM), EXS(VE), EXS(CP), \
+ EXS(HV), EXS(VC), EXS(SX)
/*
* Tracepoint for kvm interrupt injection:
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 5316c27f6099..02aadb9d730e 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -20,9 +20,6 @@ extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
#define PT_MODE_HOST_GUEST 1
-#define PMU_CAP_FW_WRITES (1ULL << 13)
-#define PMU_CAP_LBR_FMT 0x3f
-
struct nested_vmx_msrs {
/*
* We only store the "true" versions of the VMX capability MSRs. We
@@ -76,6 +73,11 @@ static inline bool cpu_has_vmx_basic_inout(void)
return vmcs_config.basic & VMX_BASIC_INOUT;
}
+static inline bool cpu_has_vmx_basic_no_hw_errcode_cc(void)
+{
+ return vmcs_config.basic & VMX_BASIC_NO_HW_ERROR_CODE_CC;
+}
+
static inline bool cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
@@ -103,6 +105,10 @@ static inline bool cpu_has_load_perf_global_ctrl(void)
return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
}
+static inline bool cpu_has_load_cet_ctrl(void)
+{
+ return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE);
+}
static inline bool cpu_has_vmx_mpx(void)
{
return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index bb5f182f6788..0eb2773b2ae2 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -188,18 +188,18 @@ static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return vmx_get_msr(vcpu, msr_info);
}
-static void vt_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
+static void vt_recalc_intercepts(struct kvm_vcpu *vcpu)
{
/*
- * TDX doesn't allow VMM to configure interception of MSR accesses.
- * TDX guest requests MSR accesses by calling TDVMCALL. The MSR
- * filters will be applied when handling the TDVMCALL for RDMSR/WRMSR
- * if the userspace has set any.
+ * TDX doesn't allow VMM to configure interception of instructions or
+ * MSR accesses. TDX guest requests MSR accesses by calling TDVMCALL.
+ * The MSR filters will be applied when handling the TDVMCALL for
+ * RDMSR/WRMSR if the userspace has set any.
*/
if (is_td_vcpu(vcpu))
return;
- vmx_recalc_msr_intercepts(vcpu);
+ vmx_recalc_intercepts(vcpu);
}
static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
@@ -996,7 +996,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.apic_init_signal_blocked = vt_op(apic_init_signal_blocked),
.migrate_timers = vmx_migrate_timers,
- .recalc_msr_intercepts = vt_op(recalc_msr_intercepts),
+ .recalc_intercepts = vt_op(recalc_intercepts),
.complete_emulated_msr = vt_op(complete_emulated_msr),
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b8ea1969113d..76271962cb70 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -721,6 +721,24 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_MPERF, MSR_TYPE_R);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_U_CET, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_S_CET, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL0_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL1_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL2_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL3_SSP, MSR_TYPE_RW);
+
kvm_vcpu_unmap(vcpu, &map);
vmx->nested.force_msr_bitmap_recalc = false;
@@ -997,7 +1015,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
__func__, i, e.index, e.reserved);
goto fail;
}
- if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) {
+ if (kvm_emulate_msr_write(vcpu, e.index, e.value)) {
pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, e.value);
@@ -1033,7 +1051,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
}
}
- if (kvm_get_msr_with_filter(vcpu, msr_index, data)) {
+ if (kvm_emulate_msr_read(vcpu, msr_index, data)) {
pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
msr_index);
return false;
@@ -1272,9 +1290,10 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT |
VMX_BASIC_INOUT |
- VMX_BASIC_TRUE_CTLS;
+ VMX_BASIC_TRUE_CTLS |
+ VMX_BASIC_NO_HW_ERROR_CODE_CC;
- const u64 reserved_bits = GENMASK_ULL(63, 56) |
+ const u64 reserved_bits = GENMASK_ULL(63, 57) |
GENMASK_ULL(47, 45) |
BIT_ULL(31);
@@ -2520,6 +2539,32 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
}
}
+static void vmcs_read_cet_state(struct kvm_vcpu *vcpu, u64 *s_cet,
+ u64 *ssp, u64 *ssp_tbl)
+{
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ *s_cet = vmcs_readl(GUEST_S_CET);
+
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) {
+ *ssp = vmcs_readl(GUEST_SSP);
+ *ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
+ }
+}
+
+static void vmcs_write_cet_state(struct kvm_vcpu *vcpu, u64 s_cet,
+ u64 ssp, u64 ssp_tbl)
+{
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ vmcs_writel(GUEST_S_CET, s_cet);
+
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) {
+ vmcs_writel(GUEST_SSP, ssp);
+ vmcs_writel(GUEST_INTR_SSP_TABLE, ssp_tbl);
+ }
+}
+
static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
struct hv_enlightened_vmcs *hv_evmcs = nested_vmx_evmcs(vmx);
@@ -2636,6 +2681,10 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)
+ vmcs_write_cet_state(&vmx->vcpu, vmcs12->guest_s_cet,
+ vmcs12->guest_ssp, vmcs12->guest_ssp_tbl);
+
set_cr4_guest_host_mask(vmx);
}
@@ -2675,6 +2724,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
}
+
+ if (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
+ vmcs_write_cet_state(vcpu, vmx->nested.pre_vmenter_s_cet,
+ vmx->nested.pre_vmenter_ssp,
+ vmx->nested.pre_vmenter_ssp_tbl);
+
if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
@@ -2770,8 +2826,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
- WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->guest_ia32_perf_global_ctrl))) {
+ WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
@@ -2949,7 +3005,6 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
- bool should_have_error_code;
bool urg = nested_cpu_has2(vmcs12,
SECONDARY_EXEC_UNRESTRICTED_GUEST);
bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
@@ -2966,12 +3021,19 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
return -EINVAL;
- /* VM-entry interruption-info field: deliver error code */
- should_have_error_code =
- intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
- x86_exception_has_error_code(vector);
- if (CC(has_error_code != should_have_error_code))
- return -EINVAL;
+ /*
+ * Cannot deliver error code in real mode or if the interrupt
+ * type is not hardware exception. For other cases, do the
+ * consistency check only if the vCPU doesn't enumerate
+ * VMX_BASIC_NO_HW_ERROR_CODE_CC.
+ */
+ if (!prot_mode || intr_type != INTR_TYPE_HARD_EXCEPTION) {
+ if (CC(has_error_code))
+ return -EINVAL;
+ } else if (!nested_cpu_has_no_hw_errcode_cc(vcpu)) {
+ if (CC(has_error_code != x86_exception_has_error_code(vector)))
+ return -EINVAL;
+ }
/* VM-entry exception error code */
if (CC(has_error_code &&
@@ -3038,6 +3100,16 @@ static bool is_l1_noncanonical_address_on_vmexit(u64 la, struct vmcs12 *vmcs12)
return !__is_canonical_address(la, l1_address_bits_on_exit);
}
+static int nested_vmx_check_cet_state_common(struct kvm_vcpu *vcpu, u64 s_cet,
+ u64 ssp, u64 ssp_tbl)
+{
+ if (CC(!kvm_is_valid_u_s_cet(vcpu, s_cet)) || CC(!IS_ALIGNED(ssp, 4)) ||
+ CC(is_noncanonical_msr_address(ssp_tbl, vcpu)))
+ return -EINVAL;
+
+ return 0;
+}
+
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -3048,6 +3120,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
return -EINVAL;
+ if (CC(vmcs12->host_cr4 & X86_CR4_CET && !(vmcs12->host_cr0 & X86_CR0_WP)))
+ return -EINVAL;
+
if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
return -EINVAL;
@@ -3104,6 +3179,27 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE) {
+ if (nested_vmx_check_cet_state_common(vcpu, vmcs12->host_s_cet,
+ vmcs12->host_ssp,
+ vmcs12->host_ssp_tbl))
+ return -EINVAL;
+
+ /*
+ * IA32_S_CET and SSP must be canonical if the host will
+ * enter 64-bit mode after VM-exit; otherwise, higher
+ * 32-bits must be all 0s.
+ */
+ if (ia32e) {
+ if (CC(is_noncanonical_msr_address(vmcs12->host_s_cet, vcpu)) ||
+ CC(is_noncanonical_msr_address(vmcs12->host_ssp, vcpu)))
+ return -EINVAL;
+ } else {
+ if (CC(vmcs12->host_s_cet >> 32) || CC(vmcs12->host_ssp >> 32))
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -3162,6 +3258,9 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
return -EINVAL;
+ if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP)))
+ return -EINVAL;
+
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
(CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
@@ -3211,6 +3310,23 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
return -EINVAL;
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
+ if (nested_vmx_check_cet_state_common(vcpu, vmcs12->guest_s_cet,
+ vmcs12->guest_ssp,
+ vmcs12->guest_ssp_tbl))
+ return -EINVAL;
+
+ /*
+ * Guest SSP must have 63:N bits identical, rather than
+ * be canonical (i.e., 63:N-1 bits identical), where N is
+ * the CPU's maximum linear-address width. Similar to
+ * is_noncanonical_msr_address(), use the host's
+ * linear-address width.
+ */
+ if (CC(!__is_canonical_address(vmcs12->guest_ssp, max_host_virt_addr_bits() + 1)))
+ return -EINVAL;
+ }
+
if (nested_check_guest_non_reg_state(vmcs12))
return -EINVAL;
@@ -3544,6 +3660,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+ if (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE))
+ vmcs_read_cet_state(vcpu, &vmx->nested.pre_vmenter_s_cet,
+ &vmx->nested.pre_vmenter_ssp,
+ &vmx->nested.pre_vmenter_ssp_tbl);
+
/*
* Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
* nested early checks are disabled. In the event of a "late" VM-Fail,
@@ -3690,7 +3812,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
+ kvm_pmu_branch_retired(vcpu);
if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
return nested_vmx_failInvalid(vcpu);
@@ -4627,6 +4749,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
vmcs12->guest_ia32_efer = vcpu->arch.efer;
+
+ vmcs_read_cet_state(&vmx->vcpu, &vmcs12->guest_s_cet,
+ &vmcs12->guest_ssp,
+ &vmcs12->guest_ssp_tbl);
}
/*
@@ -4752,14 +4878,26 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
vmcs_write64(GUEST_BNDCFGS, 0);
+ /*
+ * Load CET state from host state if VM_EXIT_LOAD_CET_STATE is set.
+ * otherwise CET state should be retained across VM-exit, i.e.,
+ * guest values should be propagated from vmcs12 to vmcs01.
+ */
+ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE)
+ vmcs_write_cet_state(vcpu, vmcs12->host_s_cet, vmcs12->host_ssp,
+ vmcs12->host_ssp_tbl);
+ else
+ vmcs_write_cet_state(vcpu, vmcs12->guest_s_cet, vmcs12->guest_ssp,
+ vmcs12->guest_ssp_tbl);
+
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
- WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->host_ia32_perf_global_ctrl));
+ WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->host_ia32_perf_global_ctrl));
/* Set L1 segment info according to Intel SDM
27.5.2 Loading Host Segment and Descriptor-Table Registers */
@@ -4937,7 +5075,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
goto vmabort;
}
- if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) {
+ if (kvm_emulate_msr_write(vcpu, h.index, h.value)) {
pr_debug_ratelimited(
"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
__func__, j, h.index, h.value);
@@ -6216,19 +6354,26 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
union vmx_exit_reason exit_reason)
{
- u32 msr_index = kvm_rcx_read(vcpu);
+ u32 msr_index;
gpa_t bitmap;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return true;
+ if (exit_reason.basic == EXIT_REASON_MSR_READ_IMM ||
+ exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
+ msr_index = vmx_get_exit_qual(vcpu);
+ else
+ msr_index = kvm_rcx_read(vcpu);
+
/*
* The MSR_BITMAP page is divided into four 1024-byte bitmaps,
* for the four combinations of read/write and low/high MSR numbers.
* First we need to figure out which of the four to use:
*/
bitmap = vmcs12->msr_bitmap;
- if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
+ if (exit_reason.basic == EXIT_REASON_MSR_WRITE ||
+ exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
bitmap += 2048;
if (msr_index >= 0xc0000000) {
msr_index -= 0xc0000000;
@@ -6527,6 +6672,8 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
+ case EXIT_REASON_MSR_READ_IMM:
+ case EXIT_REASON_MSR_WRITE_IMM:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
case EXIT_REASON_INVALID_STATE:
return true;
@@ -6561,14 +6708,17 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
return true;
- case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+ case EXIT_REASON_XSAVES:
+ case EXIT_REASON_XRSTORS:
/*
- * This should never happen, since it is not possible to
- * set XSS to a non-zero value---neither in L1 nor in L2.
- * If if it were, XSS would have to be checked against
- * the XSS exit bitmap in vmcs12.
+ * Always forward XSAVES/XRSTORS to L1 as KVM doesn't utilize
+ * XSS-bitmap, and always loads vmcs02 with vmcs12's XSS-bitmap
+ * verbatim, i.e. any exit is due to L1's bitmap. WARN if
+ * XSAVES isn't enabled, as the CPU is supposed to inject #UD
+ * in that case, before consulting the XSS-bitmap.
*/
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
+ WARN_ON_ONCE(!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES));
+ return true;
case EXIT_REASON_UMWAIT:
case EXIT_REASON_TPAUSE:
return nested_cpu_has2(vmcs12,
@@ -7029,13 +7179,17 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf,
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
- VM_EXIT_CLEAR_BNDCFGS;
+ VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE;
msrs->exit_ctls_high |=
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT |
VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ msrs->exit_ctls_high &= ~VM_EXIT_LOAD_CET_STATE;
+
/* We support free control of debug control saving. */
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
}
@@ -7051,11 +7205,16 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf,
#ifdef CONFIG_X86_64
VM_ENTRY_IA32E_MODE |
#endif
- VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
+ VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
+ VM_ENTRY_LOAD_CET_STATE;
msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER |
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ msrs->entry_ctls_high &= ~VM_ENTRY_LOAD_CET_STATE;
+
/* We support free control of debug control loading. */
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
}
@@ -7205,6 +7364,8 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs)
msrs->basic |= VMX_BASIC_TRUE_CTLS;
if (cpu_has_vmx_basic_inout())
msrs->basic |= VMX_BASIC_INOUT;
+ if (cpu_has_vmx_basic_no_hw_errcode_cc())
+ msrs->basic |= VMX_BASIC_NO_HW_ERROR_CODE_CC;
}
static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs)
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 6eedcfc91070..983484d42ebf 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -309,6 +309,11 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
__kvm_is_valid_cr4(vcpu, val);
}
+static inline bool nested_cpu_has_no_hw_errcode_cc(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.msrs.basic & VMX_BASIC_NO_HW_ERROR_CODE_CC;
+}
+
/* No difference in the restrictions on guest and host CR4 in VMX operation. */
#define nested_guest_cr4_valid nested_cr4_valid
#define nested_host_cr4_valid nested_cr4_valid
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 0b173602821b..de1d9785c01f 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -138,7 +138,7 @@ static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
+ return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0;
}
static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
@@ -478,8 +478,8 @@ static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
};
u64 eventsel;
- BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUTNERS);
- BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUTNERS);
+ BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUNTERS);
+ BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUNTERS);
/*
* Yell if perf reports support for a fixed counter but perf doesn't
@@ -536,29 +536,44 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
kvm_pmu_cap.num_counters_gp);
eax.split.bit_width = min_t(int, eax.split.bit_width,
kvm_pmu_cap.bit_width_gp);
- pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(eax.split.bit_width) - 1;
eax.split.mask_length = min_t(int, eax.split.mask_length,
kvm_pmu_cap.events_mask_len);
- pmu->available_event_types = ~entry->ebx &
- ((1ull << eax.split.mask_length) - 1);
-
- if (pmu->version == 1) {
- pmu->nr_arch_fixed_counters = 0;
- } else {
- pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
- kvm_pmu_cap.num_counters_fixed);
- edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
- kvm_pmu_cap.bit_width_fixed);
- pmu->counter_bitmask[KVM_PMC_FIXED] =
- ((u64)1 << edx.split.bit_width_fixed) - 1;
+ pmu->available_event_types = ~entry->ebx & (BIT_ULL(eax.split.mask_length) - 1);
+
+ entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
+ if (entry &&
+ (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
+ (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
+ pmu->reserved_bits ^= HSW_IN_TX;
+ pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
}
+ perf_capabilities = vcpu_get_perf_capabilities(vcpu);
+ if (intel_pmu_lbr_is_compatible(vcpu) &&
+ (perf_capabilities & PERF_CAP_LBR_FMT))
+ memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
+ else
+ lbr_desc->records.nr = 0;
+
+ if (lbr_desc->records.nr)
+ bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
+
+ if (pmu->version == 1)
+ return;
+
+ pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
+ kvm_pmu_cap.num_counters_fixed);
+ edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
+ kvm_pmu_cap.bit_width_fixed);
+ pmu->counter_bitmask[KVM_PMC_FIXED] = BIT_ULL(edx.split.bit_width_fixed) - 1;
+
intel_pmu_enable_fixed_counter_bits(pmu, INTEL_FIXED_0_KERNEL |
INTEL_FIXED_0_USER |
INTEL_FIXED_0_ENABLE_PMI);
- counter_rsvd = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
- (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
+ counter_rsvd = ~((BIT_ULL(pmu->nr_arch_gp_counters) - 1) |
+ ((BIT_ULL(pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
pmu->global_ctrl_rsvd = counter_rsvd;
/*
@@ -573,29 +588,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_status_rsvd &=
~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
- entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
- if (entry &&
- (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
- (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
- pmu->reserved_bits ^= HSW_IN_TX;
- pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
- }
-
- bitmap_set(pmu->all_valid_pmc_idx,
- 0, pmu->nr_arch_gp_counters);
- bitmap_set(pmu->all_valid_pmc_idx,
- INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
-
- perf_capabilities = vcpu_get_perf_capabilities(vcpu);
- if (intel_pmu_lbr_is_compatible(vcpu) &&
- (perf_capabilities & PMU_CAP_LBR_FMT))
- memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
- else
- lbr_desc->records.nr = 0;
-
- if (lbr_desc->records.nr)
- bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
-
if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
pmu->pebs_enable_rsvd = counter_rsvd;
@@ -603,8 +595,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_data_cfg_rsvd = ~0xff00000full;
intel_pmu_enable_fixed_counter_bits(pmu, ICL_FIXED_0_ADAPTIVE);
} else {
- pmu->pebs_enable_rsvd =
- ~((1ull << pmu->nr_arch_gp_counters) - 1);
+ pmu->pebs_enable_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1);
}
}
}
@@ -625,7 +616,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].current_config = 0;
}
- for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUTNERS; i++) {
+ for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUNTERS; i++) {
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
pmu->fixed_counters[i].vcpu = vcpu;
pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
@@ -762,7 +753,7 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
int bit, hw_idx;
kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
- if (!pmc_speculative_in_use(pmc) ||
+ if (!pmc_is_locally_enabled(pmc) ||
!pmc_is_globally_enabled(pmc) || !pmc->perf_event)
continue;
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 00f8bfd2330d..0a49c863c811 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -620,6 +620,11 @@ int tdx_vm_init(struct kvm *kvm)
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
kvm->arch.has_protected_state = true;
+ /*
+ * TDX Module doesn't allow the hypervisor to modify the EOI-bitmap,
+ * i.e. all EOIs are accelerated and never trigger exits.
+ */
+ kvm->arch.has_protected_eoi = true;
kvm->arch.has_private_mem = true;
kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT;
@@ -1994,6 +1999,8 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
* handle retries locally in their EPT violation handlers.
*/
while (1) {
+ struct kvm_memory_slot *slot;
+
ret = __vmx_handle_ept_violation(vcpu, gpa, exit_qual);
if (ret != RET_PF_RETRY || !local_retry)
@@ -2007,6 +2014,15 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
break;
}
+ /*
+ * Bail if the memslot is invalid, i.e. is being deleted, as
+ * faulting in will never succeed and this task needs to drop
+ * SRCU in order to let memslot deletion complete.
+ */
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(gpa));
+ if (slot && slot->flags & KVM_MEMSLOT_INVALID)
+ break;
+
cond_resched();
}
return ret;
@@ -2472,7 +2488,7 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
/* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. */
kvm_tdx->td.tdcx_nr_pages = tdx_sysinfo->td_ctrl.tdvps_base_size / PAGE_SIZE - 1;
tdcs_pages = kcalloc(kvm_tdx->td.tdcs_nr_pages, sizeof(*kvm_tdx->td.tdcs_pages),
- GFP_KERNEL | __GFP_ZERO);
+ GFP_KERNEL);
if (!tdcs_pages)
goto free_tdr;
@@ -3460,12 +3476,11 @@ static int __init __tdx_bringup(void)
if (r)
goto tdx_bringup_err;
+ r = -EINVAL;
/* Get TDX global information for later use */
tdx_sysinfo = tdx_get_sysinfo();
- if (WARN_ON_ONCE(!tdx_sysinfo)) {
- r = -EINVAL;
+ if (WARN_ON_ONCE(!tdx_sysinfo))
goto get_sysinfo_err;
- }
/* Check TDX module and KVM capabilities */
if (!tdx_get_supported_attrs(&tdx_sysinfo->td_conf) ||
@@ -3508,14 +3523,11 @@ static int __init __tdx_bringup(void)
if (td_conf->max_vcpus_per_td < num_present_cpus()) {
pr_err("Disable TDX: MAX_VCPU_PER_TD (%u) smaller than number of logical CPUs (%u).\n",
td_conf->max_vcpus_per_td, num_present_cpus());
- r = -EINVAL;
goto get_sysinfo_err;
}
- if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids())) {
- r = -EINVAL;
+ if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids()))
goto get_sysinfo_err;
- }
/*
* Leave hardware virtualization enabled after TDX is enabled
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index 106a72c923ca..4233b5ca9461 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = {
FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
+ FIELD(GUEST_S_CET, guest_s_cet),
+ FIELD(GUEST_SSP, guest_ssp),
+ FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl),
FIELD(HOST_CR0, host_cr0),
FIELD(HOST_CR3, host_cr3),
FIELD(HOST_CR4, host_cr4),
@@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = {
FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
FIELD(HOST_RSP, host_rsp),
FIELD(HOST_RIP, host_rip),
+ FIELD(HOST_S_CET, host_s_cet),
+ FIELD(HOST_SSP, host_ssp),
+ FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl),
};
const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 56fd150a6f24..4ad6b16525b9 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -117,7 +117,13 @@ struct __packed vmcs12 {
natural_width host_ia32_sysenter_eip;
natural_width host_rsp;
natural_width host_rip;
- natural_width paddingl[8]; /* room for future expansion */
+ natural_width host_s_cet;
+ natural_width host_ssp;
+ natural_width host_ssp_tbl;
+ natural_width guest_s_cet;
+ natural_width guest_ssp;
+ natural_width guest_ssp_tbl;
+ natural_width paddingl[2]; /* room for future expansion */
u32 pin_based_vm_exec_control;
u32 cpu_based_vm_exec_control;
u32 exception_bitmap;
@@ -294,6 +300,12 @@ static inline void vmx_check_vmcs12_offsets(void)
CHECK_OFFSET(host_ia32_sysenter_eip, 656);
CHECK_OFFSET(host_rsp, 664);
CHECK_OFFSET(host_rip, 672);
+ CHECK_OFFSET(host_s_cet, 680);
+ CHECK_OFFSET(host_ssp, 688);
+ CHECK_OFFSET(host_ssp_tbl, 696);
+ CHECK_OFFSET(guest_s_cet, 704);
+ CHECK_OFFSET(guest_ssp, 712);
+ CHECK_OFFSET(guest_ssp_tbl, 720);
CHECK_OFFSET(pin_based_vm_exec_control, 744);
CHECK_OFFSET(cpu_based_vm_exec_control, 748);
CHECK_OFFSET(exception_bitmap, 752);
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 0a6cf5bff2aa..bc255d709d8a 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -361,6 +361,10 @@ SYM_FUNC_END(vmread_error_trampoline)
.section .text, "ax"
+#ifndef CONFIG_X86_FRED
+
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
+
+#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0bdf9405969a..f87c216d976d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -28,7 +28,6 @@
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/trace_events.h>
-#include <linux/entry-kvm.h>
#include <asm/apic.h>
#include <asm/asm.h>
@@ -1344,22 +1343,35 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
}
#ifdef CONFIG_X86_64
-static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
+static u64 vmx_read_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 *cache)
{
preempt_disable();
if (vmx->vt.guest_state_loaded)
- rdmsrq(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ *cache = read_msr(msr);
preempt_enable();
- return vmx->msr_guest_kernel_gs_base;
+ return *cache;
}
-static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
+static void vmx_write_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 data,
+ u64 *cache)
{
preempt_disable();
if (vmx->vt.guest_state_loaded)
- wrmsrq(MSR_KERNEL_GS_BASE, data);
+ wrmsrns(msr, data);
preempt_enable();
- vmx->msr_guest_kernel_gs_base = data;
+ *cache = data;
+}
+
+static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
+{
+ return vmx_read_guest_host_msr(vmx, MSR_KERNEL_GS_BASE,
+ &vmx->msr_guest_kernel_gs_base);
+}
+
+static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
+{
+ vmx_write_guest_host_msr(vmx, MSR_KERNEL_GS_BASE, data,
+ &vmx->msr_guest_kernel_gs_base);
}
#endif
@@ -2093,6 +2105,15 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
else
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
+ case MSR_IA32_S_CET:
+ msr_info->data = vmcs_readl(GUEST_S_CET);
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ msr_info->data = vmcs_readl(GUEST_SSP);
+ break;
+ case MSR_IA32_INT_SSP_TAB:
+ msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
+ break;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = vmx_guest_debugctl_read();
break;
@@ -2127,7 +2148,7 @@ u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
(host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
- if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) &&
+ if ((kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT) &&
(host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
@@ -2411,10 +2432,19 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
else
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
+ case MSR_IA32_S_CET:
+ vmcs_writel(GUEST_S_CET, data);
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ vmcs_writel(GUEST_SSP, data);
+ break;
+ case MSR_IA32_INT_SSP_TAB:
+ vmcs_writel(GUEST_INTR_SSP_TABLE, data);
+ break;
case MSR_IA32_PERF_CAPABILITIES:
- if (data & PMU_CAP_LBR_FMT) {
- if ((data & PMU_CAP_LBR_FMT) !=
- (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
+ if (data & PERF_CAP_LBR_FMT) {
+ if ((data & PERF_CAP_LBR_FMT) !=
+ (kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT))
return 1;
if (!cpuid_model_is_consistent(vcpu))
return 1;
@@ -2584,6 +2614,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
{ VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
{ VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
{ VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
+ { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE },
};
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
@@ -4068,8 +4099,10 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
}
}
-void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
+static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
{
+ bool intercept;
+
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -4115,12 +4148,34 @@ void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
+ if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
+ intercept = !guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL0_SSP, MSR_TYPE_RW, intercept);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL1_SSP, MSR_TYPE_RW, intercept);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL2_SSP, MSR_TYPE_RW, intercept);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, intercept);
+ }
+
+ if (kvm_cpu_cap_has(X86_FEATURE_SHSTK) || kvm_cpu_cap_has(X86_FEATURE_IBT)) {
+ intercept = !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_U_CET, MSR_TYPE_RW, intercept);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, intercept);
+ }
+
/*
* x2APIC and LBR MSR intercepts are modified on-demand and cannot be
* filtered by userspace.
*/
}
+void vmx_recalc_intercepts(struct kvm_vcpu *vcpu)
+{
+ vmx_recalc_msr_intercepts(vcpu);
+}
+
static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
int vector)
{
@@ -4270,6 +4325,21 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
if (cpu_has_load_ia32_efer())
vmcs_write64(HOST_IA32_EFER, kvm_host.efer);
+
+ /*
+ * Supervisor shadow stack is not enabled on host side, i.e.,
+ * host IA32_S_CET.SHSTK_EN bit is guaranteed to 0 now, per SDM
+ * description(RDSSP instruction), SSP is not readable in CPL0,
+ * so resetting the two registers to 0s at VM-Exit does no harm
+ * to kernel execution. When execution flow exits to userspace,
+ * SSP is reloaded from IA32_PL3_SSP. Check SDM Vol.2A/B Chapter
+ * 3 and 4 for details.
+ */
+ if (cpu_has_load_cet_ctrl()) {
+ vmcs_writel(HOST_S_CET, kvm_host.s_cet);
+ vmcs_writel(HOST_SSP, 0);
+ vmcs_writel(HOST_INTR_SSP_TABLE, 0);
+ }
}
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
@@ -4304,7 +4374,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
return pin_based_exec_ctrl;
}
-static u32 vmx_vmentry_ctrl(void)
+static u32 vmx_get_initial_vmentry_ctrl(void)
{
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
@@ -4321,7 +4391,7 @@ static u32 vmx_vmentry_ctrl(void)
return vmentry_ctrl;
}
-static u32 vmx_vmexit_ctrl(void)
+static u32 vmx_get_initial_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
@@ -4351,19 +4421,13 @@ void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
- if (kvm_vcpu_apicv_active(vcpu)) {
- secondary_exec_controls_setbit(vmx,
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
- if (enable_ipiv)
- tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT);
- } else {
- secondary_exec_controls_clearbit(vmx,
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
- if (enable_ipiv)
- tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT);
- }
+ secondary_exec_controls_changebit(vmx,
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY,
+ kvm_vcpu_apicv_active(vcpu));
+ if (enable_ipiv)
+ tertiary_exec_controls_changebit(vmx, TERTIARY_EXEC_IPI_VIRT,
+ kvm_vcpu_apicv_active(vcpu));
vmx_update_msr_bitmap_x2apic(vcpu);
}
@@ -4686,10 +4750,10 @@ static void init_vmcs(struct vcpu_vmx *vmx)
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
- vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
+ vm_exit_controls_set(vmx, vmx_get_initial_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
- vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
+ vm_entry_controls_set(vmx, vmx_get_initial_vmentry_ctrl());
vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
@@ -4817,6 +4881,14 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
+ if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
+ vmcs_writel(GUEST_SSP, 0);
+ vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
+ }
+ if (kvm_cpu_cap_has(X86_FEATURE_IBT) ||
+ kvm_cpu_cap_has(X86_FEATURE_SHSTK))
+ vmcs_writel(GUEST_S_CET, 0);
+
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
vpid_sync_context(vmx->vpid);
@@ -6010,6 +6082,23 @@ static int handle_notify(struct kvm_vcpu *vcpu)
return 1;
}
+static int vmx_get_msr_imm_reg(struct kvm_vcpu *vcpu)
+{
+ return vmx_get_instr_info_reg(vmcs_read32(VMX_INSTRUCTION_INFO));
+}
+
+static int handle_rdmsr_imm(struct kvm_vcpu *vcpu)
+{
+ return kvm_emulate_rdmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+ vmx_get_msr_imm_reg(vcpu));
+}
+
+static int handle_wrmsr_imm(struct kvm_vcpu *vcpu)
+{
+ return kvm_emulate_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+ vmx_get_msr_imm_reg(vcpu));
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6068,6 +6157,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_ENCLS] = handle_encls,
[EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
[EXIT_REASON_NOTIFY] = handle_notify,
+ [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm,
+ [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm,
};
static const int kvm_vmx_max_exit_handlers =
@@ -6272,6 +6363,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
+ if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE)
+ pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n",
+ vmcs_readl(GUEST_S_CET), vmcs_readl(GUEST_SSP),
+ vmcs_readl(GUEST_INTR_SSP_TABLE));
pr_err("*** Host State ***\n");
pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
@@ -6302,6 +6397,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0)
vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);
+ if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE)
+ pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n",
+ vmcs_readl(HOST_S_CET), vmcs_readl(HOST_SSP),
+ vmcs_readl(HOST_INTR_SSP_TABLE));
pr_err("*** Control State ***\n");
pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
@@ -6502,6 +6601,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
#ifdef CONFIG_MITIGATION_RETPOLINE
if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
return kvm_emulate_wrmsr(vcpu);
+ else if (exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
+ return handle_wrmsr_imm(vcpu);
else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
return handle_preemption_timer(vcpu);
else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW)
@@ -6920,8 +7021,14 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
+ /*
+ * Invoke the kernel's IRQ handler for the vector. Use the FRED path
+ * when it's available even if FRED isn't fully enabled, e.g. even if
+ * FRED isn't supported in hardware, in order to avoid the indirect
+ * CALL in the non-FRED path.
+ */
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- if (cpu_feature_enabled(X86_FEATURE_FRED))
+ if (IS_ENABLED(CONFIG_X86_FRED))
fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
else
vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
@@ -7177,11 +7284,16 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
switch (vmx_get_exit_reason(vcpu).basic) {
case EXIT_REASON_MSR_WRITE:
- return handle_fastpath_set_msr_irqoff(vcpu);
+ return handle_fastpath_wrmsr(vcpu);
+ case EXIT_REASON_MSR_WRITE_IMM:
+ return handle_fastpath_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+ vmx_get_msr_imm_reg(vcpu));
case EXIT_REASON_PREEMPTION_TIMER:
return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
case EXIT_REASON_HLT:
return handle_fastpath_hlt(vcpu);
+ case EXIT_REASON_INVD:
+ return handle_fastpath_invd(vcpu);
default:
return EXIT_FASTPATH_NONE;
}
@@ -7648,6 +7760,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU));
cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));
+ cr4_fixed1_update(X86_CR4_CET, ecx, feature_bit(SHSTK));
+ cr4_fixed1_update(X86_CR4_CET, edx, feature_bit(IBT));
entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
cr4_fixed1_update(X86_CR4_LAM_SUP, eax, feature_bit(LAM));
@@ -7782,16 +7896,13 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx->msr_ia32_feature_control_valid_bits &=
~FEAT_CTL_SGX_LC_ENABLED;
- /* Recalc MSR interception to account for feature changes. */
- vmx_recalc_msr_intercepts(vcpu);
-
/* Refresh #PF interception to account for MAXPHYADDR changes. */
vmx_update_exception_bitmap(vcpu);
}
static __init u64 vmx_get_perf_capabilities(void)
{
- u64 perf_cap = PMU_CAP_FW_WRITES;
+ u64 perf_cap = PERF_CAP_FW_WRITES;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -7811,7 +7922,7 @@ static __init u64 vmx_get_perf_capabilities(void)
if (!vmx_lbr_caps.has_callstack)
memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
else if (vmx_lbr_caps.nr)
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ perf_cap |= host_perf_cap & PERF_CAP_LBR_FMT;
}
if (vmx_pebs_supported()) {
@@ -7879,7 +7990,6 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_UMIP);
/* CPUID 0xD.1 */
- kvm_caps.supported_xss = 0;
if (!cpu_has_vmx_xsaves())
kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
@@ -7891,6 +8001,18 @@ static __init void vmx_set_cpu_caps(void)
if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
+
+ /*
+ * Disable CET if unrestricted_guest is unsupported as KVM doesn't
+ * enforce CET HW behaviors in emulator. On platforms with
+ * VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code
+ * fails, so disable CET in this case too.
+ */
+ if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+ !cpu_has_vmx_basic_no_hw_errcode_cc()) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ }
}
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
@@ -8340,8 +8462,6 @@ __init int vmx_hardware_setup(void)
vmx_setup_user_return_msrs();
- if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
- return -EIO;
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
@@ -8371,6 +8491,14 @@ __init int vmx_hardware_setup(void)
return -EOPNOTSUPP;
}
+ /*
+ * Shadow paging doesn't have a (further) performance penalty
+ * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
+ * by default
+ */
+ if (!enable_ept)
+ allow_smaller_maxphyaddr = true;
+
if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
enable_ept_ad_bits = 0;
@@ -8496,6 +8624,13 @@ __init int vmx_hardware_setup(void)
setup_default_sgx_lepubkeyhash();
+ vmx_set_cpu_caps();
+
+ /*
+ * Configure nested capabilities after core CPU capabilities so that
+ * nested support can be conditional on base support, e.g. so that KVM
+ * can hide/show features based on kvm_cpu_cap_has().
+ */
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept);
@@ -8504,8 +8639,6 @@ __init int vmx_hardware_setup(void)
return r;
}
- vmx_set_cpu_caps();
-
r = alloc_kvm_area();
if (r && nested)
nested_vmx_hardware_unsetup();
@@ -8532,7 +8665,9 @@ __init int vmx_hardware_setup(void)
*/
if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
- kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
+
+ kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
+
return r;
}
@@ -8565,11 +8700,18 @@ int __init vmx_init(void)
return -EOPNOTSUPP;
/*
- * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
- * to unwind if a later step fails.
+ * Note, VMCS and eVMCS configuration only touch VMX knobs/variables,
+ * i.e. there's nothing to unwind if a later step fails.
*/
hv_init_evmcs();
+ /*
+ * Parse the VMCS config and VMX capabilities before anything else, so
+ * that the information is available to all setup flows.
+ */
+ if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
+ return -EIO;
+
r = kvm_x86_vendor_init(&vt_init_ops);
if (r)
return r;
@@ -8593,14 +8735,6 @@ int __init vmx_init(void)
vmx_check_vmcs12_offsets();
- /*
- * Shadow paging doesn't have a (further) performance penalty
- * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
- * by default
- */
- if (!enable_ept)
- allow_smaller_maxphyaddr = true;
-
return 0;
err_l1d_flush:
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index d3389baf3ab3..ea93121029f9 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -181,6 +181,9 @@ struct nested_vmx {
*/
u64 pre_vmenter_debugctl;
u64 pre_vmenter_bndcfgs;
+ u64 pre_vmenter_s_cet;
+ u64 pre_vmenter_ssp;
+ u64 pre_vmenter_ssp_tbl;
/* to migrate it to L1 if L2 writes to L1's CR8 directly */
int l1_tpr_threshold;
@@ -484,7 +487,8 @@ static inline u8 vmx_get_rvi(void)
VM_ENTRY_LOAD_IA32_EFER | \
VM_ENTRY_LOAD_BNDCFGS | \
VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
+ VM_ENTRY_LOAD_IA32_RTIT_CTL | \
+ VM_ENTRY_LOAD_CET_STATE)
#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
(VM_EXIT_SAVE_DEBUG_CONTROLS | \
@@ -506,7 +510,8 @@ static inline u8 vmx_get_rvi(void)
VM_EXIT_LOAD_IA32_EFER | \
VM_EXIT_CLEAR_BNDCFGS | \
VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
+ VM_EXIT_CLEAR_IA32_RTIT_CTL | \
+ VM_EXIT_LOAD_CET_STATE)
#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
(PIN_BASED_EXT_INTR_MASK | \
@@ -608,6 +613,14 @@ static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##b
{ \
BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
+} \
+static __always_inline void lname##_controls_changebit(struct vcpu_vmx *vmx, u##bits val, \
+ bool set) \
+{ \
+ if (set) \
+ lname##_controls_setbit(vmx, val); \
+ else \
+ lname##_controls_clearbit(vmx, val); \
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
@@ -706,6 +719,11 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
void dump_vmcs(struct kvm_vcpu *vcpu);
+static inline int vmx_get_instr_info_reg(u32 vmx_instr_info)
+{
+ return (vmx_instr_info >> 3) & 0xf;
+}
+
static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
{
return (vmx_instr_info >> 28) & 0xf;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 4c70f56c57c8..9697368d65b3 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -52,7 +52,7 @@ void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
int trig_mode, int vector);
void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
-void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu);
+void vmx_recalc_intercepts(struct kvm_vcpu *vcpu);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
int vmx_get_feature_msr(u32 msr, u64 *data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f122906ed9f3..42ecd093bb4c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -59,7 +59,6 @@
#include <linux/sched/stat.h>
#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
-#include <linux/entry-kvm.h>
#include <linux/suspend.h>
#include <linux/smp.h>
@@ -97,10 +96,10 @@
* vendor module being reloaded with different module parameters.
*/
struct kvm_caps kvm_caps __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_caps);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_caps);
struct kvm_host_values kvm_host __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_host);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_host);
#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
@@ -136,6 +135,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static DEFINE_MUTEX(vendor_module_lock);
+static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
+static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+
struct kvm_x86_ops kvm_x86_ops __read_mostly;
#define KVM_X86_OP(func) \
@@ -152,7 +154,7 @@ module_param(ignore_msrs, bool, 0644);
bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, 0644);
-EXPORT_SYMBOL_GPL(report_ignored_msrs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(report_ignored_msrs);
unsigned int min_timer_period_us = 200;
module_param(min_timer_period_us, uint, 0644);
@@ -164,12 +166,9 @@ module_param(kvmclock_periodic_sync, bool, 0444);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, 0644);
-static bool __read_mostly vector_hashing = true;
-module_param(vector_hashing, bool, 0444);
-
bool __read_mostly enable_vmware_backdoor = false;
module_param(enable_vmware_backdoor, bool, 0444);
-EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_vmware_backdoor);
/*
* Flags to manipulate forced emulation behavior (any non-zero value will
@@ -184,7 +183,7 @@ module_param(pi_inject_timer, bint, 0644);
/* Enable/disable PMU virtualization */
bool __read_mostly enable_pmu = true;
-EXPORT_SYMBOL_GPL(enable_pmu);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_pmu);
module_param(enable_pmu, bool, 0444);
bool __read_mostly eager_page_split = true;
@@ -211,7 +210,7 @@ struct kvm_user_return_msrs {
};
u32 __read_mostly kvm_nr_uret_msrs;
-EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_nr_uret_msrs);
static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
static struct kvm_user_return_msrs __percpu *user_return_msrs;
@@ -220,17 +219,26 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
+#define XFEATURE_MASK_CET_ALL (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)
+/*
+ * Note, KVM supports exposing PT to the guest, but does not support context
+ * switching PT via XSTATE (KVM's PT virtualization relies on perf; swapping
+ * PT via guest XSTATE would clobber perf state), i.e. KVM doesn't support
+ * IA32_XSS[bit 8] (guests can/must use RDMSR/WRMSR to save/restore PT MSRs).
+ */
+#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_ALL)
+
bool __read_mostly allow_smaller_maxphyaddr = 0;
-EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(allow_smaller_maxphyaddr);
bool __read_mostly enable_apicv = true;
-EXPORT_SYMBOL_GPL(enable_apicv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_apicv);
bool __read_mostly enable_ipiv = true;
-EXPORT_SYMBOL_GPL(enable_ipiv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv);
bool __read_mostly enable_device_posted_irqs = true;
-EXPORT_SYMBOL_GPL(enable_device_posted_irqs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs);
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
@@ -335,7 +343,11 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
MSR_IA32_UMWAIT_CONTROL,
- MSR_IA32_XFD, MSR_IA32_XFD_ERR,
+ MSR_IA32_XFD, MSR_IA32_XFD_ERR, MSR_IA32_XSS,
+
+ MSR_IA32_U_CET, MSR_IA32_S_CET,
+ MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
+ MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
};
static const u32 msrs_to_save_pmu[] = {
@@ -367,6 +379,7 @@ static const u32 msrs_to_save_pmu[] = {
MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET,
};
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
@@ -614,7 +627,7 @@ int kvm_add_user_return_msr(u32 msr)
kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
return kvm_nr_uret_msrs++;
}
-EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_add_user_return_msr);
int kvm_find_user_return_msr(u32 msr)
{
@@ -626,7 +639,7 @@ int kvm_find_user_return_msr(u32 msr)
}
return -1;
}
-EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_user_return_msr);
static void kvm_user_return_msr_cpu_online(void)
{
@@ -666,7 +679,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
kvm_user_return_register_notifier(msrs);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_user_return_msr);
void kvm_user_return_msr_update_cache(unsigned int slot, u64 value)
{
@@ -675,7 +688,13 @@ void kvm_user_return_msr_update_cache(unsigned int slot, u64 value)
msrs->values[slot].curr = value;
kvm_user_return_register_notifier(msrs);
}
-EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_user_return_msr_update_cache);
+
+u64 kvm_get_user_return_msr(unsigned int slot)
+{
+ return this_cpu_ptr(user_return_msrs)->values[slot].curr;
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_user_return_msr);
static void drop_user_return_notifiers(void)
{
@@ -697,7 +716,7 @@ noinstr void kvm_spurious_fault(void)
/* Fault while not rebooting. We want the trace. */
BUG_ON(!kvm_rebooting);
}
-EXPORT_SYMBOL_GPL(kvm_spurious_fault);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spurious_fault);
#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
@@ -802,7 +821,7 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
ex->has_payload = false;
ex->payload = 0;
}
-EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_deliver_exception_payload);
static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector,
bool has_error_code, u32 error_code,
@@ -886,7 +905,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, false, 0);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
@@ -894,7 +913,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
{
kvm_multiple_exception(vcpu, nr, false, 0, true, payload);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_p);
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
@@ -929,7 +948,7 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
vcpu->arch.exception.has_payload = false;
vcpu->arch.exception.payload = 0;
}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_requeue_exception);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
@@ -940,7 +959,7 @@ int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
return 1;
}
-EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_complete_insn_gp);
static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
{
@@ -990,7 +1009,7 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
fault_mmu->inject_page_fault(vcpu, fault);
}
-EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_emulated_page_fault);
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
@@ -1002,7 +1021,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, false, 0);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_e);
/*
* Checks if cpl <= required_cpl; if true, return true. Otherwise queue
@@ -1024,7 +1043,7 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
kvm_queue_exception(vcpu, UD_VECTOR);
return false;
}
-EXPORT_SYMBOL_GPL(kvm_require_dr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_require_dr);
static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
{
@@ -1079,7 +1098,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
return 1;
}
-EXPORT_SYMBOL_GPL(load_pdptrs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(load_pdptrs);
static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
@@ -1132,7 +1151,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon
if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr0);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
@@ -1167,19 +1186,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
(is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)))
return 1;
+ if (!(cr0 & X86_CR0_WP) && kvm_is_cr4_bit_set(vcpu, X86_CR4_CET))
+ return 1;
+
kvm_x86_call(set_cr0)(vcpu, cr0);
kvm_post_set_cr0(vcpu, old_cr0, cr0);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr0);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr0);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
-EXPORT_SYMBOL_GPL(kvm_lmsw);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lmsw);
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
{
@@ -1202,7 +1224,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
wrpkru(vcpu->arch.pkru);
}
-EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_guest_xsave_state);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
{
@@ -1228,7 +1250,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
}
}
-EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_host_xsave_state);
#ifdef CONFIG_X86_64
static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
@@ -1237,7 +1259,7 @@ static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
}
#endif
-static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
u64 xcr0 = xcr;
u64 old_xcr0 = vcpu->arch.xcr0;
@@ -1281,6 +1303,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.cpuid_dynamic_bits_dirty = true;
return 0;
}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_set_xcr);
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
{
@@ -1293,7 +1316,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_xsetbv);
static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
@@ -1341,7 +1364,7 @@ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned lon
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr4);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
@@ -1366,13 +1389,16 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
+ if ((cr4 & X86_CR4_CET) && !kvm_is_cr0_bit_set(vcpu, X86_CR0_WP))
+ return 1;
+
kvm_x86_call(set_cr4)(vcpu, cr4);
kvm_post_set_cr4(vcpu, old_cr4, cr4);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr4);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr4);
static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
{
@@ -1464,7 +1490,7 @@ handle_tlb_flush:
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr3);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr3);
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
@@ -1476,7 +1502,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
vcpu->arch.cr8 = cr8;
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr8);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr8);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
@@ -1485,7 +1511,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
else
return vcpu->arch.cr8;
}
-EXPORT_SYMBOL_GPL(kvm_get_cr8);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_cr8);
static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
{
@@ -1510,7 +1536,7 @@ void kvm_update_dr7(struct kvm_vcpu *vcpu)
if (dr7 & DR7_BP_EN_MASK)
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}
-EXPORT_SYMBOL_GPL(kvm_update_dr7);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_update_dr7);
static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
@@ -1551,7 +1577,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_dr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_dr);
unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
{
@@ -1568,14 +1594,14 @@ unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
return vcpu->arch.dr7;
}
}
-EXPORT_SYMBOL_GPL(kvm_get_dr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dr);
int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_rcx_read(vcpu);
+ u32 pmc = kvm_rcx_read(vcpu);
u64 data;
- if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
+ if (kvm_pmu_rdpmc(vcpu, pmc, &data)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
@@ -1584,7 +1610,7 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
kvm_rdx_write(vcpu, data >> 32);
return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdpmc);
/*
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
@@ -1723,7 +1749,7 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
return __kvm_valid_efer(vcpu, efer);
}
-EXPORT_SYMBOL_GPL(kvm_valid_efer);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_valid_efer);
static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
@@ -1766,7 +1792,7 @@ void kvm_enable_efer_bits(u64 mask)
{
efer_reserved_bits &= ~mask;
}
-EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_enable_efer_bits);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
{
@@ -1809,7 +1835,7 @@ out:
return allowed;
}
-EXPORT_SYMBOL_GPL(kvm_msr_allowed);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_msr_allowed);
/*
* Write @data into the MSR specified by @index. Select MSR specific fault
@@ -1870,6 +1896,44 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
data = (u32)data;
break;
+ case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT))
+ return KVM_MSR_RET_UNSUPPORTED;
+ if (!kvm_is_valid_u_s_cet(vcpu, data))
+ return 1;
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ if (!host_initiated)
+ return 1;
+ fallthrough;
+ /*
+ * Note that the MSR emulation here is flawed when a vCPU
+ * doesn't support the Intel 64 architecture. The expected
+ * architectural behavior in this case is that the upper 32
+ * bits do not exist and should always read '0'. However,
+ * because the actual hardware on which the virtual CPU is
+ * running does support Intel 64, XRSTORS/XSAVES in the
+ * guest could observe behavior that violates the
+ * architecture. Intercepting XRSTORS/XSAVES for this
+ * special case isn't deemed worthwhile.
+ */
+ case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ return KVM_MSR_RET_UNSUPPORTED;
+ /*
+ * MSR_IA32_INT_SSP_TAB is not present on processors that do
+ * not support Intel 64 architecture.
+ */
+ if (index == MSR_IA32_INT_SSP_TAB && !guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
+ return KVM_MSR_RET_UNSUPPORTED;
+ if (is_noncanonical_msr_address(data, vcpu))
+ return 1;
+ /* All SSP MSRs except MSR_IA32_INT_SSP_TAB must be 4-byte aligned */
+ if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
+ return 1;
+ break;
}
msr.data = data;
@@ -1898,8 +1962,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
- bool host_initiated)
+static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+ bool host_initiated)
{
struct msr_data msr;
int ret;
@@ -1914,6 +1978,20 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
return 1;
break;
+ case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT))
+ return KVM_MSR_RET_UNSUPPORTED;
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ if (!host_initiated)
+ return 1;
+ fallthrough;
+ case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ return KVM_MSR_RET_UNSUPPORTED;
+ break;
}
msr.index = index;
@@ -1925,6 +2003,16 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
return ret;
}
+int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
+{
+ return __kvm_set_msr(vcpu, index, data, true);
+}
+
+int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+ return __kvm_get_msr(vcpu, index, data, true);
+}
+
static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
u32 index, u64 *data, bool host_initiated)
{
@@ -1932,33 +2020,36 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
__kvm_get_msr);
}
-int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
- return KVM_MSR_RET_FILTERED;
return kvm_get_msr_ignored_check(vcpu, index, data, false);
}
-EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_read);
-int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
- return KVM_MSR_RET_FILTERED;
return kvm_set_msr_ignored_check(vcpu, index, data, false);
}
-EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_write);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
- return kvm_get_msr_ignored_check(vcpu, index, data, false);
+ if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
+ return KVM_MSR_RET_FILTERED;
+
+ return __kvm_emulate_msr_read(vcpu, index, data);
}
-EXPORT_SYMBOL_GPL(kvm_get_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_read);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
- return kvm_set_msr_ignored_check(vcpu, index, data, false);
+ if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
+ return KVM_MSR_RET_FILTERED;
+
+ return __kvm_emulate_msr_write(vcpu, index, data);
}
-EXPORT_SYMBOL_GPL(kvm_set_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_write);
+
static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu)
{
@@ -1990,6 +2081,15 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
return complete_fast_msr_access(vcpu);
}
+static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->run->msr.error)
+ kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg,
+ vcpu->run->msr.data);
+
+ return complete_fast_msr_access(vcpu);
+}
+
static u64 kvm_msr_reason(int r)
{
switch (r) {
@@ -2024,55 +2124,82 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
return 1;
}
-int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
+static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg,
+ int (*complete_rdmsr)(struct kvm_vcpu *))
{
- u32 ecx = kvm_rcx_read(vcpu);
u64 data;
int r;
- r = kvm_get_msr_with_filter(vcpu, ecx, &data);
+ r = kvm_emulate_msr_read(vcpu, msr, &data);
if (!r) {
- trace_kvm_msr_read(ecx, data);
+ trace_kvm_msr_read(msr, data);
- kvm_rax_write(vcpu, data & -1u);
- kvm_rdx_write(vcpu, (data >> 32) & -1u);
+ if (reg < 0) {
+ kvm_rax_write(vcpu, data & -1u);
+ kvm_rdx_write(vcpu, (data >> 32) & -1u);
+ } else {
+ kvm_register_write(vcpu, reg, data);
+ }
} else {
/* MSR read failed? See if we should ask user space */
- if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
- complete_fast_rdmsr, r))
+ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0,
+ complete_rdmsr, r))
return 0;
- trace_kvm_msr_read_ex(ecx);
+ trace_kvm_msr_read_ex(msr);
}
return kvm_x86_call(complete_emulated_msr)(vcpu, r);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
-int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_rcx_read(vcpu);
- u64 data = kvm_read_edx_eax(vcpu);
- int r;
+ return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1,
+ complete_fast_rdmsr);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr);
+
+int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ vcpu->arch.cui_rdmsr_imm_reg = reg;
+
+ return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr_imm);
- r = kvm_set_msr_with_filter(vcpu, ecx, data);
+static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ int r;
+ r = kvm_emulate_msr_write(vcpu, msr, data);
if (!r) {
- trace_kvm_msr_write(ecx, data);
+ trace_kvm_msr_write(msr, data);
} else {
/* MSR write failed? See if we should ask user space */
- if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
+ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data,
complete_fast_msr_access, r))
return 0;
/* Signal all other negative errors to userspace */
if (r < 0)
return r;
- trace_kvm_msr_write_ex(ecx, data);
+ trace_kvm_msr_write_ex(msr, data);
}
return kvm_x86_call(complete_emulated_msr)(vcpu, r);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+{
+ return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu),
+ kvm_read_edx_eax(vcpu));
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr);
+
+int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr_imm);
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
{
@@ -2084,14 +2211,23 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu)
/* Treat an INVD instruction as a NOP and just skip it. */
return kvm_emulate_as_nop(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_invd);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_invd);
+
+fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_emulate_invd(vcpu))
+ return EXIT_FASTPATH_EXIT_USERSPACE;
+
+ return EXIT_FASTPATH_REENTER_GUEST;
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_invd);
int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
-EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invalid_op);
static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
@@ -2117,13 +2253,13 @@ int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
{
return kvm_emulate_monitor_mwait(vcpu, "MWAIT");
}
-EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_mwait);
int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
{
return kvm_emulate_monitor_mwait(vcpu, "MONITOR");
}
-EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_monitor);
static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
{
@@ -2133,74 +2269,41 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending();
}
-/*
- * The fast path for frequent and performance sensitive wrmsr emulation,
- * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
- * the latency of virtual IPI by avoiding the expensive bits of transitioning
- * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
- * other cases which must be called after interrupts are enabled on the host.
- */
-static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
-{
- if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
- return 1;
-
- if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
- ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
- ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
- ((u32)(data >> 32) != X2APIC_BROADCAST))
- return kvm_x2apic_icr_write(vcpu->arch.apic, data);
-
- return 1;
-}
-
-static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
-{
- if (!kvm_can_use_hv_timer(vcpu))
- return 1;
-
- kvm_set_lapic_tscdeadline_msr(vcpu, data);
- return 0;
-}
-
-fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
+static fastpath_t __handle_fastpath_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
- u32 msr = kvm_rcx_read(vcpu);
- u64 data;
- fastpath_t ret;
- bool handled;
-
- kvm_vcpu_srcu_read_lock(vcpu);
-
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
- data = kvm_read_edx_eax(vcpu);
- handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) ||
+ kvm_x2apic_icr_write_fast(vcpu->arch.apic, data))
+ return EXIT_FASTPATH_NONE;
break;
case MSR_IA32_TSC_DEADLINE:
- data = kvm_read_edx_eax(vcpu);
- handled = !handle_fastpath_set_tscdeadline(vcpu, data);
+ kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
default:
- handled = false;
- break;
+ return EXIT_FASTPATH_NONE;
}
- if (handled) {
- if (!kvm_skip_emulated_instruction(vcpu))
- ret = EXIT_FASTPATH_EXIT_USERSPACE;
- else
- ret = EXIT_FASTPATH_REENTER_GUEST;
- trace_kvm_msr_write(msr, data);
- } else {
- ret = EXIT_FASTPATH_NONE;
- }
+ trace_kvm_msr_write(msr, data);
- kvm_vcpu_srcu_read_unlock(vcpu);
+ if (!kvm_skip_emulated_instruction(vcpu))
+ return EXIT_FASTPATH_EXIT_USERSPACE;
- return ret;
+ return EXIT_FASTPATH_REENTER_GUEST;
+}
+
+fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu)
+{
+ return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu),
+ kvm_read_edx_eax(vcpu));
}
-EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr);
+
+fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr_imm);
/*
* Adapt set_msr() to msr_io()'s calling convention
@@ -2566,7 +2669,7 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
return vcpu->arch.l1_tsc_offset +
kvm_scale_tsc(host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
}
-EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_l1_tsc);
u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
{
@@ -2581,7 +2684,7 @@ u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
nested_offset += l2_offset;
return nested_offset;
}
-EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_offset);
u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
{
@@ -2591,7 +2694,7 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
return l1_multiplier;
}
-EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_multiplier);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
{
@@ -3669,7 +3772,7 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
kvm_vcpu_flush_tlb_guest(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_service_local_tlb_flush_requests);
static void record_steal_time(struct kvm_vcpu *vcpu)
{
@@ -3769,6 +3872,67 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
+/*
+ * Returns true if the MSR in question is managed via XSTATE, i.e. is context
+ * switched with the rest of guest FPU state. Note! S_CET is _not_ context
+ * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS.
+ * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields,
+ * the value saved/restored via XSTATE is always the host's value. That detail
+ * is _extremely_ important, as the guest's S_CET must _never_ be resident in
+ * hardware while executing in the host. Loading guest values for U_CET and
+ * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to
+ * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower
+ * privilege levels, i.e. are effectively only consumed by userspace as well.
+ */
+static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+ if (!vcpu)
+ return false;
+
+ switch (msr) {
+ case MSR_IA32_U_CET:
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_IBT);
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+ default:
+ return false;
+ }
+}
+
+/*
+ * Lock (and if necessary, re-load) the guest FPU, i.e. XSTATE, and access an
+ * MSR that is managed via XSTATE. Note, the caller is responsible for doing
+ * the initial FPU load, this helper only ensures that guest state is resident
+ * in hardware (the kernel can load its FPU state in IRQ context).
+ */
+static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu,
+ struct msr_data *msr_info,
+ int access)
+{
+ BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W);
+
+ KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm);
+ KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm);
+
+ kvm_fpu_get();
+ if (access == MSR_TYPE_R)
+ rdmsrq(msr_info->index, msr_info->data);
+ else
+ wrmsrq(msr_info->index, msr_info->data);
+ kvm_fpu_put();
+}
+
+static void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W);
+}
+
+static void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R);
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u32 msr = msr_info->index;
@@ -3960,16 +4124,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_IA32_XSS:
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
- return 1;
- /*
- * KVM supports exposing PT to the guest, but does not support
- * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
- * XSAVES/XRSTORS to save/restore PT MSRs.
- */
- if (data & ~kvm_caps.supported_xss)
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ return KVM_MSR_RET_UNSUPPORTED;
+
+ if (data & ~vcpu->arch.guest_supported_xss)
return 1;
+ if (vcpu->arch.ia32_xss == data)
+ break;
vcpu->arch.ia32_xss = data;
vcpu->arch.cpuid_dynamic_bits_dirty = true;
break;
@@ -4153,6 +4314,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.guest_fpu.xfd_err = data;
break;
#endif
+ case MSR_IA32_U_CET:
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ kvm_set_xstate_msr(vcpu, msr_info);
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
@@ -4161,7 +4326,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_msr_common);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_msr_common);
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
@@ -4502,6 +4667,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.guest_fpu.xfd_err;
break;
#endif
+ case MSR_IA32_U_CET:
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ kvm_get_xstate_msr(vcpu, msr_info);
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
@@ -4510,7 +4679,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_msr_common);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_msr_common);
/*
* Read or write a bunch of msrs. All parameters are kernel addresses.
@@ -4522,11 +4691,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
+ bool fpu_loaded = false;
int i;
- for (i = 0; i < msrs->nmsrs; ++i)
+ for (i = 0; i < msrs->nmsrs; ++i) {
+ /*
+ * If userspace is accessing one or more XSTATE-managed MSRs,
+ * temporarily load the guest's FPU state so that the guest's
+ * MSR value(s) is resident in hardware and thus can be accessed
+ * via RDMSR/WRMSR.
+ */
+ if (!fpu_loaded && is_xstate_managed_msr(vcpu, entries[i].index)) {
+ kvm_load_guest_fpu(vcpu);
+ fpu_loaded = true;
+ }
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
+ }
+ if (fpu_loaded)
+ kvm_put_guest_fpu(vcpu);
return i;
}
@@ -4711,6 +4894,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_MEMORY_FAULT_INFO:
case KVM_CAP_X86_GUEST_MODE:
+ case KVM_CAP_ONE_REG:
r = 1;
break;
case KVM_CAP_PRE_FAULT_MEMORY:
@@ -5889,6 +6073,134 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
}
}
+struct kvm_x86_reg_id {
+ __u32 index;
+ __u8 type;
+ __u8 rsvd1;
+ __u8 rsvd2:4;
+ __u8 size:4;
+ __u8 x86;
+};
+
+static int kvm_translate_kvm_reg(struct kvm_vcpu *vcpu,
+ struct kvm_x86_reg_id *reg)
+{
+ switch (reg->index) {
+ case KVM_REG_GUEST_SSP:
+ /*
+ * FIXME: If host-initiated accesses are ever exempted from
+ * ignore_msrs (in kvm_do_msr_access()), drop this manual check
+ * and rely on KVM's standard checks to reject accesses to regs
+ * that don't exist.
+ */
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ return -EINVAL;
+
+ reg->type = KVM_X86_REG_TYPE_MSR;
+ reg->index = MSR_KVM_INTERNAL_GUEST_SSP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val)
+{
+ u64 val;
+
+ if (do_get_msr(vcpu, msr, &val))
+ return -EINVAL;
+
+ if (put_user(val, user_val))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val)
+{
+ u64 val;
+
+ if (get_user(val, user_val))
+ return -EFAULT;
+
+ if (do_set_msr(vcpu, msr, &val))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl,
+ void __user *argp)
+{
+ struct kvm_one_reg one_reg;
+ struct kvm_x86_reg_id *reg;
+ u64 __user *user_val;
+ bool load_fpu;
+ int r;
+
+ if (copy_from_user(&one_reg, argp, sizeof(one_reg)))
+ return -EFAULT;
+
+ if ((one_reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86)
+ return -EINVAL;
+
+ reg = (struct kvm_x86_reg_id *)&one_reg.id;
+ if (reg->rsvd1 || reg->rsvd2)
+ return -EINVAL;
+
+ if (reg->type == KVM_X86_REG_TYPE_KVM) {
+ r = kvm_translate_kvm_reg(vcpu, reg);
+ if (r)
+ return r;
+ }
+
+ if (reg->type != KVM_X86_REG_TYPE_MSR)
+ return -EINVAL;
+
+ if ((one_reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64)
+ return -EINVAL;
+
+ guard(srcu)(&vcpu->kvm->srcu);
+
+ load_fpu = is_xstate_managed_msr(vcpu, reg->index);
+ if (load_fpu)
+ kvm_load_guest_fpu(vcpu);
+
+ user_val = u64_to_user_ptr(one_reg.addr);
+ if (ioctl == KVM_GET_ONE_REG)
+ r = kvm_get_one_msr(vcpu, reg->index, user_val);
+ else
+ r = kvm_set_one_msr(vcpu, reg->index, user_val);
+
+ if (load_fpu)
+ kvm_put_guest_fpu(vcpu);
+ return r;
+}
+
+static int kvm_get_reg_list(struct kvm_vcpu *vcpu,
+ struct kvm_reg_list __user *user_list)
+{
+ u64 nr_regs = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ? 1 : 0;
+ u64 user_nr_regs;
+
+ if (get_user(user_nr_regs, &user_list->n))
+ return -EFAULT;
+
+ if (put_user(nr_regs, &user_list->n))
+ return -EFAULT;
+
+ if (user_nr_regs < nr_regs)
+ return -E2BIG;
+
+ if (nr_regs &&
+ put_user(KVM_X86_REG_KVM(KVM_REG_GUEST_SSP), &user_list->reg[0]))
+ return -EFAULT;
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -6005,6 +6317,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
+ case KVM_GET_ONE_REG:
+ case KVM_SET_ONE_REG:
+ r = kvm_get_set_one_reg(vcpu, ioctl, argp);
+ break;
+ case KVM_GET_REG_LIST:
+ r = kvm_get_reg_list(vcpu, argp);
+ break;
case KVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;
@@ -6771,7 +7090,11 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
kvm_free_msr_filter(old_filter);
- kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
+ /*
+ * Recalc MSR intercepts as userspace may want to intercept accesses to
+ * MSRs that KVM would otherwise pass through to the guest.
+ */
+ kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS);
return 0;
}
@@ -6966,6 +7289,15 @@ set_identity_unlock:
if (irqchip_in_kernel(kvm))
goto create_irqchip_unlock;
+ /*
+ * Disallow an in-kernel I/O APIC if the VM has protected EOIs,
+ * i.e. if KVM can't intercept EOIs and thus can't properly
+ * emulate level-triggered interrupts.
+ */
+ r = -ENOTTY;
+ if (kvm->arch.has_protected_eoi)
+ goto create_irqchip_unlock;
+
r = -EINVAL;
if (kvm->created_vcpus)
goto create_irqchip_unlock;
@@ -7353,6 +7685,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
return;
break;
@@ -7365,6 +7698,24 @@ static void kvm_probe_msr_to_save(u32 msr_index)
if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
return;
break;
+ case MSR_IA32_XSS:
+ if (!kvm_caps.supported_xss)
+ return;
+ break;
+ case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ return;
+ break;
+ case MSR_IA32_INT_SSP_TAB:
+ if (!kvm_cpu_cap_has(X86_FEATURE_LM))
+ return;
+ fallthrough;
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK))
+ return;
+ break;
default:
break;
}
@@ -7484,7 +7835,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
u64 access = (kvm_x86_call(get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_read);
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
@@ -7495,7 +7846,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
access |= PFERR_WRITE_MASK;
return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_write);
/* uses this to access any guest's mapped memory without checking CPL */
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
@@ -7581,7 +7932,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
exception);
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_virt);
static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
@@ -7653,7 +8004,7 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
PFERR_WRITE_MASK, exception);
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_virt_system);
static int kvm_check_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
@@ -7687,7 +8038,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
return kvm_emulate_instruction(vcpu, emul_type);
}
-EXPORT_SYMBOL_GPL(handle_ud);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_ud);
static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t gpa, bool write)
@@ -8166,7 +8517,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
kvm_emulate_wbinvd_noskip(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wbinvd);
@@ -8353,7 +8704,7 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int r;
- r = kvm_get_msr_with_filter(vcpu, msr_index, pdata);
+ r = kvm_emulate_msr_read(vcpu, msr_index, pdata);
if (r < 0)
return X86EMUL_UNHANDLEABLE;
@@ -8376,7 +8727,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int r;
- r = kvm_set_msr_with_filter(vcpu, msr_index, data);
+ r = kvm_emulate_msr_write(vcpu, msr_index, data);
if (r < 0)
return X86EMUL_UNHANDLEABLE;
@@ -8396,7 +8747,16 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 *pdata)
{
- return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+ /*
+ * Treat emulator accesses to the current shadow stack pointer as host-
+ * initiated, as they aren't true MSR accesses (SSP is a "just a reg"),
+ * and this API is used only for implicit accesses, i.e. not RDMSR, and
+ * so the index is fully KVM-controlled.
+ */
+ if (unlikely(msr_index == MSR_KVM_INTERNAL_GUEST_SSP))
+ return kvm_msr_read(emul_to_vcpu(ctxt), msr_index, pdata);
+
+ return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata);
}
static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc)
@@ -8470,11 +8830,6 @@ static bool emulator_is_smm(struct x86_emulate_ctxt *ctxt)
return is_smm(emul_to_vcpu(ctxt));
}
-static bool emulator_is_guest_mode(struct x86_emulate_ctxt *ctxt)
-{
- return is_guest_mode(emul_to_vcpu(ctxt));
-}
-
#ifndef CONFIG_KVM_SMM
static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
@@ -8558,7 +8913,6 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible,
.set_nmi_mask = emulator_set_nmi_mask,
.is_smm = emulator_is_smm,
- .is_guest_mode = emulator_is_guest_mode,
.leave_smm = emulator_leave_smm,
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
@@ -8661,7 +9015,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
kvm_set_rflags(vcpu, ctxt->eflags);
}
}
-EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_realmode_interrupt);
static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
u8 ndata, u8 *insn_bytes, u8 insn_size)
@@ -8726,13 +9080,13 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
{
prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
}
-EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_prepare_emulation_failure_exit);
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
{
__kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
}
-EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_emulation_failure_exit);
void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa)
{
@@ -8754,7 +9108,7 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa)
run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
run->internal.ndata = ndata;
}
-EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_event_vectoring_exit);
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
@@ -8864,7 +9218,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (unlikely(!r))
return 0;
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
+ kvm_pmu_instruction_retired(vcpu);
/*
* rflags is the old, "raw" value of the flags. The new value has
@@ -8878,7 +9232,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
r = kvm_vcpu_do_singlestep(vcpu);
return r;
}
-EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_skip_emulated_instruction);
static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu)
{
@@ -9009,7 +9363,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
return r;
}
-EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_decode_emulated_instruction);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len)
@@ -9143,7 +9497,14 @@ restart:
ctxt->exception.address = 0;
}
- r = x86_emulate_insn(ctxt);
+ /*
+ * Check L1's instruction intercepts when emulating instructions for
+ * L2, unless KVM is re-emulating a previously decoded instruction,
+ * e.g. to complete userspace I/O, in which case KVM has already
+ * checked the intercepts.
+ */
+ r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) &&
+ !(emulation_type & EMULTYPE_NO_DECODE));
if (r == EMULATION_INTERCEPTED)
return 1;
@@ -9198,9 +9559,9 @@ writeback:
*/
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
+ kvm_pmu_instruction_retired(vcpu);
if (ctxt->is_branch)
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
+ kvm_pmu_branch_retired(vcpu);
kvm_rip_write(vcpu, ctxt->eip);
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
r = kvm_vcpu_do_singlestep(vcpu);
@@ -9226,14 +9587,14 @@ int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
{
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len)
{
return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction_from_buffer);
static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
{
@@ -9328,7 +9689,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
ret = kvm_fast_pio_out(vcpu, size, port);
return ret && kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_fast_pio);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fast_pio);
static int kvmclock_cpu_down_prep(unsigned int cpu)
{
@@ -9651,6 +10012,18 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
return -EIO;
}
+ if (boot_cpu_has(X86_FEATURE_SHSTK) || boot_cpu_has(X86_FEATURE_IBT)) {
+ rdmsrq(MSR_IA32_S_CET, kvm_host.s_cet);
+ /*
+ * Linux doesn't yet support supervisor shadow stacks (SSS), so
+ * KVM doesn't save/restore the associated MSRs, i.e. KVM may
+ * clobber the host values. Yell and refuse to load if SSS is
+ * unexpectedly enabled, e.g. to avoid crashing the host.
+ */
+ if (WARN_ON_ONCE(kvm_host.s_cet & CET_SHSTK_EN))
+ return -EIO;
+ }
+
memset(&kvm_caps, 0, sizeof(kvm_caps));
x86_emulator_cache = kvm_alloc_emulator_cache();
@@ -9678,14 +10051,17 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
}
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+ rdmsrq(MSR_IA32_XSS, kvm_host.xss);
+ kvm_caps.supported_xss = kvm_host.xss & KVM_SUPPORTED_XSS;
+ }
+
kvm_caps.supported_quirks = KVM_X86_VALID_QUIRKS;
kvm_caps.inapplicable_quirks = KVM_X86_CONDITIONAL_QUIRKS;
rdmsrq_safe(MSR_EFER, &kvm_host.efer);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrq(MSR_IA32_XSS, kvm_host.xss);
-
kvm_init_pmu_capability(ops->pmu_ops);
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
@@ -9734,6 +10110,16 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+
+ if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+ }
+
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
@@ -9760,7 +10146,7 @@ out_free_x86_emulator_cache:
kmem_cache_destroy(x86_emulator_cache);
return r;
}
-EXPORT_SYMBOL_GPL(kvm_x86_vendor_init);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_init);
void kvm_x86_vendor_exit(void)
{
@@ -9794,7 +10180,7 @@ void kvm_x86_vendor_exit(void)
kvm_x86_ops.enable_virtualization_cpu = NULL;
mutex_unlock(&vendor_module_lock);
}
-EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_exit);
#ifdef CONFIG_X86_64
static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
@@ -9858,7 +10244,7 @@ bool kvm_apicv_activated(struct kvm *kvm)
{
return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
}
-EXPORT_SYMBOL_GPL(kvm_apicv_activated);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apicv_activated);
bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
{
@@ -9868,7 +10254,7 @@ bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
return (vm_reasons | vcpu_reasons) == 0;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_apicv_activated);
static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
enum kvm_apicv_inhibit reason, bool set)
@@ -10044,7 +10430,7 @@ out:
vcpu->run->hypercall.ret = ret;
return 1;
}
-EXPORT_SYMBOL_GPL(____kvm_emulate_hypercall);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(____kvm_emulate_hypercall);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
@@ -10057,7 +10443,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
return __kvm_emulate_hypercall(vcpu, kvm_x86_call(get_cpl)(vcpu),
complete_hypercall_exit);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_hypercall);
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
{
@@ -10500,7 +10886,7 @@ out:
preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
-EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_update_apicv);
static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
@@ -10576,7 +10962,7 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
__kvm_set_or_clear_apicv_inhibit(kvm, reason, set);
up_write(&kvm->arch.apicv_update_lock);
}
-EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit);
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
@@ -10796,13 +11182,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
kvm_check_async_pf_completion(vcpu);
- /*
- * Recalc MSR intercepts as userspace may want to intercept
- * accesses to MSRs that KVM would otherwise pass through to
- * the guest.
- */
- if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
- kvm_x86_call(recalc_msr_intercepts)(vcpu);
+ if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu))
+ kvm_x86_call(recalc_intercepts)(vcpu);
if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
kvm_x86_call(update_cpu_dirty_logging)(vcpu);
@@ -11135,7 +11516,7 @@ bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return false;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_has_events);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_has_events);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
@@ -11253,7 +11634,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (__xfer_to_guest_mode_work_pending()) {
kvm_vcpu_srcu_read_unlock(vcpu);
- r = xfer_to_guest_mode_handle_work(vcpu);
+ r = kvm_xfer_to_guest_mode_handle_work(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);
if (r)
return r;
@@ -11288,7 +11669,7 @@ int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
{
return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt_noskip);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
@@ -11299,17 +11680,11 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
*/
return kvm_emulate_halt_noskip(vcpu) && ret;
}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu)
{
- int ret;
-
- kvm_vcpu_srcu_read_lock(vcpu);
- ret = kvm_emulate_halt(vcpu);
- kvm_vcpu_srcu_read_unlock(vcpu);
-
- if (!ret)
+ if (!kvm_emulate_halt(vcpu))
return EXIT_FASTPATH_EXIT_USERSPACE;
if (kvm_vcpu_running(vcpu))
@@ -11317,7 +11692,7 @@ fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_EXIT_HANDLED;
}
-EXPORT_SYMBOL_GPL(handle_fastpath_hlt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_hlt);
int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
{
@@ -11326,7 +11701,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
KVM_EXIT_AP_RESET_HOLD) && ret;
}
-EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_ap_reset_hold);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
@@ -11837,6 +12212,25 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
int ret;
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) {
+ u64 u_cet, s_cet;
+
+ /*
+ * Check both User and Supervisor on task switches as inter-
+ * privilege level task switches are impacted by CET at both
+ * the current privilege level and the new privilege level, and
+ * that information is not known at this time. The expectation
+ * is that the guest won't require emulation of task switches
+ * while using IBT or Shadow Stacks.
+ */
+ if (__kvm_emulate_msr_read(vcpu, MSR_IA32_U_CET, &u_cet) ||
+ __kvm_emulate_msr_read(vcpu, MSR_IA32_S_CET, &s_cet))
+ goto unhandled_task_switch;
+
+ if ((u_cet | s_cet) & (CET_ENDBR_EN | CET_SHSTK_EN))
+ goto unhandled_task_switch;
+ }
+
init_emulate_ctxt(vcpu);
ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
@@ -11846,19 +12240,21 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
* Report an error userspace if MMIO is needed, as KVM doesn't support
* MMIO during a task switch (or any other complex operation).
*/
- if (ret || vcpu->mmio_needed) {
- vcpu->mmio_needed = false;
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return 0;
- }
+ if (ret || vcpu->mmio_needed)
+ goto unhandled_task_switch;
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
return 1;
+
+unhandled_task_switch:
+ vcpu->mmio_needed = false;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return 0;
}
-EXPORT_SYMBOL_GPL(kvm_task_switch);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_task_switch);
static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
@@ -12388,6 +12784,42 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvfree(vcpu->arch.cpuid_entries);
}
+static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+ struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
+ u64 xfeatures_mask;
+ int i;
+
+ /*
+ * Guest FPU state is zero allocated and so doesn't need to be manually
+ * cleared on RESET, i.e. during vCPU creation.
+ */
+ if (!init_event || !fpstate)
+ return;
+
+ /*
+ * On INIT, only select XSTATE components are zeroed, most components
+ * are unchanged. Currently, the only components that are zeroed and
+ * supported by KVM are MPX and CET related.
+ */
+ xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) &
+ (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR |
+ XFEATURE_MASK_CET_ALL);
+ if (!xfeatures_mask)
+ return;
+
+ BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX);
+
+ /*
+ * All paths that lead to INIT are required to load the guest's FPU
+ * state (because most paths are buried in KVM_RUN).
+ */
+ kvm_put_guest_fpu(vcpu);
+ for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX)
+ fpstate_clear_xstate_component(fpstate, i);
+ kvm_load_guest_fpu(vcpu);
+}
+
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct kvm_cpuid_entry2 *cpuid_0x1;
@@ -12445,22 +12877,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
- struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
-
- /*
- * All paths that lead to INIT are required to load the guest's
- * FPU state (because most paths are buried in KVM_RUN).
- */
- if (init_event)
- kvm_put_guest_fpu(vcpu);
-
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
-
- if (init_event)
- kvm_load_guest_fpu(vcpu);
- }
+ kvm_xstate_reset(vcpu, init_event);
if (!init_event) {
vcpu->arch.smbase = 0x30000;
@@ -12472,7 +12889,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
__kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
- __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
+ kvm_msr_write(vcpu, MSR_IA32_XSS, 0);
}
/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
@@ -12538,7 +12955,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (init_event)
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_reset);
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
@@ -12550,7 +12967,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
kvm_rip_write(vcpu, 0);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_deliver_sipi_vector);
void kvm_arch_enable_virtualization(void)
{
@@ -12668,7 +13085,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_is_reset_bsp);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
@@ -12832,7 +13249,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
return (void __user *)hva;
}
-EXPORT_SYMBOL_GPL(__x86_set_memory_region);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__x86_set_memory_region);
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
@@ -13240,13 +13657,13 @@ unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
kvm_rip_read(vcpu));
}
-EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_linear_rip);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
{
return kvm_get_linear_rip(vcpu) == linear_rip;
}
-EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_is_linear_rip);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
@@ -13257,7 +13674,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
rflags &= ~X86_EFLAGS_TF;
return rflags;
}
-EXPORT_SYMBOL_GPL(kvm_get_rflags);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_rflags);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
@@ -13272,7 +13689,7 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
__kvm_set_rflags(vcpu, rflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_set_rflags);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_rflags);
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
@@ -13504,31 +13921,23 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1)
kvm_noncoherent_dma_assignment_start_or_stop(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
{
if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count))
kvm_noncoherent_dma_assignment_start_or_stop(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
{
return atomic_read(&kvm->arch.noncoherent_dma_count);
}
-EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
-
-bool kvm_vector_hashing_enabled(void)
-{
- return vector_hashing;
-}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_arch_has_noncoherent_dma);
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
}
-EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
#ifdef CONFIG_KVM_GUEST_MEMFD
/*
@@ -13579,7 +13988,7 @@ int kvm_spec_ctrl_test_value(u64 value)
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spec_ctrl_test_value);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
{
@@ -13604,7 +14013,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c
}
vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
}
-EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fixup_and_inject_pf_error);
/*
* Handles kvm_read/write_guest_virt*() result and either injects #PF or returns
@@ -13633,7 +14042,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_memory_failure);
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
{
@@ -13697,7 +14106,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return 1;
}
}
-EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invpcid);
static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
{
@@ -13782,7 +14191,7 @@ int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_write);
int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
void *data)
@@ -13820,7 +14229,7 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_read);
static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size)
{
@@ -13908,7 +14317,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
return in ? kvm_sev_es_ins(vcpu, size, port)
: kvm_sev_es_outs(vcpu, size, port);
}
-EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_string_io);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index bcfd9b719ada..f3dc77f006f9 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -50,6 +50,7 @@ struct kvm_host_values {
u64 efer;
u64 xcr0;
u64 xss;
+ u64 s_cet;
u64 arch_capabilities;
};
@@ -101,6 +102,16 @@ do { \
#define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX
#define KVM_SVM_DEFAULT_PLE_WINDOW 3000
+/*
+ * KVM's internal, non-ABI indices for synthetic MSRs. The values themselves
+ * are arbitrary and have no meaning, the only requirement is that they don't
+ * conflict with "real" MSRs that KVM supports. Use values at the upper end
+ * of KVM's reserved paravirtual MSR range to minimize churn, i.e. these values
+ * will be usable until KVM exhausts its supply of paravirtual MSR indices.
+ */
+
+#define MSR_KVM_INTERNAL_GUEST_SSP 0x4b564dff
+
static inline unsigned int __grow_ple_window(unsigned int val,
unsigned int base, unsigned int modifier, unsigned int max)
{
@@ -431,14 +442,15 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
-bool kvm_vector_hashing_enabled(void);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code);
int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
void *insn, int insn_len);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
-fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu);
extern struct kvm_caps kvm_caps;
extern struct kvm_host_values kvm_host;
@@ -668,6 +680,9 @@ static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
__reserved_bits |= X86_CR4_PCIDE; \
if (!__cpu_has(__c, X86_FEATURE_LAM)) \
__reserved_bits |= X86_CR4_LAM_SUP; \
+ if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \
+ !__cpu_has(__c, X86_FEATURE_IBT)) \
+ __reserved_bits |= X86_CR4_CET; \
__reserved_bits; \
})
@@ -699,4 +714,27 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl,
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
+#define CET_US_RESERVED_BITS GENMASK(9, 6)
+#define CET_US_SHSTK_MASK_BITS GENMASK(1, 0)
+#define CET_US_IBT_MASK_BITS (GENMASK_ULL(5, 2) | GENMASK_ULL(63, 10))
+#define CET_US_LEGACY_BITMAP_BASE(data) ((data) >> 12)
+
+static inline bool kvm_is_valid_u_s_cet(struct kvm_vcpu *vcpu, u64 data)
+{
+ if (data & CET_US_RESERVED_BITS)
+ return false;
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ (data & CET_US_SHSTK_MASK_BITS))
+ return false;
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) &&
+ (data & CET_US_IBT_MASK_BITS))
+ return false;
+ if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
+ return false;
+ /* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
+ if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
+ return false;
+
+ return true;
+}
#endif
diff --git a/arch/x86/lib/bhi.S b/arch/x86/lib/bhi.S
index 58891681261b..aad1e5839202 100644
--- a/arch/x86/lib/bhi.S
+++ b/arch/x86/lib/bhi.S
@@ -5,7 +5,7 @@
#include <asm/nospec-branch.h>
/*
- * Notably, the FineIBT preamble calling these will have ZF set and r10 zero.
+ * Notably, the FineIBT preamble calling these will have ZF set and eax zero.
*
* The very last element is in fact larger than 32 bytes, but since its the
* last element, this does not matter,
@@ -36,7 +36,7 @@ SYM_INNER_LABEL(__bhi_args_1, SYM_L_LOCAL)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
jne .Lud_1
- cmovne %r10, %rdi
+ cmovne %rax, %rdi
ANNOTATE_UNRET_SAFE
ret
int3
@@ -53,8 +53,8 @@ SYM_INNER_LABEL(__bhi_args_2, SYM_L_LOCAL)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
jne .Lud_1
- cmovne %r10, %rdi
- cmovne %r10, %rsi
+ cmovne %rax, %rdi
+ cmovne %rax, %rsi
ANNOTATE_UNRET_SAFE
ret
int3
@@ -64,9 +64,9 @@ SYM_INNER_LABEL(__bhi_args_3, SYM_L_LOCAL)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
jne .Lud_1
- cmovne %r10, %rdi
- cmovne %r10, %rsi
- cmovne %r10, %rdx
+ cmovne %rax, %rdi
+ cmovne %rax, %rsi
+ cmovne %rax, %rdx
ANNOTATE_UNRET_SAFE
ret
int3
@@ -76,10 +76,10 @@ SYM_INNER_LABEL(__bhi_args_4, SYM_L_LOCAL)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
jne .Lud_2
- cmovne %r10, %rdi
- cmovne %r10, %rsi
- cmovne %r10, %rdx
- cmovne %r10, %rcx
+ cmovne %rax, %rdi
+ cmovne %rax, %rsi
+ cmovne %rax, %rdx
+ cmovne %rax, %rcx
ANNOTATE_UNRET_SAFE
ret
int3
@@ -89,11 +89,11 @@ SYM_INNER_LABEL(__bhi_args_5, SYM_L_LOCAL)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
jne .Lud_2
- cmovne %r10, %rdi
- cmovne %r10, %rsi
- cmovne %r10, %rdx
- cmovne %r10, %rcx
- cmovne %r10, %r8
+ cmovne %rax, %rdi
+ cmovne %rax, %rsi
+ cmovne %rax, %rdx
+ cmovne %rax, %rcx
+ cmovne %rax, %r8
ANNOTATE_UNRET_SAFE
ret
int3
@@ -110,12 +110,12 @@ SYM_INNER_LABEL(__bhi_args_6, SYM_L_LOCAL)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
jne .Lud_2
- cmovne %r10, %rdi
- cmovne %r10, %rsi
- cmovne %r10, %rdx
- cmovne %r10, %rcx
- cmovne %r10, %r8
- cmovne %r10, %r9
+ cmovne %rax, %rdi
+ cmovne %rax, %rsi
+ cmovne %rax, %rdx
+ cmovne %rax, %rcx
+ cmovne %rax, %r8
+ cmovne %rax, %r9
ANNOTATE_UNRET_SAFE
ret
int3
@@ -125,13 +125,13 @@ SYM_INNER_LABEL(__bhi_args_7, SYM_L_LOCAL)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
jne .Lud_2
- cmovne %r10, %rdi
- cmovne %r10, %rsi
- cmovne %r10, %rdx
- cmovne %r10, %rcx
- cmovne %r10, %r8
- cmovne %r10, %r9
- cmovne %r10, %rsp
+ cmovne %rax, %rdi
+ cmovne %rax, %rsi
+ cmovne %rax, %rdx
+ cmovne %rax, %rcx
+ cmovne %rax, %r8
+ cmovne %rax, %r9
+ cmovne %rax, %rsp
ANNOTATE_UNRET_SAFE
ret
int3
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index f513d33b6d37..8f1fed0c3b83 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -134,10 +134,10 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
.macro ITS_THUNK reg
/*
- * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b)
+ * If CFI paranoid is used then the ITS thunk starts with opcodes (1: udb; jne 1b)
* that complete the fineibt_paranoid caller sequence.
*/
-1: .byte 0xea
+1: ASM_UDB
SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index fc13306af15f..d4c93d9e73e4 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -420,12 +420,12 @@ static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity)
u8 *prog = *pprog;
EMIT_ENDBR();
- EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */
+ EMIT1_off32(0x2d, hash); /* subl $hash, %eax */
if (cfi_bhi) {
+ EMIT2(0x2e, 0x2e); /* cs cs */
emit_call(&prog, __bhi_args[arity], ip + 11);
} else {
- EMIT2(0x75, 0xf9); /* jne.d8 .-7 */
- EMIT3(0x0f, 0x1f, 0x00); /* nop3 */
+ EMIT3_off32(0x2e, 0x0f, 0x85, 3); /* jne.d32,pn 3 */
}
EMIT_ENDBR_POISON();
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index e7e71490bd25..25076a5acd96 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -295,6 +295,46 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_ro
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk);
/*
+ * PCIe devices underneath Xeon 6 PCIe Root Port bifurcated to x2 have lower
+ * performance with Extended Tags and MRRS > 128B. Work around the performance
+ * problems by disabling Extended Tags and limiting MRRS to 128B.
+ *
+ * https://cdrdv2.intel.com/v1/dl/getContent/837176
+ */
+static int limit_mrrs_to_128(struct pci_host_bridge *b, struct pci_dev *pdev)
+{
+ int readrq = pcie_get_readrq(pdev);
+
+ if (readrq > 128)
+ pcie_set_readrq(pdev, 128);
+
+ return 0;
+}
+
+static void pci_xeon_x2_bifurc_quirk(struct pci_dev *pdev)
+{
+ struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
+ u32 linkcap;
+
+ pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &linkcap);
+ if (FIELD_GET(PCI_EXP_LNKCAP_MLW, linkcap) != 0x2)
+ return;
+
+ bridge->no_ext_tags = 1;
+ bridge->enable_device = limit_mrrs_to_128;
+ pci_info(pdev, "Disabling Extended Tags and limiting MRRS to 128B (performance reasons due to x2 PCIe link)\n");
+}
+
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db0, pci_xeon_x2_bifurc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db1, pci_xeon_x2_bifurc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db2, pci_xeon_x2_bifurc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db3, pci_xeon_x2_bifurc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db6, pci_xeon_x2_bifurc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db7, pci_xeon_x2_bifurc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db8, pci_xeon_x2_bifurc_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db9, pci_xeon_x2_bifurc_quirk);
+
+/*
* Fixup to mark boot BIOS video selected by BIOS before it changes
*
* From information provided by "Jon Smirl" <jonsmirl@gmail.com>
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 2206b8bc47b8..f0a5fba0717e 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -11,6 +11,10 @@
#include <asm/nospec-branch.h>
SYM_FUNC_START(__efi_call)
+ /*
+ * The EFI code doesn't have any CFI, annotate away the CFI violation.
+ */
+ ANNOTATE_NOCFI_SYM
pushq %rbp
movq %rsp, %rbp
and $~0xf, %rsp
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index df568fc3ceb4..9dc2efaf5df1 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -129,7 +129,7 @@ static __always_inline void *get_stub_data(void)
"subl %0,%%esp ;" \
"movl %1, %%eax ; " \
"call *%%eax ;" \
- :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
+ :: "i" (STUB_SIZE), \
"i" (&fn))
static __always_inline void
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 9cfd31afa769..9fd56954e2e0 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -133,7 +133,7 @@ static __always_inline void *get_stub_data(void)
"subq %0,%%rsp ;" \
"movq %1,%%rax ;" \
"call *%%rax ;" \
- :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
+ :: "i" (STUB_SIZE), \
"i" (&fn))
static __always_inline void
diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig
index f2af1a32c9c7..dc942bbac69f 100644
--- a/arch/xtensa/configs/audio_kc705_defconfig
+++ b/arch/xtensa/configs/audio_kc705_defconfig
@@ -103,7 +103,7 @@ CONFIG_SND_SIMPLE_CARD=y
# CONFIG_USB_SUPPORT is not set
CONFIG_COMMON_CLK_CDCE706=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_VFAT_FS=y
diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig
index 88ed5284e21c..81a057f25f21 100644
--- a/arch/xtensa/configs/cadence_csp_defconfig
+++ b/arch/xtensa/configs/cadence_csp_defconfig
@@ -80,7 +80,7 @@ CONFIG_SOFT_WATCHDOG=y
# CONFIG_VGA_CONSOLE is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig
index 4427907becca..3ee7e1c56556 100644
--- a/arch/xtensa/configs/generic_kc705_defconfig
+++ b/arch/xtensa/configs/generic_kc705_defconfig
@@ -90,7 +90,7 @@ CONFIG_SOFT_WATCHDOG=y
# CONFIG_VGA_CONSOLE is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_VFAT_FS=y
diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig
index 5828228522ba..c6e96f0aa700 100644
--- a/arch/xtensa/configs/nommu_kc705_defconfig
+++ b/arch/xtensa/configs/nommu_kc705_defconfig
@@ -91,7 +91,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=y
# CONFIG_VGA_CONSOLE is not set
# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_VFAT_FS=y
diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig
index 326966ca7831..373d42b9e510 100644
--- a/arch/xtensa/configs/smp_lx200_defconfig
+++ b/arch/xtensa/configs/smp_lx200_defconfig
@@ -94,7 +94,7 @@ CONFIG_SOFT_WATCHDOG=y
# CONFIG_VGA_CONSOLE is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_VFAT_FS=y
diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig
index e37048985b47..72628d31e87a 100644
--- a/arch/xtensa/configs/virt_defconfig
+++ b/arch/xtensa/configs/virt_defconfig
@@ -76,7 +76,7 @@ CONFIG_LOGO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_INPUT=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig
index ee47438f9b51..5d6013ea70fc 100644
--- a/arch/xtensa/configs/xip_kc705_defconfig
+++ b/arch/xtensa/configs/xip_kc705_defconfig
@@ -82,7 +82,7 @@ CONFIG_SOFT_WATCHDOG=y
# CONFIG_VGA_CONSOLE is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c
index 926b8bf0f14c..f14713060fd4 100644
--- a/arch/xtensa/kernel/platform.c
+++ b/arch/xtensa/kernel/platform.c
@@ -14,6 +14,7 @@
#include <linux/printk.h>
#include <linux/types.h>
+#include <linux/units.h>
#include <asm/platform.h>
#include <asm/timex.h>
@@ -38,7 +39,7 @@ void __weak platform_idle(void)
#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
void __weak platform_calibrate_ccount(void)
{
- pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10MHz.\n");
- ccount_freq = 10 * 1000000UL;
+ pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10 MHz.\n");
+ ccount_freq = 10 * HZ_PER_MHZ;
}
#endif
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 6ed009318d24..3cafc8feddee 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -231,10 +231,14 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
static ssize_t proc_write_simdisk(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- char *tmp = memdup_user_nul(buf, count);
+ char *tmp;
struct simdisk *dev = pde_data(file_inode(file));
int err;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
+
+ tmp = memdup_user_nul(buf, count);
if (IS_ERR(tmp))
return PTR_ERR(tmp);