summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorFabio Estevam <festevam@gmail.com>2018-02-26 21:54:02 -0300
committerFabio Estevam <festevam@gmail.com>2018-02-26 21:54:02 -0300
commit20ebb9fb81ecf84b57197aaa2a44be017d48e597 (patch)
tree54f6e35248e45254412291842083ddde9e400867 /arch
parent3c939d6f1d4c5c70b26d23cbd2e3d3e67e2bca69 (diff)
parent19c04ca5b239e6e2277a5b381d1e79482ab9bbc5 (diff)
Merge tag 'v4.9.84' into 4.9-1.0.x-imx-stable-merge
This is the 4.9.84 stable release
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/pci_impl.h3
-rw-r--r--arch/alpha/kernel/process.c3
-rw-r--r--arch/alpha/kernel/traps.c13
-rw-r--r--arch/arm/boot/dts/am4372.dtsi6
-rw-r--r--arch/arm/boot/dts/am437x-cm-t43.dts4
-rw-r--r--arch/arm/boot/dts/arm-realview-eb-mp.dtsi5
-rw-r--r--arch/arm/boot/dts/bcm-nsp.dtsi4
-rw-r--r--arch/arm/boot/dts/exynos5410.dtsi1
-rw-r--r--arch/arm/boot/dts/kirkwood-openblocks_a7.dts10
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts3
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv.dtsi16
-rw-r--r--arch/arm/boot/dts/lpc3250-ea3250.dts4
-rw-r--r--arch/arm/boot/dts/lpc3250-phy3250.dts4
-rw-r--r--arch/arm/boot/dts/mt2701.dtsi2
-rw-r--r--arch/arm/boot/dts/omap4.dtsi4
-rw-r--r--arch/arm/boot/dts/s5pv210.dtsi1
-rw-r--r--arch/arm/boot/dts/spear1310-evb.dts2
-rw-r--r--arch/arm/boot/dts/spear1340.dtsi4
-rw-r--r--arch/arm/boot/dts/spear13xx.dtsi6
-rw-r--r--arch/arm/boot/dts/spear600.dtsi1
-rw-r--r--arch/arm/boot/dts/ste-nomadik-stn8815.dtsi1
-rw-r--r--arch/arm/boot/dts/stih407.dtsi3
-rw-r--r--arch/arm/boot/dts/stih410.dtsi3
-rw-r--r--arch/arm/common/bL_switcher_dummy_if.c4
-rw-r--r--arch/arm/configs/sunxi_defconfig2
-rw-r--r--arch/arm/kvm/arm.c1
-rw-r--r--arch/arm/kvm/handle_exit.c13
-rw-r--r--arch/arm/kvm/mmio.c6
-rw-r--r--arch/arm/kvm/mmu.c2
-rw-r--r--arch/arm/mach-omap2/omap-secure.c19
-rw-r--r--arch/arm/mach-omap2/omap-secure.h4
-rw-r--r--arch/arm/mach-omap2/pm.h4
-rw-r--r--arch/arm/mach-omap2/pm34xx.c13
-rw-r--r--arch/arm/mach-omap2/prm33xx.c12
-rw-r--r--arch/arm/mach-omap2/sleep34xx.S26
-rw-r--r--arch/arm/mach-pxa/tosa-bt.c4
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/arm64/Kconfig.platforms2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916.dtsi5
-rw-r--r--arch/arm64/crypto/crc32-arm64.c2
-rw-r--r--arch/arm64/include/asm/bug.h33
-rw-r--r--arch/arm64/kvm/handle_exit.c4
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/m68k/kernel/vmlinux-nommu.lds2
-rw-r--r--arch/m68k/kernel/vmlinux-std.lds2
-rw-r--r--arch/m68k/kernel/vmlinux-sun3.lds2
-rw-r--r--arch/mips/Kconfig12
-rw-r--r--arch/mips/ar7/platform.c2
-rw-r--r--arch/mips/kernel/process.c12
-rw-r--r--arch/mips/kernel/ptrace.c147
-rw-r--r--arch/mn10300/mm/misalignment.c2
-rw-r--r--arch/openrisc/kernel/traps.c10
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/crypto/crc32c-vpmsum_glue.c1
-rw-r--r--arch/powerpc/include/asm/exception-64e.h6
-rw-r--r--arch/powerpc/include/asm/exception-64s.h55
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h15
-rw-r--r--arch/powerpc/include/asm/hvcall.h18
-rw-r--r--arch/powerpc/include/asm/paca.h9
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h14
-rw-r--r--arch/powerpc/include/asm/setup.h13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c3
-rw-r--r--arch/powerpc/kernel/entry_64.S44
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S127
-rw-r--r--arch/powerpc/kernel/idle_book3s.S7
-rw-r--r--arch/powerpc/kernel/setup_64.c128
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S7
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S7
-rw-r--r--arch/powerpc/kvm/book3s_segment.S4
-rw-r--r--arch/powerpc/lib/feature-fixups.c42
-rw-r--r--arch/powerpc/perf/core-book3s.c4
-rw-r--r--arch/powerpc/platforms/powernv/setup.c50
-rw-r--r--arch/powerpc/platforms/pseries/setup.c35
-rw-r--r--arch/s390/crypto/crc32-vx.c3
-rw-r--r--arch/s390/kernel/compat_linux.c8
-rw-r--r--arch/sh/kernel/traps_32.c3
-rw-r--r--arch/sparc/crypto/crc32c_glue.c1
-rw-r--r--arch/um/Makefile2
-rw-r--r--arch/x86/Kconfig16
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/boot/Makefile5
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S5
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c2
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S3
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S3
-rw-r--r--arch/x86/crypto/crc32-pclmul_glue.c1
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c1
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S3
-rw-r--r--arch/x86/crypto/poly1305_glue.c1
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c10
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64-3way.S112
-rw-r--r--arch/x86/entry/common.c9
-rw-r--r--arch/x86/entry/entry_32.S22
-rw-r--r--arch/x86/entry/entry_64.S143
-rw-r--r--arch/x86/entry/entry_64_compat.S30
-rw-r--r--arch/x86/entry/syscall_64.c7
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c7
-rw-r--r--arch/x86/events/amd/power.c2
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/bts.c44
-rw-r--r--arch/x86/events/intel/core.c2
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/p6.c2
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/asm-prototypes.h27
-rw-r--r--arch/x86/include/asm/asm.h15
-rw-r--r--arch/x86/include/asm/barrier.h28
-rw-r--r--arch/x86/include/asm/cpufeature.h9
-rw-r--r--arch/x86/include/asm/cpufeatures.h25
-rw-r--r--arch/x86/include/asm/disabled-features.h3
-rw-r--r--arch/x86/include/asm/intel-family.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/microcode_amd.h1
-rw-r--r--arch/x86/include/asm/msr-index.h15
-rw-r--r--arch/x86/include/asm/msr.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h180
-rw-r--r--arch/x86/include/asm/pgtable.h6
-rw-r--r--arch/x86/include/asm/processor.h12
-rw-r--r--arch/x86/include/asm/required-features.h3
-rw-r--r--arch/x86/include/asm/syscall.h6
-rw-r--r--arch/x86/include/asm/thread_info.h14
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/uaccess.h15
-rw-r--r--arch/x86/include/asm/uaccess_32.h12
-rw-r--r--arch/x86/include/asm/uaccess_64.h12
-rw-r--r--arch/x86/include/asm/vmx.h5
-rw-r--r--arch/x86/include/asm/vsyscall.h1
-rw-r--r--arch/x86/include/asm/xen/hypercall.h5
-rw-r--r--arch/x86/kernel/acpi/boot.c61
-rw-r--r--arch/x86/kernel/alternative.c21
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/vector.c7
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c54
-rw-r--r--arch/x86/kernel/cpu/bugs.c305
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c33
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/common.c109
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c79
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c17
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c47
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c31
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/cpu/proc.c8
-rw-r--r--arch/x86/kernel/cpu/scattered.c3
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/irq_32.c15
-rw-r--r--arch/x86/kernel/kprobes/opt.c23
-rw-r--r--arch/x86/kernel/mcount_64.S7
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/tboot.c10
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/tsc.c1
-rw-r--r--arch/x86/kernel/vm86_32.c5
-rw-r--r--arch/x86/kernel/vmlinux.lds.S7
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/kvm/cpuid.c21
-rw-r--r--arch/x86/kvm/cpuid.h31
-rw-r--r--arch/x86/kvm/emulate.c17
-rw-r--r--arch/x86/kvm/ioapic.c20
-rw-r--r--arch/x86/kvm/mmu.c10
-rw-r--r--arch/x86/kvm/svm.c139
-rw-r--r--arch/x86/kvm/vmx.c820
-rw-r--r--arch/x86/kvm/x86.c62
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/checksum_32.S7
-rw-r--r--arch/x86/lib/cpu.c2
-rw-r--r--arch/x86/lib/delay.c7
-rw-r--r--arch/x86/lib/getuser.S10
-rw-r--r--arch/x86/lib/retpoline.S104
-rw-r--r--arch/x86/lib/usercopy_32.c8
-rw-r--r--arch/x86/math-emu/Makefile4
-rw-r--r--arch/x86/math-emu/reg_compare.c16
-rw-r--r--arch/x86/mm/fault.c7
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/kaiser.c4
-rw-r--r--arch/x86/mm/kmmio.c12
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c13
-rw-r--r--arch/x86/platform/efi/efi_64.c2
-rw-r--r--arch/xtensa/include/asm/futex.h23
195 files changed, 3051 insertions, 1128 deletions
diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h
index 2b0ac429f5eb..412bb3c24f36 100644
--- a/arch/alpha/kernel/pci_impl.h
+++ b/arch/alpha/kernel/pci_impl.h
@@ -143,7 +143,8 @@ struct pci_iommu_arena
};
#if defined(CONFIG_ALPHA_SRM) && \
- (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA))
+ (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \
+ defined(CONFIG_ALPHA_AVANTI))
# define NEED_SRM_SAVE_RESTORE
#else
# undef NEED_SRM_SAVE_RESTORE
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index b483156698d5..60c17b9bf04d 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
application calling fork. */
if (clone_flags & CLONE_SETTLS)
childti->pcb.unique = regs->r20;
+ else
+ regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */
childti->pcb.usp = usp ?: rdusp();
*childregs = *regs;
childregs->r0 = 0;
childregs->r19 = 0;
childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */
- regs->r20 = 0;
stack = ((struct switch_stack *) regs) - 1;
*childstack = *stack;
childstack->r26 = (unsigned long) ret_from_fork;
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 74aceead06e9..32ba92cdf5f6 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -158,11 +158,16 @@ void show_stack(struct task_struct *task, unsigned long *sp)
for(i=0; i < kstack_depth_to_print; i++) {
if (((long) stack & (THREAD_SIZE-1)) == 0)
break;
- if (i && ((i % 4) == 0))
- printk("\n ");
- printk("%016lx ", *stack++);
+ if ((i % 4) == 0) {
+ if (i)
+ pr_cont("\n");
+ printk(" ");
+ } else {
+ pr_cont(" ");
+ }
+ pr_cont("%016lx", *stack++);
}
- printk("\n");
+ pr_cont("\n");
dik_show_trace(sp);
}
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index a20a71d9d22e..c9c9a47446e8 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -926,7 +926,8 @@
reg = <0x48038000 0x2000>,
<0x46000000 0x400000>;
reg-names = "mpu", "dat";
- interrupts = <80>, <81>;
+ interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "tx", "rx";
status = "disabled";
dmas = <&edma 8 2>,
@@ -940,7 +941,8 @@
reg = <0x4803C000 0x2000>,
<0x46400000 0x400000>;
reg-names = "mpu", "dat";
- interrupts = <82>, <83>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "tx", "rx";
status = "disabled";
dmas = <&edma 10 2>,
diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts
index 9e92d480576b..3b9a94c274a7 100644
--- a/arch/arm/boot/dts/am437x-cm-t43.dts
+++ b/arch/arm/boot/dts/am437x-cm-t43.dts
@@ -301,8 +301,8 @@
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&spi0_pins>;
- dmas = <&edma 16
- &edma 17>;
+ dmas = <&edma 16 0
+ &edma 17 0>;
dma-names = "tx0", "rx0";
flash: w25q64cvzpig@0 {
diff --git a/arch/arm/boot/dts/arm-realview-eb-mp.dtsi b/arch/arm/boot/dts/arm-realview-eb-mp.dtsi
index 7b8d90b7aeea..29b636fce23f 100644
--- a/arch/arm/boot/dts/arm-realview-eb-mp.dtsi
+++ b/arch/arm/boot/dts/arm-realview-eb-mp.dtsi
@@ -150,11 +150,6 @@
interrupts = <0 8 IRQ_TYPE_LEVEL_HIGH>;
};
-&charlcd {
- interrupt-parent = <&intc>;
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
-};
-
&serial0 {
interrupt-parent = <&intc>;
interrupts = <0 4 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi
index 7c9e0fae9bb9..65e0db1d3bd7 100644
--- a/arch/arm/boot/dts/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/bcm-nsp.dtsi
@@ -85,7 +85,7 @@
timer@20200 {
compatible = "arm,cortex-a9-global-timer";
reg = <0x20200 0x100>;
- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
clocks = <&periph_clk>;
};
@@ -93,7 +93,7 @@
compatible = "arm,cortex-a9-twd-timer";
reg = <0x20600 0x20>;
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
- IRQ_TYPE_LEVEL_HIGH)>;
+ IRQ_TYPE_EDGE_RISING)>;
clocks = <&periph_clk>;
};
diff --git a/arch/arm/boot/dts/exynos5410.dtsi b/arch/arm/boot/dts/exynos5410.dtsi
index 137f48464f8b..bb59fee072c0 100644
--- a/arch/arm/boot/dts/exynos5410.dtsi
+++ b/arch/arm/boot/dts/exynos5410.dtsi
@@ -274,7 +274,6 @@
&rtc {
clocks = <&clock CLK_RTC>;
clock-names = "rtc";
- interrupt-parent = <&pmu_system_controller>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
index cf2f5240e176..27cc913ca0f5 100644
--- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
+++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
@@ -53,7 +53,8 @@
};
pinctrl: pin-controller@10000 {
- pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>;
+ pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header
+ &pmx_gpio_header_gpo>;
pinctrl-names = "default";
pmx_uart0: pmx-uart0 {
@@ -85,11 +86,16 @@
* ground.
*/
pmx_gpio_header: pmx-gpio-header {
- marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28",
+ marvell,pins = "mpp17", "mpp29", "mpp28",
"mpp35", "mpp34", "mpp40";
marvell,function = "gpio";
};
+ pmx_gpio_header_gpo: pxm-gpio-header-gpo {
+ marvell,pins = "mpp7";
+ marvell,function = "gpo";
+ };
+
pmx_gpio_init: pmx-init {
marvell,pins = "mpp38";
marvell,function = "gpio";
diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
index 38faa90007d7..2fa5eb4bd402 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
@@ -72,7 +72,8 @@
};
&gpmc {
- ranges = <1 0 0x08000000 0x1000000>; /* CS1: 16MB for LAN9221 */
+ ranges = <0 0 0x30000000 0x1000000 /* CS0: 16MB for NAND */
+ 1 0 0x2c000000 0x1000000>; /* CS1: 16MB for LAN9221 */
ethernet@gpmc {
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 26cce4d18405..4f2c5ec75714 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -37,7 +37,7 @@
};
&gpmc {
- ranges = <0 0 0x00000000 0x1000000>; /* CS0: 16MB for NAND */
+ ranges = <0 0 0x30000000 0x1000000>; /* CS0: 16MB for NAND */
nand@0,0 {
compatible = "ti,omap2-nand";
@@ -121,7 +121,7 @@
&mmc3 {
interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>;
- pinctrl-0 = <&mmc3_pins>;
+ pinctrl-0 = <&mmc3_pins &wl127x_gpio>;
pinctrl-names = "default";
vmmc-supply = <&wl12xx_vmmc>;
non-removable;
@@ -132,8 +132,8 @@
wlcore: wlcore@2 {
compatible = "ti,wl1273";
reg = <2>;
- interrupt-parent = <&gpio5>;
- interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
+ interrupt-parent = <&gpio1>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; /* gpio 2 */
ref-clock-frequency = <26000000>;
};
};
@@ -157,8 +157,6 @@
OMAP3_CORE1_IOPAD(0x2166, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat5.sdmmc3_dat1 */
OMAP3_CORE1_IOPAD(0x2168, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat2 */
OMAP3_CORE1_IOPAD(0x216a, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat3 */
- OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT_PULLUP | MUX_MODE4) /* mcbsp4_clkx.gpio_152 */
- OMAP3_CORE1_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */
OMAP3_CORE1_IOPAD(0x21d0, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs1.sdmmc3_cmd */
OMAP3_CORE1_IOPAD(0x21d2, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs2.sdmmc_clk */
>;
@@ -228,6 +226,12 @@
OMAP3_WKUP_IOPAD(0x2a0e, PIN_OUTPUT | MUX_MODE4) /* sys_boot2.gpio_4 */
>;
};
+ wl127x_gpio: pinmux_wl127x_gpio_pin {
+ pinctrl-single,pins = <
+ OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */
+ OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */
+ >;
+ };
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/lpc3250-ea3250.dts b/arch/arm/boot/dts/lpc3250-ea3250.dts
index 52b3ed10283a..e2bc731079be 100644
--- a/arch/arm/boot/dts/lpc3250-ea3250.dts
+++ b/arch/arm/boot/dts/lpc3250-ea3250.dts
@@ -156,8 +156,8 @@
uda1380: uda1380@18 {
compatible = "nxp,uda1380";
reg = <0x18>;
- power-gpio = <&gpio 0x59 0>;
- reset-gpio = <&gpio 0x51 0>;
+ power-gpio = <&gpio 3 10 0>;
+ reset-gpio = <&gpio 3 2 0>;
dac-clk = "wspll";
};
diff --git a/arch/arm/boot/dts/lpc3250-phy3250.dts b/arch/arm/boot/dts/lpc3250-phy3250.dts
index fd95e2b10357..b7bd3a110a8d 100644
--- a/arch/arm/boot/dts/lpc3250-phy3250.dts
+++ b/arch/arm/boot/dts/lpc3250-phy3250.dts
@@ -81,8 +81,8 @@
uda1380: uda1380@18 {
compatible = "nxp,uda1380";
reg = <0x18>;
- power-gpio = <&gpio 0x59 0>;
- reset-gpio = <&gpio 0x51 0>;
+ power-gpio = <&gpio 3 10 0>;
+ reset-gpio = <&gpio 3 2 0>;
dac-clk = "wspll";
};
diff --git a/arch/arm/boot/dts/mt2701.dtsi b/arch/arm/boot/dts/mt2701.dtsi
index 77c6b931dc24..23fe0497f708 100644
--- a/arch/arm/boot/dts/mt2701.dtsi
+++ b/arch/arm/boot/dts/mt2701.dtsi
@@ -197,12 +197,14 @@
compatible = "mediatek,mt2701-hifsys", "syscon";
reg = <0 0x1a000000 0 0x1000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
ethsys: syscon@1b000000 {
compatible = "mediatek,mt2701-ethsys", "syscon";
reg = <0 0x1b000000 0 0x1000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
bdpsys: syscon@1c000000 {
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 9c289ddab3df..4d6584f15b17 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -352,7 +352,7 @@
elm: elm@48078000 {
compatible = "ti,am3352-elm";
reg = <0x48078000 0x2000>;
- interrupts = <4>;
+ interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "elm";
status = "disabled";
};
@@ -859,14 +859,12 @@
usbhsohci: ohci@4a064800 {
compatible = "ti,ohci-omap3";
reg = <0x4a064800 0x400>;
- interrupt-parent = <&gic>;
interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
};
usbhsehci: ehci@4a064c00 {
compatible = "ti,ehci-omap";
reg = <0x4a064c00 0x400>;
- interrupt-parent = <&gic>;
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
};
};
diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi
index a853918be43f..0c10ba517cd0 100644
--- a/arch/arm/boot/dts/s5pv210.dtsi
+++ b/arch/arm/boot/dts/s5pv210.dtsi
@@ -463,6 +463,7 @@
compatible = "samsung,exynos4210-ohci";
reg = <0xec300000 0x100>;
interrupts = <23>;
+ interrupt-parent = <&vic1>;
clocks = <&clocks CLK_USB_HOST>;
clock-names = "usbhost";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts
index 84101e4eebbf..0f5f379323a8 100644
--- a/arch/arm/boot/dts/spear1310-evb.dts
+++ b/arch/arm/boot/dts/spear1310-evb.dts
@@ -349,7 +349,7 @@
spi0: spi@e0100000 {
status = "okay";
num-cs = <3>;
- cs-gpios = <&gpio1 7 0>, <&spics 0>, <&spics 1>;
+ cs-gpios = <&gpio1 7 0>, <&spics 0 0>, <&spics 1 0>;
stmpe610@0 {
compatible = "st,stmpe610";
diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi
index df2232d767ed..6361cbfcbe5e 100644
--- a/arch/arm/boot/dts/spear1340.dtsi
+++ b/arch/arm/boot/dts/spear1340.dtsi
@@ -141,8 +141,8 @@
reg = <0xb4100000 0x1000>;
interrupts = <0 105 0x4>;
status = "disabled";
- dmas = <&dwdma0 0x600 0 0 1>, /* 0xC << 11 */
- <&dwdma0 0x680 0 1 0>; /* 0xD << 7 */
+ dmas = <&dwdma0 12 0 1>,
+ <&dwdma0 13 1 0>;
dma-names = "tx", "rx";
};
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 449acf0d8272..9564337c1815 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -100,7 +100,7 @@
reg = <0xb2800000 0x1000>;
interrupts = <0 29 0x4>;
status = "disabled";
- dmas = <&dwdma0 0 0 0 0>;
+ dmas = <&dwdma0 0 0 0>;
dma-names = "data";
};
@@ -288,8 +288,8 @@
#size-cells = <0>;
interrupts = <0 31 0x4>;
status = "disabled";
- dmas = <&dwdma0 0x2000 0 0 0>, /* 0x4 << 11 */
- <&dwdma0 0x0280 0 0 0>; /* 0x5 << 7 */
+ dmas = <&dwdma0 4 0 0>,
+ <&dwdma0 5 0 0>;
dma-names = "tx", "rx";
};
diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
index 9f60a7b6a42b..bd379034993c 100644
--- a/arch/arm/boot/dts/spear600.dtsi
+++ b/arch/arm/boot/dts/spear600.dtsi
@@ -194,6 +194,7 @@
rtc@fc900000 {
compatible = "st,spear600-rtc";
reg = <0xfc900000 0x1000>;
+ interrupt-parent = <&vic0>;
interrupts = <10>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
index adb1c0998b81..1077ceebb2d6 100644
--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
@@ -749,6 +749,7 @@
reg = <0x10120000 0x1000>;
interrupt-names = "combined";
interrupts = <14>;
+ interrupt-parent = <&vica>;
clocks = <&clcdclk>, <&hclkclcd>;
clock-names = "clcdclk", "apb_pclk";
status = "disabled";
diff --git a/arch/arm/boot/dts/stih407.dtsi b/arch/arm/boot/dts/stih407.dtsi
index 291ffacbd2e0..fe043d313ccd 100644
--- a/arch/arm/boot/dts/stih407.dtsi
+++ b/arch/arm/boot/dts/stih407.dtsi
@@ -8,6 +8,7 @@
*/
#include "stih407-clock.dtsi"
#include "stih407-family.dtsi"
+#include <dt-bindings/gpio/gpio.h>
/ {
soc {
sti-display-subsystem {
@@ -122,7 +123,7 @@
<&clk_s_d2_quadfs 0>,
<&clk_s_d2_quadfs 1>;
- hdmi,hpd-gpio = <&pio5 3>;
+ hdmi,hpd-gpio = <&pio5 3 GPIO_ACTIVE_LOW>;
reset-names = "hdmi";
resets = <&softreset STIH407_HDMI_TX_PHY_SOFTRESET>;
ddc = <&hdmiddc>;
diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi
index 4d329b2908be..3c118fc2bf61 100644
--- a/arch/arm/boot/dts/stih410.dtsi
+++ b/arch/arm/boot/dts/stih410.dtsi
@@ -9,6 +9,7 @@
#include "stih410-clock.dtsi"
#include "stih407-family.dtsi"
#include "stih410-pinctrl.dtsi"
+#include <dt-bindings/gpio/gpio.h>
/ {
aliases {
bdisp0 = &bdisp0;
@@ -213,7 +214,7 @@
<&clk_s_d2_quadfs 0>,
<&clk_s_d2_quadfs 1>;
- hdmi,hpd-gpio = <&pio5 3>;
+ hdmi,hpd-gpio = <&pio5 3 GPIO_ACTIVE_LOW>;
reset-names = "hdmi";
resets = <&softreset STIH407_HDMI_TX_PHY_SOFTRESET>;
ddc = <&hdmiddc>;
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
index 6053f64c3752..94133e33943c 100644
--- a/arch/arm/common/bL_switcher_dummy_if.c
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -57,3 +57,7 @@ static struct miscdevice bL_switcher_device = {
&bL_switcher_fops
};
module_misc_device(bL_switcher_device);
+
+MODULE_AUTHOR("Nicolas Pitre <nico@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("big.LITTLE switcher dummy user interface");
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 714da336ec86..5d49b60841e3 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -11,6 +11,7 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=8
CONFIG_AEABI=y
CONFIG_HIGHMEM=y
+CONFIG_CMA=y
CONFIG_ARM_APPENDED_DTB=y
CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_CPU_FREQ=y
@@ -35,6 +36,7 @@ CONFIG_CAN_SUN4I=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
CONFIG_BLK_DEV_SD=y
CONFIG_ATA=y
CONFIG_AHCI_SUNXI=y
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 19b5f5c1c0ff..c38bfbeec306 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1165,6 +1165,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
cpu_hyp_reset();
return NOTIFY_OK;
+ case CPU_PM_ENTER_FAILED:
case CPU_PM_EXIT:
if (__this_cpu_read(kvm_arm_hardware_enabled))
/* The hardware was enabled before suspend. */
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 42f5daf715d0..4e57ebca6e69 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -38,7 +38,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = kvm_psci_call(vcpu);
if (ret < 0) {
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -47,7 +47,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- kvm_inject_undefined(vcpu);
+ /*
+ * "If an SMC instruction executed at Non-secure EL1 is
+ * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
+ * Trap exception, not a Secure Monitor Call exception [...]"
+ *
+ * We need to advance the PC after the trap, as it would
+ * otherwise return to the same address...
+ */
+ vcpu_set_reg(vcpu, 0, ~0UL);
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index b6e715fd3c90..dac7ceb1a677 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
- data);
+ &data);
data = vcpu_data_host_to_guest(vcpu, data, len);
vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
}
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
len);
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
kvm_mmio_write_buf(data_buf, len, data);
ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
} else {
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
- fault_ipa, 0);
+ fault_ipa, NULL);
ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 2206e0e00934..2a35c1963f6d 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1284,7 +1284,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma) && !logging_active) {
+ if (vma_kernel_pagesize(vma) && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c
index 5ac122e88f67..9ff92050053c 100644
--- a/arch/arm/mach-omap2/omap-secure.c
+++ b/arch/arm/mach-omap2/omap-secure.c
@@ -73,6 +73,25 @@ phys_addr_t omap_secure_ram_mempool_base(void)
return omap_secure_memblock_base;
}
+u32 omap3_save_secure_ram(void __iomem *addr, int size)
+{
+ u32 ret;
+ u32 param[5];
+
+ if (size != OMAP3_SAVE_SECURE_RAM_SZ)
+ return OMAP3_SAVE_SECURE_RAM_SZ;
+
+ param[0] = 4; /* Number of arguments */
+ param[1] = __pa(addr); /* Physical address for saving */
+ param[2] = 0;
+ param[3] = 1;
+ param[4] = 1;
+
+ ret = save_secure_ram_context(__pa(param));
+
+ return ret;
+}
+
/**
* rx51_secure_dispatcher: Routine to dispatch secure PPA API calls
* @idx: The PPA API index
diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h
index bae263fba640..c509cde71f93 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -31,6 +31,8 @@
/* Maximum Secure memory storage size */
#define OMAP_SECURE_RAM_STORAGE (88 * SZ_1K)
+#define OMAP3_SAVE_SECURE_RAM_SZ 0x803F
+
/* Secure low power HAL API index */
#define OMAP4_HAL_SAVESECURERAM_INDEX 0x1a
#define OMAP4_HAL_SAVEHW_INDEX 0x1b
@@ -65,6 +67,8 @@ extern u32 omap_smc2(u32 id, u32 falg, u32 pargs);
extern u32 omap_smc3(u32 id, u32 process, u32 flag, u32 pargs);
extern phys_addr_t omap_secure_ram_mempool_base(void);
extern int omap_secure_ram_reserve_memblock(void);
+extern u32 save_secure_ram_context(u32 args_pa);
+extern u32 omap3_save_secure_ram(void __iomem *save_regs, int size);
extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
u32 arg1, u32 arg2, u32 arg3, u32 arg4);
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index b668719b9b25..8e30772cfe32 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -81,10 +81,6 @@ extern unsigned int omap3_do_wfi_sz;
/* ... and its pointer from SRAM after copy */
extern void (*omap3_do_wfi_sram)(void);
-/* save_secure_ram_context function pointer and size, for copy to SRAM */
-extern int save_secure_ram_context(u32 *addr);
-extern unsigned int save_secure_ram_context_sz;
-
extern void omap3_save_scratchpad_contents(void);
#define PM_RTA_ERRATUM_i608 (1 << 0)
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index d44e0e2f1106..3836958074ac 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -48,6 +48,7 @@
#include "prm3xxx.h"
#include "pm.h"
#include "sdrc.h"
+#include "omap-secure.h"
#include "sram.h"
#include "control.h"
#include "vc.h"
@@ -66,7 +67,6 @@ struct power_state {
static LIST_HEAD(pwrst_list);
-static int (*_omap_save_secure_sram)(u32 *addr);
void (*omap3_do_wfi_sram)(void);
static struct powerdomain *mpu_pwrdm, *neon_pwrdm;
@@ -121,8 +121,8 @@ static void omap3_save_secure_ram_context(void)
* will hang the system.
*/
pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
- ret = _omap_save_secure_sram((u32 *)(unsigned long)
- __pa(omap3_secure_ram_storage));
+ ret = omap3_save_secure_ram(omap3_secure_ram_storage,
+ OMAP3_SAVE_SECURE_RAM_SZ);
pwrdm_set_next_pwrst(mpu_pwrdm, mpu_next_state);
/* Following is for error tracking, it should not happen */
if (ret) {
@@ -434,15 +434,10 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused)
*
* The minimum set of functions is pushed to SRAM for execution:
* - omap3_do_wfi for erratum i581 WA,
- * - save_secure_ram_context for security extensions.
*/
void omap_push_sram_idle(void)
{
omap3_do_wfi_sram = omap_sram_push(omap3_do_wfi, omap3_do_wfi_sz);
-
- if (omap_type() != OMAP2_DEVICE_TYPE_GP)
- _omap_save_secure_sram = omap_sram_push(save_secure_ram_context,
- save_secure_ram_context_sz);
}
static void __init pm_errata_configure(void)
@@ -554,7 +549,7 @@ int __init omap3_pm_init(void)
clkdm_add_wkdep(neon_clkdm, mpu_clkdm);
if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
omap3_secure_ram_storage =
- kmalloc(0x803F, GFP_KERNEL);
+ kmalloc(OMAP3_SAVE_SECURE_RAM_SZ, GFP_KERNEL);
if (!omap3_secure_ram_storage)
pr_err("Memory allocation failed when allocating for secure sram context\n");
diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c
index dcb5001d77da..973bcd754e1c 100644
--- a/arch/arm/mach-omap2/prm33xx.c
+++ b/arch/arm/mach-omap2/prm33xx.c
@@ -176,17 +176,6 @@ static int am33xx_pwrdm_read_pwrst(struct powerdomain *pwrdm)
return v;
}
-static int am33xx_pwrdm_read_prev_pwrst(struct powerdomain *pwrdm)
-{
- u32 v;
-
- v = am33xx_prm_read_reg(pwrdm->prcm_offs, pwrdm->pwrstst_offs);
- v &= AM33XX_LASTPOWERSTATEENTERED_MASK;
- v >>= AM33XX_LASTPOWERSTATEENTERED_SHIFT;
-
- return v;
-}
-
static int am33xx_pwrdm_set_lowpwrstchange(struct powerdomain *pwrdm)
{
am33xx_prm_rmw_reg_bits(AM33XX_LOWPOWERSTATECHANGE_MASK,
@@ -357,7 +346,6 @@ struct pwrdm_ops am33xx_pwrdm_operations = {
.pwrdm_set_next_pwrst = am33xx_pwrdm_set_next_pwrst,
.pwrdm_read_next_pwrst = am33xx_pwrdm_read_next_pwrst,
.pwrdm_read_pwrst = am33xx_pwrdm_read_pwrst,
- .pwrdm_read_prev_pwrst = am33xx_pwrdm_read_prev_pwrst,
.pwrdm_set_logic_retst = am33xx_pwrdm_set_logic_retst,
.pwrdm_read_logic_pwrst = am33xx_pwrdm_read_logic_pwrst,
.pwrdm_read_logic_retst = am33xx_pwrdm_read_logic_retst,
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index 1b9f0520dea9..3e0d802c59da 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -93,20 +93,13 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
ENDPROC(enable_omap3630_toggle_l2_on_restore)
/*
- * Function to call rom code to save secure ram context. This gets
- * relocated to SRAM, so it can be all in .data section. Otherwise
- * we need to initialize api_params separately.
+ * Function to call rom code to save secure ram context.
+ *
+ * r0 = physical address of the parameters
*/
- .data
- .align 3
ENTRY(save_secure_ram_context)
stmfd sp!, {r4 - r11, lr} @ save registers on stack
- adr r3, api_params @ r3 points to parameters
- str r0, [r3,#0x4] @ r0 has sdram address
- ldr r12, high_mask
- and r3, r3, r12
- ldr r12, sram_phy_addr_mask
- orr r3, r3, r12
+ mov r3, r0 @ physical address of parameters
mov r0, #25 @ set service ID for PPA
mov r12, r0 @ copy secure service ID in r12
mov r1, #0 @ set task id for ROM code in r1
@@ -120,18 +113,7 @@ ENTRY(save_secure_ram_context)
nop
nop
ldmfd sp!, {r4 - r11, pc}
- .align
-sram_phy_addr_mask:
- .word SRAM_BASE_P
-high_mask:
- .word 0xffff
-api_params:
- .word 0x4, 0x0, 0x0, 0x1, 0x1
ENDPROC(save_secure_ram_context)
-ENTRY(save_secure_ram_context_sz)
- .word . - save_secure_ram_context
-
- .text
/*
* ======================
diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c
index 107f37210fb9..83606087edc7 100644
--- a/arch/arm/mach-pxa/tosa-bt.c
+++ b/arch/arm/mach-pxa/tosa-bt.c
@@ -132,3 +132,7 @@ static struct platform_driver tosa_bt_driver = {
},
};
module_platform_driver(tosa_bt_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dmitry Baryshkov");
+MODULE_DESCRIPTION("Bluetooth built-in chip control");
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cf57a7799a0f..7769c2e27788 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1007,7 +1007,7 @@ source "fs/Kconfig.binfmt"
config COMPAT
bool "Kernel support for 32-bit EL0"
depends on ARM64_4K_PAGES || EXPERT
- select COMPAT_BINFMT_ELF
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
select HAVE_UID16
select OLD_SIGSUSPEND3
select COMPAT_OLD_SIGACTION
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 101794f5ce10..08a4497f70a6 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -2,9 +2,11 @@ menu "Platform selection"
config ARCH_SUNXI
bool "Allwinner sunxi 64-bit SoC Family"
+ select ARCH_HAS_RESET_CONTROLLER
select GENERIC_IRQ_CHIP
select PINCTRL
select PINCTRL_SUN50I_A64
+ select RESET_CONTROLLER
help
This enables support for Allwinner sunxi based SoCs like the A64.
diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index 6c03c1702bb3..b307d6b6357e 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -73,6 +73,7 @@
reg = <0x000>;
enable-method = "psci";
cpu-idle-states = <&CPU_SLEEP_0>;
+ #cooling-cells = <2>;
};
cpu1: cpu@1 {
@@ -89,6 +90,7 @@
reg = <0x100>;
enable-method = "psci";
cpu-idle-states = <&CPU_SLEEP_0>;
+ #cooling-cells = <2>;
};
cpu3: cpu@101 {
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 466ca5705c99..08b88f6791be 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -796,6 +796,7 @@
"dsi_phy_regulator";
#clock-cells = <1>;
+ #phy-cells = <0>;
clocks = <&gcc GCC_MDSS_AHB_CLK>;
clock-names = "iface_clk";
@@ -906,8 +907,8 @@
#address-cells = <1>;
#size-cells = <0>;
- qcom,ipc-1 = <&apcs 0 13>;
- qcom,ipc-6 = <&apcs 0 19>;
+ qcom,ipc-1 = <&apcs 8 13>;
+ qcom,ipc-3 = <&apcs 8 19>;
apps_smsm: apps@0 {
reg = <0>;
diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
index 6a37c3c6b11d..3ec568afd1d3 100644
--- a/arch/arm64/crypto/crc32-arm64.c
+++ b/arch/arm64/crypto/crc32-arm64.c
@@ -232,6 +232,7 @@ static struct shash_alg crc32_alg = {
.cra_name = "crc32",
.cra_driver_name = "crc32-arm64-hw",
.cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
@@ -253,6 +254,7 @@ static struct shash_alg crc32c_alg = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-arm64-hw",
.cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 561190d15881..0bfe1df12b19 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -20,9 +20,6 @@
#include <asm/brk-imm.h>
-#ifdef CONFIG_GENERIC_BUG
-#define HAVE_ARCH_BUG
-
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
#define __BUGVERBOSE_LOCATION(file, line) \
@@ -36,28 +33,36 @@
#define _BUGVERBOSE_LOCATION(file, line)
#endif
-#define _BUG_FLAGS(flags) __BUG_FLAGS(flags)
+#ifdef CONFIG_GENERIC_BUG
-#define __BUG_FLAGS(flags) asm volatile ( \
+#define __BUG_ENTRY(flags) \
".pushsection __bug_table,\"a\"\n\t" \
".align 2\n\t" \
"0: .long 1f - 0b\n\t" \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
".short " #flags "\n\t" \
".popsection\n" \
- \
- "1: brk %[imm]" \
- :: [imm] "i" (BUG_BRK_IMM) \
-)
+ "1: "
+#else
+#define __BUG_ENTRY(flags) ""
+#endif
+
+#define __BUG_FLAGS(flags) \
+ asm volatile ( \
+ __BUG_ENTRY(flags) \
+ "brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \
+ );
-#define BUG() do { \
- _BUG_FLAGS(0); \
- unreachable(); \
+
+#define BUG() do { \
+ __BUG_FLAGS(0); \
+ unreachable(); \
} while (0)
-#define __WARN_TAINT(taint) _BUG_FLAGS(BUGFLAG_TAINT(taint))
+#define __WARN_TAINT(taint) \
+ __BUG_FLAGS(BUGFLAG_TAINT(taint))
-#endif /* ! CONFIG_GENERIC_BUG */
+#define HAVE_ARCH_BUG
#include <asm-generic/bug.h>
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 85baadab02d3..2e6e9e99977b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -44,7 +44,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = kvm_psci_call(vcpu);
if (ret < 0) {
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -53,7 +53,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 05615a3fdc6f..d5cc6d73c2c4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -479,7 +479,7 @@ void __init paging_init(void)
* To do this we need to go via a temporary pgd.
*/
cpu_replace_ttbr1(__va(pgd_phys));
- memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+ memcpy(swapper_pg_dir, pgd, PGD_SIZE);
cpu_replace_ttbr1(swapper_pg_dir);
pgd_clear_fixmap();
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds
index d2c8abf1c8c4..e958abe0f51e 100644
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -44,6 +44,8 @@ SECTIONS {
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 5b5ce1e4d1ed..1656ae8a145d 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -15,6 +15,8 @@ SECTIONS
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index fe5ea1974b16..07b9818b3176 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -15,6 +15,8 @@ SECTIONS
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5e844f68e847..2d2fd79ced9d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -112,12 +112,12 @@ config MIPS_GENERIC
select SYS_SUPPORTS_MULTITHREADING
select SYS_SUPPORTS_RELOCATABLE
select SYS_SUPPORTS_SMARTMIPS
- select USB_EHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
- select USB_EHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
- select USB_OHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
- select USB_OHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
- select USB_UHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
- select USB_UHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+ select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+ select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+ select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+ select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+ select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+ select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
help
Select this to build a kernel which aims to support multiple boards,
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index 3446b6fb3acb..9da4e2292fc7 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -576,7 +576,7 @@ static int __init ar7_register_uarts(void)
uart_port.type = PORT_AR7;
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
uart_port.iotype = UPIO_MEM32;
- uart_port.flags = UPF_FIXED_TYPE;
+ uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF;
uart_port.regshift = 2;
uart_port.line = 0;
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c558bce989cd..6e716a5f1173 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -683,6 +683,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
struct task_struct *t;
int max_users;
+ /* If nothing to change, return right away, successfully. */
+ if (value == mips_get_process_fp_mode(task))
+ return 0;
+
+ /* Only accept a mode change if 64-bit FP enabled for o32. */
+ if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
+ return -EOPNOTSUPP;
+
+ /* And only for o32 tasks. */
+ if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
+ return -EOPNOTSUPP;
+
/* Check the value is valid */
if (value & ~known_bits)
return -EOPNOTSUPP;
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 11890e6e4093..0c8ae2cc6380 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -439,63 +439,160 @@ static int gpr64_set(struct task_struct *target,
#endif /* CONFIG_64BIT */
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
+ * correspond 1:1 to buffer slots. Only general registers are copied.
+ */
+static int fpr_get_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ return user_regset_copyout(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
+ * general register slots are copied to buffer slots. Only general
+ * registers are copied.
+ */
+static int fpr_get_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+ err = user_regset_copyout(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ */
static int fpr_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
- for (i = 0; i < NUM_FPU_REGS; i++) {
- fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
- err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ return err;
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
+ * context's general register slots. Only general registers are copied.
+ */
+static int fpr_set_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ return user_regset_copyin(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
+ * bits only of FP context's general register slots. Only general
+ * registers are copied.
+ */
+static int fpr_set_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
+ err = user_regset_copyin(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
if (err)
return err;
+ set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
}
return 0;
}
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ *
+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
+ * which is supposed to have been guaranteed by the kernel before
+ * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
+ * so that we can safely avoid preinitializing temporaries for
+ * partial register writes.
+ */
static int fpr_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ u32 fcr31;
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ BUG_ON(count % sizeof(elf_fpreg_t));
+
+ if (pos + count > sizeof(elf_fpregset_t))
+ return -EIO;
init_fp_ctx(target);
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
- for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
+ if (count > 0) {
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ &fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
if (err)
return err;
- set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+
+ ptrace_setfcr31(target, fcr31);
}
- return 0;
+ return err;
}
enum mips_regset {
diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c
index b9920b1edd5a..70cef54dc40f 100644
--- a/arch/mn10300/mm/misalignment.c
+++ b/arch/mn10300/mm/misalignment.c
@@ -437,7 +437,7 @@ transfer_failed:
info.si_signo = SIGSEGV;
info.si_errno = 0;
- info.si_code = 0;
+ info.si_code = SEGV_MAPERR;
info.si_addr = (void *) regs->pc;
force_sig_info(SIGSEGV, &info, current);
return;
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 3d3f6062f49c..605a284922fb 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -302,12 +302,12 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address)
siginfo_t info;
if (user_mode(regs)) {
- /* Send a SIGSEGV */
- info.si_signo = SIGSEGV;
+ /* Send a SIGBUS */
+ info.si_signo = SIGBUS;
info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void *)address;
- force_sig_info(SIGSEGV, &info, current);
+ info.si_code = BUS_ADRALN;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGBUS, &info, current);
} else {
printk("KERNEL: Unaligned Access 0x%.8lx\n", address);
show_registers(regs);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6eda5abbd719..0a6bb48854e3 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -128,6 +128,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
+ select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
select GENERIC_TIME_VSYSCALL_OLD
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index f058e0c3e4d4..fd1d6c83f0c0 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -141,6 +141,7 @@ static struct shash_alg alg = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-vpmsum",
.cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_module = THIS_MODULE,
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a703452d67b6..555e22d5e07f 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -209,5 +209,11 @@ exc_##label##_book3e:
ori r3,r3,vector_offset@l; \
mtspr SPRN_IVOR##vector_number,r3;
+#define RFI_TO_KERNEL \
+ rfi
+
+#define RFI_TO_USER \
+ rfi
+
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a3eee661297..903e76a9f158 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -51,6 +51,59 @@
#define EX_PPR 88 /* SMT thread status register (priority) */
#define EX_CTR 96
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT \
+ RFI_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop
+
+#define RFI_TO_KERNEL \
+ rfid
+
+#define RFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define HRFI_TO_KERNEL \
+ hrfid
+
+#define HRFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
#ifdef CONFIG_RELOCATABLE
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
@@ -189,7 +242,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtspr SPRN_##h##SRR0,r12; \
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
mtspr SPRN_##h##SRR1,r10; \
- h##rfid; \
+ h##RFI_TO_KERNEL; \
b . /* prevent speculative execution */
#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
__EXCEPTION_PROLOG_PSERIES_1(label, h)
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index ddf54f5bbdd1..7b332342071c 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -189,4 +189,19 @@ void apply_feature_fixups(void);
void setup_feature_keys(void);
#endif
+#define RFI_FLUSH_FIXUP_SECTION \
+951: \
+ .pushsection __rfi_flush_fixup,"a"; \
+ .align 2; \
+952: \
+ FTR_ENTRY_OFFSET 951b-952b; \
+ .popsection;
+
+
+#ifndef __ASSEMBLY__
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
+#endif
+
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 708edebcf147..dc0996b9d75d 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -240,6 +240,7 @@
#define H_GET_HCA_INFO 0x1B8
#define H_GET_PERF_COUNT 0x1BC
#define H_MANAGE_TRACE 0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_QUERY_INT_STATE 0x1E4
#define H_POLL_PENDING 0x1D8
@@ -306,7 +307,19 @@
#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3
#define H_SET_MODE_RESOURCE_LE 4
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
+
#ifndef __ASSEMBLY__
+#include <linux/types.h>
/**
* plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
@@ -433,6 +446,11 @@ static inline unsigned long cmo_get_page_size(void)
}
#endif /* CONFIG_PPC_PSERIES */
+struct h_cpu_char_result {
+ u64 character;
+ u64 behaviour;
+};
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 6a6792bb39fb..c75ee2d886fc 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -205,6 +205,15 @@ struct paca_struct {
struct sibling_subcore_state *sibling_subcore_state;
#endif
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * rfi fallback flush must be in its own cacheline to prevent
+ * other paca data leaking into the L1d
+ */
+ u64 exrfi[13] __aligned(0x80);
+ void *rfi_flush_fallback_area;
+ u64 l1d_flush_size;
+#endif
};
#ifdef CONFIG_PPC_BOOK3S
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 1b394247afc2..4e53b8570d1f 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -340,4 +340,18 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
}
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+ if (rc == H_SUCCESS) {
+ p->character = retbuf[0];
+ p->behaviour = retbuf[1];
+ }
+
+ return rc;
+}
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 654d64c9f3ac..6825a67cc3db 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {}
static inline void pseries_little_endian_exceptions(void) {}
#endif /* CONFIG_PPC_PSERIES */
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+ L1D_FLUSH_NONE = 0x1,
+ L1D_FLUSH_FALLBACK = 0x2,
+ L1D_FLUSH_ORI = 0x4,
+ L1D_FLUSH_MTTRIG = 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c833d88c423d..14fbbd9035ca 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -240,6 +240,9 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+ DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area));
+ DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi));
+ DEFINE(PACA_L1D_FLUSH_SIZE, offsetof(struct paca_struct, l1d_flush_size));
#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index caa659671599..2dc52e6d2af4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -39,6 +39,11 @@
#include <asm/tm.h>
#include <asm/ppc-opcode.h>
#include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
/*
* System calls.
@@ -251,13 +256,23 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+ ld r2,GPR2(r1)
+ ld r1,GPR1(r1)
+ mtlr r4
+ mtcr r5
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r8
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+ /* exit to kernel */
1: ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
syscall_error:
@@ -386,8 +401,7 @@ tabort_syscall:
mtmsrd r10, 1
mtspr SPRN_SRR0, r11
mtspr SPRN_SRR1, r12
-
- rfid
+ RFI_TO_USER
b . /* prevent speculative execution */
#endif
@@ -859,7 +873,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
-1:
+
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
@@ -872,8 +886,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r3,GPR3(r1)
ld r4,GPR4(r1)
ld r1,GPR1(r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
- rfid
+1: mtspr SPRN_SRR1,r3
+
+ ld r2,_CCR(r1)
+ mtcrf 0xFF,r2
+ ld r2,_NIP(r1)
+ mtspr SPRN_SRR0,r2
+
+ ld r0,GPR0(r1)
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r1,GPR1(r1)
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
#endif /* CONFIG_PPC_BOOK3E */
@@ -1049,7 +1077,7 @@ _GLOBAL(enter_rtas)
mtspr SPRN_SRR0,r5
mtspr SPRN_SRR1,r6
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
rtas_return_loc:
@@ -1074,7 +1102,7 @@ rtas_return_loc:
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
.align 3
@@ -1145,7 +1173,7 @@ _GLOBAL(enter_prom)
LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
andc r11,r11,r12
mtsrr1 r11
- rfid
+ RFI_TO_KERNEL
#endif /* CONFIG_PPC_BOOK3E */
1: /* Return from OF */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index fd68e19b9ef7..7614d1dd2c0b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -244,7 +244,7 @@ BEGIN_FTR_SECTION
LOAD_HANDLER(r12, machine_check_handle_early)
1: mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r11
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
2:
/* Stack overflow. Stay on emergency stack and panic.
@@ -280,7 +280,7 @@ machine_check_pSeries_0:
mtspr SPRN_SRR0,r12
mfspr r12,SPRN_SRR1
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
@@ -446,7 +446,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
li r3,MSR_ME
andc r10,r10,r3 /* Turn off MSR_ME */
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
2:
/*
@@ -464,7 +464,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
*/
bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
- rfid
+ RFI_TO_USER_OR_KERNEL
9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
@@ -655,6 +655,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
beq- 2f
+ andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */
+ bne 1f
/* All done -- return from exception. */
@@ -671,7 +673,24 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
ld r11,PACA_EXSLB+EX_R11(r13)
ld r12,PACA_EXSLB+EX_R12(r13)
ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+
+1:
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x02,r9 /* I/D indication is in cr6 */
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+ RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ RFI_TO_USER
b . /* prevent speculative execution */
2: mfspr r11,SPRN_SRR0
@@ -679,7 +698,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
8: mfspr r11,SPRN_SRR0
@@ -687,7 +706,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
EXC_COMMON_BEGIN(unrecov_slb)
@@ -874,7 +893,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
mtspr SPRN_SRR0,r10 ; \
ld r10,PACAKMSR(r13) ; \
mtspr SPRN_SRR1,r10 ; \
- rfid ; \
+ RFI_TO_KERNEL ; \
b . ; /* prevent speculative execution */
#define SYSCALL_PSERIES_3 \
@@ -882,7 +901,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
1: mfspr r12,SPRN_SRR1 ; \
xori r12,r12,MSR_LE ; \
mtspr SPRN_SRR1,r12 ; \
- rfid ; /* return to userspace */ \
+ RFI_TO_USER ; /* return to userspace */ \
b . ; /* prevent speculative execution */
#if defined(CONFIG_RELOCATABLE)
@@ -1257,7 +1276,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
ld r13,PACA_EXGEN+EX_R13(r13)
- HRFID
+ HRFI_TO_UNKNOWN
b .
#endif
@@ -1331,7 +1350,7 @@ masked_##_H##interrupt: \
ld r10,PACA_EXGEN+EX_R10(r13); \
ld r11,PACA_EXGEN+EX_R11(r13); \
GET_SCRATCH0(r13); \
- ##_H##rfid; \
+ ##_H##RFI_TO_KERNEL; \
b .
/*
@@ -1353,7 +1372,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
addi r13, r13, 4
mtspr SPRN_SRR0, r13
GET_SCRATCH0(r13)
- rfid
+ RFI_TO_KERNEL
b .
TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1365,7 +1384,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
addi r13, r13, 4
mtspr SPRN_HSRR0, r13
GET_SCRATCH0(r13)
- hrfid
+ HRFI_TO_KERNEL
b .
#endif
@@ -1576,6 +1595,88 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
bl kernel_bad_stack
b 1b
+ .globl rfi_flush_fallback
+rfi_flush_fallback:
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ GET_SCRATCH0(r13);
+ rfid
+
+ .globl hrfi_flush_fallback
+hrfi_flush_fallback:
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ GET_SCRATCH0(r13);
+ hrfid
+
/*
* Called from arch_local_irq_enable when an interrupt needs
* to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index b350ac5e3111..d92c95333435 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -9,6 +9,7 @@
*/
#include <linux/threads.h>
+#include <asm/exception-64s.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cputable.h>
@@ -178,7 +179,7 @@ _GLOBAL(pnv_powersave_common)
mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
mtspr SPRN_SRR0, r5
mtspr SPRN_SRR1, r7
- rfid
+ RFI_TO_KERNEL
.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
@@ -668,7 +669,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mtcr r6
mtspr SPRN_SRR1,r4
mtspr SPRN_SRR0,r5
- rfid
+ RFI_TO_KERNEL
/*
* R3 here contains the value that will be returned to the caller
@@ -689,4 +690,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mtcr r6
mtspr SPRN_SRR1,r4
mtspr SPRN_SRR0,r5
- rfid
+ RFI_TO_KERNEL
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a12be60181bf..5243501d95ef 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -37,6 +37,7 @@
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/nmi.h>
+#include <linux/debugfs.h>
#include <asm/io.h>
#include <asm/kdump.h>
@@ -678,4 +679,131 @@ static int __init disable_hardlockup_detector(void)
return 0;
}
early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+ pr_info("rfi-flush: disabled on command line.");
+ no_rfi_flush = true;
+ return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+ pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+ handle_no_rfi_flush(NULL);
+ return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+ /*
+ * We don't need to do the flush explicitly, just enter+exit kernel is
+ * sufficient, the RFI exit handlers will do the right thing.
+ */
+}
+
+void rfi_flush_enable(bool enable)
+{
+ if (rfi_flush == enable)
+ return;
+
+ if (enable) {
+ do_rfi_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else
+ do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+ rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+ u64 l1d_size, limit;
+ int cpu;
+
+ l1d_size = ppc64_caches.dsize;
+ limit = min(safe_stack_limit(), ppc64_rma_size);
+
+ /*
+ * Align to L1d size, and size it at 2x L1d size, to catch possible
+ * hardware prefetch runoff. We don't have a recipe for load patterns to
+ * reliably avoid the prefetcher.
+ */
+ l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+ memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+ for_each_possible_cpu(cpu) {
+ paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca[cpu].l1d_flush_size = l1d_size;
+ }
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+ if (types & L1D_FLUSH_FALLBACK) {
+ pr_info("rfi-flush: Using fallback displacement flush\n");
+ init_fallback_flush();
+ }
+
+ if (types & L1D_FLUSH_ORI)
+ pr_info("rfi-flush: Using ori type flush\n");
+
+ if (types & L1D_FLUSH_MTTRIG)
+ pr_info("rfi-flush: Using mttrig type flush\n");
+
+ enabled_flush_types = types;
+
+ if (!no_rfi_flush)
+ rfi_flush_enable(enable);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int rfi_flush_set(void *data, u64 val)
+{
+ if (val == 1)
+ rfi_flush_enable(true);
+ else if (val == 0)
+ rfi_flush_enable(false);
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+ *val = rfi_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+ debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (rfi_flush)
+ return sprintf(buf, "Mitigation: RFI Flush\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
#endif
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7394b770ae1f..b61fb7902018 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@ SECTIONS
/* Read-only data */
RODATA
+#ifdef CONFIG_PPC64
+ . = ALIGN(8);
+ __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+ __start___rfi_flush_fixup = .;
+ *(__rfi_flush_fixup)
+ __stop___rfi_flush_fixup = .;
+ }
+#endif
+
EXCEPTION_TABLE(0)
NOTES :kernel :notes
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0447a22a4df6..55fbc0c78721 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -65,7 +65,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
mtmsrd r0,1 /* clear RI in MSR */
mtsrr0 r5
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
kvmppc_call_hv_entry:
ld r4, HSTATE_KVM_VCPU(r13)
@@ -171,7 +171,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtsrr0 r8
mtsrr1 r7
beq cr1, 13f /* machine check */
- RFI
+ RFI_TO_KERNEL
/* On POWER7, we have external interrupts set to use HSRR0/1 */
11: mtspr SPRN_HSRR0, r8
@@ -1018,8 +1018,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r0, VCPU_GPR(R0)(r4)
ld r4, VCPU_GPR(R4)(r4)
-
- hrfid
+ HRFI_TO_GUEST
b .
secondary_too_late:
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 42a4b237df5f..34a5adeff084 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -46,6 +46,9 @@
#define FUNC(name) name
+#define RFI_TO_KERNEL RFI
+#define RFI_TO_GUEST RFI
+
.macro INTERRUPT_TRAMPOLINE intno
.global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
GET_SCRATCH0(r13)
/* And get back into the code */
- RFI
+ RFI_TO_KERNEL
#endif
/*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
ori r5, r5, MSR_EE
mtsrr0 r7
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index ca8f174289bb..7c982956d709 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -156,7 +156,7 @@ no_dcbz32_on:
PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
- RFI
+ RFI_TO_GUEST
kvmppc_handler_trampoline_enter_end:
@@ -389,5 +389,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
beqa BOOK3S_INTERRUPT_DOORBELL
- RFI
+ RFI_TO_KERNEL
kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 043415f0bdb1..e86bfa111f3c 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -23,6 +23,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/firmware.h>
+#include <asm/setup.h>
struct fixup_entry {
unsigned long mask;
@@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[3], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___rfi_flush_fixup),
+ end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+
+ if (types & L1D_FLUSH_FALLBACK)
+ /* b .+16 to fallback flush */
+ instrs[0] = 0x48000010;
+
+ i = 0;
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+ patch_instruction(dest + 1, instrs[1]);
+ patch_instruction(dest + 2, instrs[2]);
+ }
+
+ printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 083f92746951..bf949623de90 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1381,7 +1381,7 @@ static int collect_events(struct perf_event *group, int max_count,
int n = 0;
struct perf_event *event;
- if (!is_software_event(group)) {
+ if (group->pmu->task_ctx_nr == perf_hw_context) {
if (n >= max_count)
return -1;
ctrs[n] = group;
@@ -1389,7 +1389,7 @@ static int collect_events(struct perf_event *group, int max_count,
events[n++] = group->hw.config;
}
list_for_each_entry(event, &group->sibling_list, group_entry) {
- if (!is_software_event(event) &&
+ if (event->pmu->task_ctx_nr == perf_hw_context &&
event->state != PERF_EVENT_STATE_OFF) {
if (n >= max_count)
return -1;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index b33faa0015cc..6f8b4c19373a 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -35,13 +35,63 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/tm.h>
+#include <asm/setup.h>
#include "powernv.h"
+static void pnv_setup_rfi_flush(void)
+{
+ struct device_node *np, *fw_features;
+ enum l1d_flush_type type;
+ int enable;
+
+ /* Default to fallback in case fw-features are not available */
+ type = L1D_FLUSH_FALLBACK;
+ enable = 1;
+
+ np = of_find_node_by_name(NULL, "ibm,opal");
+ fw_features = of_get_child_by_name(np, "fw-features");
+ of_node_put(np);
+
+ if (fw_features) {
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_MTTRIG;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_ORI;
+
+ of_node_put(np);
+
+ /* Enable unless firmware says NOT to */
+ enable = 2;
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+ of_node_put(fw_features);
+ }
+
+ setup_rfi_flush(type, enable > 0);
+}
+
static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+ pnv_setup_rfi_flush();
+
/* Initialize SMP */
pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 97aa3f332f24..1845fc611912 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -450,6 +450,39 @@ static void __init find_and_init_phbs(void)
of_pci_check_probe_only();
}
+static void pseries_setup_rfi_flush(void)
+{
+ struct h_cpu_char_result result;
+ enum l1d_flush_type types;
+ bool enable;
+ long rc;
+
+ /* Enable by default */
+ enable = true;
+
+ rc = plpar_get_cpu_characteristics(&result);
+ if (rc == H_SUCCESS) {
+ types = L1D_FLUSH_NONE;
+
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+ types |= L1D_FLUSH_MTTRIG;
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+ types |= L1D_FLUSH_ORI;
+
+ /* Use fallback if nothing set in hcall */
+ if (types == L1D_FLUSH_NONE)
+ types = L1D_FLUSH_FALLBACK;
+
+ if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ enable = false;
+ } else {
+ /* Default to fallback if case hcall is not available */
+ types = L1D_FLUSH_FALLBACK;
+ }
+
+ setup_rfi_flush(types, enable);
+}
+
static void __init pSeries_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -467,6 +500,8 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
+ pseries_setup_rfi_flush();
+
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 992e630c227b..6f4985f357c6 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -238,6 +238,7 @@ static struct shash_alg crc32_vx_algs[] = {
.cra_name = "crc32",
.cra_driver_name = "crc32-vx",
.cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CRC32_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crc_ctx),
.cra_module = THIS_MODULE,
@@ -258,6 +259,7 @@ static struct shash_alg crc32_vx_algs[] = {
.cra_name = "crc32be",
.cra_driver_name = "crc32be-vx",
.cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CRC32_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crc_ctx),
.cra_module = THIS_MODULE,
@@ -278,6 +280,7 @@ static struct shash_alg crc32_vx_algs[] = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-vx",
.cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CRC32_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crc_ctx),
.cra_module = THIS_MODULE,
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f06a9a0063f1..d0724a924184 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -110,7 +110,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid)
COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
{
- return sys_setgid((gid_t)gid);
+ return sys_setgid(low2highgid(gid));
}
COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
@@ -120,7 +120,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
{
- return sys_setuid((uid_t)uid);
+ return sys_setuid(low2highuid(uid));
}
COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
@@ -173,12 +173,12 @@ COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp,
COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
{
- return sys_setfsuid((uid_t)uid);
+ return sys_setfsuid(low2highuid(uid));
}
COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
{
- return sys_setfsgid((gid_t)gid);
+ return sys_setfsgid(low2highgid(gid));
}
static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index ff639342a8be..c5b997757988 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -607,7 +607,8 @@ asmlinkage void do_divide_error(unsigned long r4)
break;
}
- force_sig_info(SIGFPE, &info, current);
+ info.si_signo = SIGFPE;
+ force_sig_info(info.si_signo, &info, current);
}
#endif
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
index d1064e46efe8..8aa664638c3c 100644
--- a/arch/sparc/crypto/crc32c_glue.c
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -133,6 +133,7 @@ static struct shash_alg alg = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_alignmask = 7,
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 0ca46ededfc7..9c150ccb35d2 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -117,7 +117,7 @@ archheaders:
archprepare: include/generated/user_constants.h
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
$(call cc-option, -fno-stack-protector,) \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index da8156fd3d58..a4ac7bab15f7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -64,6 +64,7 @@ config X86
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
@@ -407,6 +408,19 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ ---help---
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
if X86_32
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
@@ -1043,7 +1057,7 @@ config X86_MCE_THRESHOLD
def_bool y
config X86_MCE_INJECT
- depends on X86_MCE
+ depends on X86_MCE && X86_LOCAL_APIC
tristate "Machine check injector support"
---help---
Provide support for injecting machine checks for testing purposes.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 67eec55093a5..4386440fe463 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -352,6 +352,7 @@ config X86_DEBUG_FPU
config PUNIT_ATOM_DEBUG
tristate "ATOM Punit debug driver"
+ depends on PCI
select DEBUG_FS
select IOSF_MBI
---help---
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d449337a360..cd22cb8ebd42 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -182,6 +182,14 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 12ea8f8384f4..3b7156f46bc1 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -73,12 +73,13 @@ UBSAN_SANITIZE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
+silent_redirect_image = >/dev/null
cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
- $(obj)/zoffset.h $@
+ $(obj)/zoffset.h $@ $($(quiet)redirect_image)
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
- @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+ @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 383a6f84a060..fa8801b35e51 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
#include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
/*
* The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8)
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index aa8b0672f87a..d9ae404f08c9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -906,7 +906,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
if (sg_is_last(req->src) &&
req->src->offset + req->src->length <= PAGE_SIZE &&
- sg_is_last(req->dst) &&
++ sg_is_last(req->dst) && req->dst->length &&
req->dst->offset + req->dst->length <= PAGE_SIZE) {
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index aa9e8bd163f6..77ff4de2224d 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way:
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 16186c18656d..7384342fbb41 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way:
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 27226df3f7d8..c8d9cdacbf10 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -162,6 +162,7 @@ static struct shash_alg alg = {
.cra_name = "crc32",
.cra_driver_name = "crc32-pclmul",
.cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_module = THIS_MODULE,
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0857b1a1de3b..60a391b8c4a2 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -239,6 +239,7 @@ static struct shash_alg alg = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-intel",
.cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_module = THIS_MODULE,
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index dc05f010ca9b..174fd4146043 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
#include <asm/inst.h>
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
@@ -172,7 +173,7 @@ continue_block:
movzxw (bufp, %rax, 2), len
lea crc_array(%rip), bufp
lea (bufp, len, 1), bufp
- jmp *bufp
+ JMP_NOSPEC bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index e32142bc071d..28c372003e44 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -164,7 +164,6 @@ static struct shash_alg alg = {
.init = poly1305_simd_init,
.update = poly1305_simd_update,
.final = crypto_poly1305_final,
- .setkey = crypto_poly1305_setkey,
.descsize = sizeof(struct poly1305_simd_desc_ctx),
.base = {
.cra_name = "poly1305",
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
index 36870b26067a..d08805032f01 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
{
unsigned int j;
- state->lens[0] = 0;
- state->lens[1] = 1;
- state->lens[2] = 2;
- state->lens[3] = 3;
+ /* initially all lanes are unused */
+ state->lens[0] = 0xFFFFFFFF00000000;
+ state->lens[1] = 0xFFFFFFFF00000001;
+ state->lens[2] = 0xFFFFFFFF00000002;
+ state->lens[3] = 0xFFFFFFFF00000003;
+
state->unused_lanes = 0xFF03020100;
for (j = 0; j < 4; j++)
state->ldata[j].job_in_lane = NULL;
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index 1c3b7ceb36d2..e7273a606a07 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -55,29 +55,31 @@
#define RAB1bl %bl
#define RAB2bl %cl
+#define CD0 0x0(%rsp)
+#define CD1 0x8(%rsp)
+#define CD2 0x10(%rsp)
+
+# used only before/after all rounds
#define RCD0 %r8
#define RCD1 %r9
#define RCD2 %r10
-#define RCD0d %r8d
-#define RCD1d %r9d
-#define RCD2d %r10d
-
-#define RX0 %rbp
-#define RX1 %r11
-#define RX2 %r12
+# used only during rounds
+#define RX0 %r8
+#define RX1 %r9
+#define RX2 %r10
-#define RX0d %ebp
-#define RX1d %r11d
-#define RX2d %r12d
+#define RX0d %r8d
+#define RX1d %r9d
+#define RX2d %r10d
-#define RY0 %r13
-#define RY1 %r14
-#define RY2 %r15
+#define RY0 %r11
+#define RY1 %r12
+#define RY2 %r13
-#define RY0d %r13d
-#define RY1d %r14d
-#define RY2d %r15d
+#define RY0d %r11d
+#define RY1d %r12d
+#define RY2d %r13d
#define RT0 %rdx
#define RT1 %rsi
@@ -85,6 +87,8 @@
#define RT0d %edx
#define RT1d %esi
+#define RT1bl %sil
+
#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
movzbl ab ## bl, tmp2 ## d; \
movzbl ab ## bh, tmp1 ## d; \
@@ -92,6 +96,11 @@
op1##l T0(CTX, tmp2, 4), dst ## d; \
op2##l T1(CTX, tmp1, 4), dst ## d;
+#define swap_ab_with_cd(ab, cd, tmp) \
+ movq cd, tmp; \
+ movq ab, cd; \
+ movq tmp, ab;
+
/*
* Combined G1 & G2 function. Reordered with help of rotates to have moves
* at begining.
@@ -110,15 +119,15 @@
/* G1,2 && G2,2 */ \
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
- xchgq cd ## 0, ab ## 0; \
+ swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
- xchgq cd ## 1, ab ## 1; \
+ swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
- xchgq cd ## 2, ab ## 2;
+ swap_ab_with_cd(ab ## 2, cd ## 2, RT0);
#define enc_round_end(ab, x, y, n) \
addl y ## d, x ## d; \
@@ -168,6 +177,16 @@
decrypt_round3(ba, dc, (n*2)+1); \
decrypt_round3(ba, dc, (n*2));
+#define push_cd() \
+ pushq RCD2; \
+ pushq RCD1; \
+ pushq RCD0;
+
+#define pop_cd() \
+ popq RCD0; \
+ popq RCD1; \
+ popq RCD2;
+
#define inpack3(in, n, xy, m) \
movq 4*(n)(in), xy ## 0; \
xorq w+4*m(CTX), xy ## 0; \
@@ -223,11 +242,8 @@ ENTRY(__twofish_enc_blk_3way)
* %rdx: src, RIO
* %rcx: bool, if true: xor output
*/
- pushq %r15;
- pushq %r14;
pushq %r13;
pushq %r12;
- pushq %rbp;
pushq %rbx;
pushq %rcx; /* bool xor */
@@ -235,40 +251,36 @@ ENTRY(__twofish_enc_blk_3way)
inpack_enc3();
- encrypt_cycle3(RAB, RCD, 0);
- encrypt_cycle3(RAB, RCD, 1);
- encrypt_cycle3(RAB, RCD, 2);
- encrypt_cycle3(RAB, RCD, 3);
- encrypt_cycle3(RAB, RCD, 4);
- encrypt_cycle3(RAB, RCD, 5);
- encrypt_cycle3(RAB, RCD, 6);
- encrypt_cycle3(RAB, RCD, 7);
+ push_cd();
+ encrypt_cycle3(RAB, CD, 0);
+ encrypt_cycle3(RAB, CD, 1);
+ encrypt_cycle3(RAB, CD, 2);
+ encrypt_cycle3(RAB, CD, 3);
+ encrypt_cycle3(RAB, CD, 4);
+ encrypt_cycle3(RAB, CD, 5);
+ encrypt_cycle3(RAB, CD, 6);
+ encrypt_cycle3(RAB, CD, 7);
+ pop_cd();
popq RIO; /* dst */
- popq %rbp; /* bool xor */
+ popq RT1; /* bool xor */
- testb %bpl, %bpl;
+ testb RT1bl, RT1bl;
jnz .L__enc_xor3;
outunpack_enc3(mov);
popq %rbx;
- popq %rbp;
popq %r12;
popq %r13;
- popq %r14;
- popq %r15;
ret;
.L__enc_xor3:
outunpack_enc3(xor);
popq %rbx;
- popq %rbp;
popq %r12;
popq %r13;
- popq %r14;
- popq %r15;
ret;
ENDPROC(__twofish_enc_blk_3way)
@@ -278,35 +290,31 @@ ENTRY(twofish_dec_blk_3way)
* %rsi: dst
* %rdx: src, RIO
*/
- pushq %r15;
- pushq %r14;
pushq %r13;
pushq %r12;
- pushq %rbp;
pushq %rbx;
pushq %rsi; /* dst */
inpack_dec3();
- decrypt_cycle3(RAB, RCD, 7);
- decrypt_cycle3(RAB, RCD, 6);
- decrypt_cycle3(RAB, RCD, 5);
- decrypt_cycle3(RAB, RCD, 4);
- decrypt_cycle3(RAB, RCD, 3);
- decrypt_cycle3(RAB, RCD, 2);
- decrypt_cycle3(RAB, RCD, 1);
- decrypt_cycle3(RAB, RCD, 0);
+ push_cd();
+ decrypt_cycle3(RAB, CD, 7);
+ decrypt_cycle3(RAB, CD, 6);
+ decrypt_cycle3(RAB, CD, 5);
+ decrypt_cycle3(RAB, CD, 4);
+ decrypt_cycle3(RAB, CD, 3);
+ decrypt_cycle3(RAB, CD, 2);
+ decrypt_cycle3(RAB, CD, 1);
+ decrypt_cycle3(RAB, CD, 0);
+ pop_cd();
popq RIO; /* dst */
outunpack_dec3();
popq %rbx;
- popq %rbp;
popq %r12;
popq %r13;
- popq %r14;
- popq %r15;
ret;
ENDPROC(twofish_dec_blk_3way)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bdd9cc59d20f..b0cd306dc527 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -20,6 +20,7 @@
#include <linux/export.h>
#include <linux/context_tracking.h>
#include <linux/user-return-notifier.h>
+#include <linux/nospec.h>
#include <linux/uprobes.h>
#include <asm/desc.h>
@@ -201,7 +202,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
* special case only applies after poking regs and before the
* very next return to user mode.
*/
- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
user_enter_irqoff();
@@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs)
* regs->orig_ax, which changes the behavior of some syscalls.
*/
if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
- regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
+ regs->ax = sys_call_table[nr](
regs->di, regs->si, regs->dx,
regs->r10, regs->r8, regs->r9);
}
@@ -299,7 +301,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
unsigned int nr = (unsigned int)regs->orig_ax;
#ifdef CONFIG_IA32_EMULATION
- current->thread.status |= TS_COMPAT;
+ ti->status |= TS_COMPAT;
#endif
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
@@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
}
if (likely(nr < IA32_NR_syscalls)) {
+ nr = array_index_nospec(nr, IA32_NR_syscalls);
/*
* It's possible that a 32-bit syscall implementation
* takes a 64-bit parameter but nonetheless assumes that
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index edba8606b99a..f5434b4670c1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -45,6 +45,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/nospec-branch.h>
.section .entry.text, "ax"
@@ -228,6 +229,18 @@ ENTRY(__switch_to_asm)
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ /* Clobbers %ebx */
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl %esi
popl %edi
@@ -260,7 +273,7 @@ ENTRY(ret_from_fork)
/* kernel thread */
1: movl %edi, %eax
- call *%ebx
+ CALL_NOSPEC %ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -984,7 +997,8 @@ trace:
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
- call *ftrace_trace_function
+ movl ftrace_trace_function, %ecx
+ CALL_NOSPEC %ecx
popl %edx
popl %ecx
@@ -1020,7 +1034,7 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- jmp *%ecx
+ JMP_NOSPEC %ecx
#endif
#ifdef CONFIG_TRACING
@@ -1062,7 +1076,7 @@ error_code:
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call *%edi
+ CALL_NOSPEC %edi
jmp ret_from_exception
END(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index af4e58132d91..db5009ce065a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -37,6 +37,7 @@
#include <asm/pgtable_types.h>
#include <asm/export.h>
#include <asm/kaiser.h>
+#include <asm/nospec-branch.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -176,91 +177,17 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ pushq %rbx /* pt_regs->rbx */
+ pushq %rbp /* pt_regs->rbp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */
- /*
- * If we need to do entry work or if we guess we'll need to do
- * exit work, go straight to the slow path.
- */
- movq PER_CPU_VAR(current_task), %r11
- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
- jnz entry_SYSCALL64_slow_path
-
-entry_SYSCALL_64_fastpath:
- /*
- * Easy case: enable interrupts and issue the syscall. If the syscall
- * needs pt_regs, we'll call a stub that disables interrupts again
- * and jumps to the slow path.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
-#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max, %rax
-#else
- andl $__SYSCALL_MASK, %eax
- cmpl $__NR_syscall_max, %eax
-#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10, %rcx
-
- /*
- * This call instruction is handled specially in stub_ptregs_64.
- * It might end up jumping to the slow path. If it jumps, RAX
- * and all argument registers are clobbered.
- */
- call *sys_call_table(, %rax, 8)
-.Lentry_SYSCALL_64_after_fastpath_call:
-
- movq %rax, RAX(%rsp)
-1:
-
- /*
- * If we get here, then we know that pt_regs is clean for SYSRET64.
- * If we see that no exit work is required (which we are required
- * to check with IRQs off), then we can go straight to SYSRET64.
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- movq PER_CPU_VAR(current_task), %r11
- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
- jnz 1f
-
- LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON /* user mode is traced as IRQs on */
- movq RIP(%rsp), %rcx
- movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
- /*
- * This opens a window where we have a user CR3, but are
- * running in the kernel. This makes using the CS
- * register useless for telling whether or not we need to
- * switch CR3 in NMIs. Normal interrupts are OK because
- * they are off here.
- */
- SWITCH_USER_CR3
- movq RSP(%rsp), %rsp
- USERGS_SYSRET64
-
-1:
- /*
- * The fast path looked good when we started, but something changed
- * along the way and we need to switch to the slow path. Calling
- * raise(3) will trigger this, for example. IRQs are off.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- SAVE_EXTRA_REGS
- movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
- jmp return_from_SYSCALL_64
-
-entry_SYSCALL64_slow_path:
/* IRQs are off. */
- SAVE_EXTRA_REGS
movq %rsp, %rdi
call do_syscall_64 /* returns with IRQs disabled */
-return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -333,6 +260,7 @@ return_from_SYSCALL_64:
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
+
/*
* This opens a window where we have a user CR3, but are
* running in the kernel. This makes using the CS
@@ -357,45 +285,6 @@ opportunistic_sysret_failed:
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
-ENTRY(stub_ptregs_64)
- /*
- * Syscalls marked as needing ptregs land here.
- * If we are on the fast path, we need to save the extra regs,
- * which we achieve by trying again on the slow path. If we are on
- * the slow path, the extra regs are already saved.
- *
- * RAX stores a pointer to the C function implementing the syscall.
- * IRQs are on.
- */
- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
- jne 1f
-
- /*
- * Called from fast path -- disable IRQs again, pop return address
- * and jump to slow path
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- popq %rax
- jmp entry_SYSCALL64_slow_path
-
-1:
- jmp *%rax /* Called from C */
-END(stub_ptregs_64)
-
-.macro ptregs_stub func
-ENTRY(ptregs_\func)
- leaq \func(%rip), %rax
- jmp stub_ptregs_64
-END(ptregs_\func)
-.endm
-
-/* Instantiate ptregs_stub for each ptregs-using syscall */
-#define __SYSCALL_64_QUAL_(sym)
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
-#include <asm/syscalls_64.h>
-
/*
* %rdi: prev task
* %rsi: next task
@@ -421,6 +310,18 @@ ENTRY(__switch_to_asm)
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
#endif
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ /* Clobbers %rbx */
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq %r15
popq %r14
@@ -457,7 +358,7 @@ ENTRY(ret_from_fork)
1:
/* kernel thread */
movq %r12, %rdi
- call *%rbx
+ CALL_NOSPEC %rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -1047,7 +948,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
+idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d76a97653980..92c55738d543 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -83,15 +83,25 @@ ENTRY(entry_SYSENTER_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorq %r15, %r15 /* nospec r15 */
cld
/*
@@ -209,15 +219,25 @@ ENTRY(entry_SYSCALL_compat)
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorq %r15, %r15 /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -320,15 +340,25 @@ ENTRY(entry_INT80_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
+ xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
+ xorq %r12, %r12 /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
+ xorq %r13, %r13 /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
+ xorq %r14, %r14 /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
+ xorq %r15, %r15 /* nospec r15 */
cld
/*
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 9dbc5abb6162..6705edda4ac3 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,14 +6,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_64_QUAL_(sym) sym
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
-
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
+#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 6bb7e92c6d50..0174290b2857 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
#else
EMULATE;
#endif
+unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL;
static int __init vsyscall_setup(char *str)
{
@@ -336,11 +337,11 @@ void __init map_vsyscall(void)
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+ if (vsyscall_mode != NATIVE)
+ vsyscall_pgprot = __PAGE_KERNEL_VVAR;
if (vsyscall_mode != NONE)
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
- vsyscall_mode == NATIVE
- ? PAGE_KERNEL_VSYSCALL
- : PAGE_KERNEL_VVAR);
+ __pgprot(vsyscall_pgprot));
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 9842270ed2f2..21a4e4127f43 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void)
int ret;
if (!x86_match_cpu(cpu_match))
- return 0;
+ return -ENODEV;
if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
return -ENODEV;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 9604b2574d6c..f73796db8758 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -190,8 +190,8 @@ static void release_pmc_hardware(void) {}
static bool check_hw_exists(void)
{
- u64 val, val_fail, val_new= ~0;
- int i, reg, reg_fail, ret = 0;
+ u64 val, val_fail = -1, val_new= ~0;
+ int i, reg, reg_fail = -1, ret = 0;
int bios_fail = 0;
int reg_safe = -1;
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 982c9e31daca..21298c173b0e 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -22,6 +22,7 @@
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/coredump.h>
+#include <linux/kaiser.h>
#include <asm-generic/sizes.h>
#include <asm/perf_event.h>
@@ -77,6 +78,23 @@ static size_t buf_size(struct page *page)
return 1 << (PAGE_SHIFT + page_private(page));
}
+static void bts_buffer_free_aux(void *data)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ struct bts_buffer *buf = data;
+ int nbuf;
+
+ for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) {
+ struct page *page = buf->buf[nbuf].page;
+ void *kaddr = page_address(page);
+ size_t page_size = buf_size(page);
+
+ kaiser_remove_mapping((unsigned long)kaddr, page_size);
+ }
+#endif
+ kfree(data);
+}
+
static void *
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
{
@@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
buf->real_size = size - size % BTS_RECORD_SIZE;
for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
- unsigned int __nr_pages;
+ void *kaddr = pages[pg];
+ size_t page_size;
+
+ page = virt_to_page(kaddr);
+ page_size = buf_size(page);
+
+ if (kaiser_add_mapping((unsigned long)kaddr,
+ page_size, __PAGE_KERNEL) < 0) {
+ buf->nr_bufs = nbuf;
+ bts_buffer_free_aux(buf);
+ return NULL;
+ }
- page = virt_to_page(pages[pg]);
- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
buf->buf[nbuf].page = page;
buf->buf[nbuf].offset = offset;
buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
+ buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement;
pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
buf->buf[nbuf].size -= pad;
- pg += __nr_pages;
- offset += __nr_pages << PAGE_SHIFT;
+ pg += page_size >> PAGE_SHIFT;
+ offset += page_size;
}
return buf;
}
-static void bts_buffer_free_aux(void *data)
-{
- kfree(data);
-}
-
static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
{
return buf->buf[idx].offset + buf->buf[idx].displacement;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f0f197f459b5..0bd0c1cc3228 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3363,7 +3363,7 @@ static int intel_snb_pebs_broken(int cpu)
break;
case INTEL_FAM6_SANDYBRIDGE_X:
- switch (cpu_data(cpu).x86_mask) {
+ switch (cpu_data(cpu).x86_stepping) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f924629836a8..5d103a87e984 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1131,7 +1131,7 @@ void __init intel_pmu_lbr_init_atom(void)
* on PMU interrupt
*/
if (boot_cpu_data.x86_model == 28
- && boot_cpu_data.x86_mask < 10) {
+ && boot_cpu_data.x86_stepping < 10) {
pr_cont("LBR disabled due to erratum");
return;
}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index 1f5c47ab4c65..c5e441baccc7 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -233,7 +233,7 @@ static __initconst const struct x86_pmu p6_pmu = {
static __init void p6_pmu_rdpmc_quirk(void)
{
- if (boot_cpu_data.x86_mask < 9) {
+ if (boot_cpu_data.x86_stepping < 9) {
/*
* PPro erratum 26; fixed in stepping 9 and above.
*/
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 5391b0ae7cc3..d32bab65de70 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -92,7 +92,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
if (boot_cpu_data.x86 == 0x0F &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86_model <= 0x05 &&
- boot_cpu_data.x86_mask < 0x0A)
+ boot_cpu_data.x86_stepping < 0x0A)
return 1;
else if (amd_e400_c1e_detected)
return 1;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index d4aea31eec03..deca9b9c7923 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
- ".popsection"
+ ".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR_2(oldinstr, 1, 2) \
@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
- ".popsection"
+ ".popsection\n"
/*
* Alternative instructions for different CPU types or capabilities.
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 44b8762fa0c7..166654218329 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -10,7 +10,34 @@
#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
+#include <asm/asm.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
#endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+asmlinkage void __fill_rsb(void);
+asmlinkage void __clear_rsb(void);
+
+#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7acb51c49fec..7bb29a416b77 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
# define __ASM_FORM_COMMA(x) " " #x ","
#endif
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
# define __ASM_SEL(a,b) __ASM_FORM(a)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
+/* 64 bit */
# define __ASM_SEL(a,b) __ASM_FORM(b)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif
@@ -125,4 +127,15 @@
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif
+
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index bfb28caf97b1..78d1c6a3d221 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -23,6 +23,34 @@
#define wmb() asm volatile("sfence" ::: "memory")
#endif
+/**
+ * array_index_mask_nospec() - generate a mask that is ~0UL when the
+ * bounds check succeeds and 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ *
+ * Returns:
+ * 0 - (index < size)
+ */
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
+{
+ unsigned long mask;
+
+ asm ("cmp %1,%2; sbb %0,%0;"
+ :"=r" (mask)
+ :"g"(size),"r" (index)
+ :"cc");
+ return mask;
+}
+
+/* Override the default implementation from linux/nospec.h. */
+#define array_index_mask_nospec array_index_mask_nospec
+
+/* Prevent speculative execution past this barrier. */
+#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
+ "lfence", X86_FEATURE_LFENCE_RDTSC)
+
#ifdef CONFIG_X86_PPRO_FENCE
#define dma_rmb() rmb()
#else
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d2b69fc0ceb..8c101579f535 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -28,6 +28,7 @@ enum cpuid_leafs
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
+ CPUID_7_EDX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
@@ -135,6 +138,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 454a37adb823..8eb23f5cf7f4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -194,13 +194,16 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
@@ -232,6 +235,7 @@
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -256,6 +260,9 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
+#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -291,6 +298,13 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+
/*
* BUG word(s)
*/
@@ -316,5 +330,8 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 21c5ac15657b..1f8cca459c6c 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -59,6 +59,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 34a46dc076d3..75b748a1deb8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -12,6 +12,7 @@
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
+
#define INTEL_FAM6_CORE2_MEROM 0x0F
#define INTEL_FAM6_CORE2_MEROM_L 0x16
#define INTEL_FAM6_CORE2_PENRYN 0x17
@@ -21,6 +22,7 @@
#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
#define INTEL_FAM6_NEHALEM_EP 0x1A
#define INTEL_FAM6_NEHALEM_EX 0x2E
+
#define INTEL_FAM6_WESTMERE 0x25
#define INTEL_FAM6_WESTMERE_EP 0x2C
#define INTEL_FAM6_WESTMERE_EX 0x2F
@@ -36,9 +38,9 @@
#define INTEL_FAM6_HASWELL_GT3E 0x46
#define INTEL_FAM6_BROADWELL_CORE 0x3D
-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_BROADWELL_GT3E 0x47
#define INTEL_FAM6_BROADWELL_X 0x4F
+#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
@@ -57,9 +59,10 @@
#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
+#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
/* Xeon Phi */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cbd1d44da2d3..20cfeeb681c6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1113,7 +1113,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
int emulation_type)
{
- return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+ return x86_emulate_instruction(vcpu, 0,
+ emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
}
void kvm_enable_efer_bits(u64);
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 15eb75484cc0..98ccbd1dbb01 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -59,7 +59,6 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
extern int apply_microcode_amd(int cpu);
-extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
#define PATCH_MAX_SIZE PAGE_SIZE
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b601ddac5719..c768bc1550a1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -37,6 +37,13 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
+#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
+#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+
+#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
@@ -50,6 +57,11 @@
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
+
+#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
+#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -330,6 +342,9 @@
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index b5fee97813cd..ed35b915b5c9 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* that some other imaginary CPU is updating continuously with a
* time stamp.
*/
- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
- "lfence", X86_FEATURE_LFENCE_RDTSC);
+ barrier_nospec();
return rdtsc();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 000000000000..76b058533e47
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_X86_NOSPEC_BRANCH_H_
+#define _ASM_X86_NOSPEC_BRANCH_H_
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative. It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.nospec
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ pause
+ lfence
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+ jmp .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+ RETPOLINE_JMP \reg
+.Ldo_call_\@:
+ call .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+/* This clobbers the BX register */
+.macro FILL_RETURN_BUFFER nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "", "call __clear_rsb", \ftr
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE \
+ "999:\n\t" \
+ ".pushsection .discard.nospec\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC \
+ ANNOTATE_NOSPEC_ALTERNATIVE \
+ ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+ " .align 16\n" \
+ "901: call 903f;\n" \
+ "902: pause;\n" \
+ " lfence;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+ "903: addl $4, %%esp;\n" \
+ " pushl %[thunk_target];\n" \
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+ X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+ SPECTRE_V2_RETPOLINE_MINIMAL,
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+ SPECTRE_V2_RETPOLINE_GENERIC,
+ SPECTRE_V2_RETPOLINE_AMD,
+ SPECTRE_V2_IBRS,
+};
+
+extern char __indirect_thunk_start[];
+extern char __indirect_thunk_end[];
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ALL *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+ alternative_input("",
+ "call __fill_rsb",
+ X86_FEATURE_RETPOLINE,
+ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+#endif
+}
+
+static inline void indirect_branch_prediction_barrier(void)
+{
+ asm volatile(ALTERNATIVE("",
+ "movl %[msr], %%ecx\n\t"
+ "movl %[val], %%eax\n\t"
+ "movl $0, %%edx\n\t"
+ "wrmsr",
+ X86_FEATURE_USE_IBPB)
+ : : [msr] "i" (MSR_IA32_PRED_CMD),
+ [val] "i" (PRED_CMD_IBPB)
+ : "eax", "ecx", "edx", "memory");
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2536f90cd30c..5af0401ccff2 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -20,9 +20,15 @@
#ifdef CONFIG_PAGE_TABLE_ISOLATION
extern int kaiser_enabled;
+/*
+ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
#else
#define kaiser_enabled 0
#endif
+#define PGD_ALLOCATION_ORDER kaiser_enabled
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
void ptdump_walk_pgd_level_checkwx(void);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 8cb52ee3ade6..ec15ca2b32d0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -88,7 +88,7 @@ struct cpuinfo_x86 {
__u8 x86; /* CPU family */
__u8 x86_vendor; /* CPU vendor */
__u8 x86_model;
- __u8 x86_mask;
+ __u8 x86_stepping;
#ifdef CONFIG_X86_32
char wp_works_ok; /* It doesn't on 386's */
@@ -113,7 +113,7 @@ struct cpuinfo_x86 {
char x86_vendor_id[16];
char x86_model_id[64];
/* in KB - valid for CPUS which support this call: */
- int x86_cache_size;
+ unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
/* Cache QoS architectural values: */
int x86_cache_max_rmid; /* max index */
@@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
-extern __u32 cpu_caps_cleared[NCAPINTS];
-extern __u32 cpu_caps_set[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -391,8 +391,6 @@ struct thread_struct {
unsigned short gsindex;
#endif
- u32 status; /* thread synchronous flags */
-
#ifdef CONFIG_X86_64
unsigned long fsbase;
unsigned long gsbase;
@@ -596,7 +594,7 @@ static inline void sync_core(void)
{
int tmp;
-#ifdef CONFIG_M486
+#ifdef CONFIG_X86_32
/*
* Do a CPUID if available, otherwise do a jump. The jump
* can conveniently enough be the jump around CPUID.
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c0abe9..6847d85400a8 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index e3c95e8e61c5..03eedc21246d 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread.status & TS_COMPAT)
+ if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread.status & TS_COMPAT)
+ if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6f5eb07a95..89978b9c667a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -54,6 +54,7 @@ struct task_struct;
struct thread_info {
unsigned long flags; /* low level flags */
+ u32 status; /* thread synchronous flags */
};
#define INIT_THREAD_INFO(tsk) \
@@ -152,17 +153,6 @@ struct thread_info {
*/
#ifndef __ASSEMBLY__
-static inline unsigned long current_stack_pointer(void)
-{
- unsigned long sp;
-#ifdef CONFIG_X86_64
- asm("mov %%rsp,%0" : "=g" (sp));
-#else
- asm("mov %%esp,%0" : "=g" (sp));
-#endif
- return sp;
-}
-
/*
* Walks up the stack frames to make sure that the specified object is
* entirely contained by a single stack frame.
@@ -224,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#define in_ia32_syscall() true
#else
#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
- current->thread.status & TS_COMPAT)
+ current_thread_info()->status & TS_COMPAT)
#endif
/*
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..688315b7a922 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -92,6 +92,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
+dotraplinkage void do_mce(struct pt_regs *, long);
static inline int get_si_code(unsigned long condition)
{
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index dead0f3921f3..a8d85a687cf4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -123,6 +123,11 @@ extern int __get_user_bad(void);
#define __uaccess_begin() stac()
#define __uaccess_end() clac()
+#define __uaccess_begin_nospec() \
+({ \
+ stac(); \
+ barrier_nospec(); \
+})
/*
* This is a type: either unsigned long, if the argument fits into
@@ -432,7 +437,7 @@ do { \
({ \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
@@ -474,6 +479,10 @@ struct __large_struct { unsigned long buf[100]; };
__uaccess_begin(); \
barrier();
+#define uaccess_try_nospec do { \
+ current->thread.uaccess_err = 0; \
+ __uaccess_begin_nospec(); \
+
#define uaccess_catch(err) \
__uaccess_end(); \
(err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
@@ -538,7 +547,7 @@ struct __large_struct { unsigned long buf[100]; };
* get_user_ex(...);
* } get_user_catch(err)
*/
-#define get_user_try uaccess_try
+#define get_user_try uaccess_try_nospec
#define get_user_catch(err) uaccess_catch(err)
#define get_user_ex(x, ptr) do { \
@@ -573,7 +582,7 @@ extern void __cmpxchg_wrong_size(void)
__typeof__(ptr) __uval = (uval); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
switch (size) { \
case 1: \
{ \
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 7d3bdd1ed697..d6d245088dd5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
switch (n) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
__uaccess_end();
return ret;
@@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
switch (n) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
__uaccess_end();
return ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 673059a109fe..6e5cc08134ba 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u8 *)dst, (u8 __user *)src,
ret, "b", "b", "=q", 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u16 *)dst, (u16 __user *)src,
ret, "w", "w", "=r", 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u32 *)dst, (u32 __user *)src,
ret, "l", "k", "=r", 4);
__uaccess_end();
return ret;
case 8:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 8);
__uaccess_end();
return ret;
case 10:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 10);
if (likely(!ret))
@@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
__uaccess_end();
return ret;
case 16:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 16);
if (likely(!ret))
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a002b07a7099..6899cf187ba2 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -399,10 +399,11 @@ enum vmcs_field {
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
#define VMX_NR_VPIDS (1 << 16)
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
#define VMX_VPID_EXTENT_ALL_CONTEXT 2
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3
-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_EPT_EXTENT_CONTEXT 1
#define VMX_EPT_EXTENT_GLOBAL 2
#define VMX_EPT_EXTENT_SHIFT 24
@@ -419,8 +420,10 @@ enum vmcs_field {
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */
#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */
#define VMX_EPT_DEFAULT_GAW 3
#define VMX_EPT_MAX_GAW 0x4
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 4865e10dbb55..62210da19a92 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -21,5 +21,6 @@ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
}
static inline bool vsyscall_enabled(void) { return false; }
#endif
+extern unsigned long vsyscall_pgprot;
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 8b678af866f7..ccdc23d89b60 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -216,9 +217,9 @@ privcmd_call(unsigned call,
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
stac();
- asm volatile("call *%[call]"
+ asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
+ : [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
clac();
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 11cc600f4df0..0a1e8a67cc99 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -335,13 +335,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi);
+
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi)
{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
/*
* Check bus_irq boundary.
*/
@@ -351,14 +350,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
}
/*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
- /*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
@@ -366,16 +357,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
+ if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+ return;
/*
* Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI
@@ -422,6 +405,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0;
}
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi)
+{
+ struct mpc_intsrc mp_irq;
+ int ioapic, pin;
+
+ /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+ return ioapic;
+ }
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq;
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = pin;
+
+ mp_save_irq(&mp_irq);
+
+ return 0;
+}
+
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -466,7 +477,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ if (bus_irq < NR_IRQS_LEGACY)
+ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ else
+ mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5cb272a7a5a3..03b6e5c6cf23 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str)
}
__setup("noreplace-smp", setup_noreplace_smp);
-#ifdef CONFIG_PARAVIRT
-static int __initdata_or_module noreplace_paravirt = 0;
-
-static int __init setup_noreplace_paravirt(char *str)
-{
- noreplace_paravirt = 1;
- return 1;
-}
-__setup("noreplace-paravirt", setup_noreplace_paravirt);
-#endif
-
#define DPRINTK(fmt, args...) \
do { \
if (debug_alternative) \
@@ -340,9 +329,12 @@ done:
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
+ int i;
- if (instr[0] != 0x90)
- return;
+ for (i = 0; i < a->padlen; i++) {
+ if (instr[i] != 0x90)
+ return;
+ }
local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
@@ -585,9 +577,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *p;
char insnbuf[MAX_PATCH_LEN];
- if (noreplace_paravirt)
- return;
-
for (p = start; p < end; p++) {
unsigned int used;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4fdf6230d93c..8462e2d4ed94 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -105,7 +105,7 @@ int amd_cache_northbridges(void)
if (boot_cpu_data.x86 == 0x10 &&
boot_cpu_data.x86_model >= 0x8 &&
(boot_cpu_data.x86_model > 0x9 ||
- boot_cpu_data.x86_mask >= 0x1))
+ boot_cpu_data.x86_stepping >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
if (boot_cpu_data.x86 == 0x15)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f3557a1eb562..b5229abd1629 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -361,14 +361,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irq_data->chip_data = data;
irq_data->hwirq = virq + i;
err = assign_irq_vector_policy(virq + i, node, data, info);
- if (err)
+ if (err) {
+ irq_data->chip_data = NULL;
+ free_apic_chip_data(data);
goto error;
+ }
}
return 0;
error:
- x86_vector_free_irqs(domain, virq, i + 1);
+ x86_vector_free_irqs(domain, virq, i);
return err;
}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 880aa093268d..36ebb6de1a03 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -20,7 +20,7 @@ void foo(void)
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
- OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4a8697f7d4ef..33b63670bf09 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-y += bugs.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_X86_32) += bugs.o
-obj-$(CONFIG_X86_64) += bugs_64.o
-
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2b4cf04239b6..c375bc672f82 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -118,7 +118,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
- if (c->x86_model == 6 && c->x86_mask == 1) {
+ if (c->x86_model == 6 && c->x86_stepping == 1) {
const int K6_BUG_LOOP = 1000000;
int n;
void (*f_vide)(void);
@@ -147,7 +147,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
/* K6 with old style WHCR */
if (c->x86_model < 8 ||
- (c->x86_model == 8 && c->x86_mask < 8)) {
+ (c->x86_model == 8 && c->x86_stepping < 8)) {
/* We can only write allocate on the low 508Mb */
if (mbytes > 508)
mbytes = 508;
@@ -166,7 +166,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
- if ((c->x86_model == 8 && c->x86_mask > 7) ||
+ if ((c->x86_model == 8 && c->x86_stepping > 7) ||
c->x86_model == 9 || c->x86_model == 13) {
/* The more serious chips .. */
@@ -219,7 +219,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
* As per AMD technical note 27212 0.2
*/
- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+ if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
@@ -239,12 +239,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* but they are not certified as MP capable.
*/
/* Athlon 660/661 is valid. */
- if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
- (c->x86_mask == 1)))
+ if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
+ (c->x86_stepping == 1)))
return;
/* Duron 670 is valid */
- if ((c->x86_model == 7) && (c->x86_mask == 0))
+ if ((c->x86_model == 7) && (c->x86_stepping == 0))
return;
/*
@@ -254,8 +254,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
* more.
*/
- if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
- ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+ if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
+ ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
(c->x86_model > 7))
if (cpu_has(c, X86_FEATURE_MP))
return;
@@ -569,7 +569,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
/* Set MTRR capability flag if appropriate */
if (c->x86 == 5)
if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
+ (c->x86_model == 8 && c->x86_stepping >= 8))
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@@ -782,8 +782,32 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
}
/*
@@ -810,11 +834,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
/* Duron Rev A0 */
- if (c->x86_model == 3 && c->x86_mask == 0)
+ if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
/* Tbird rev A1/A2 */
if (c->x86_model == 4 &&
- (c->x86_mask == 0 || c->x86_mask == 1))
+ (c->x86_stepping == 0 || c->x86_stepping == 1))
size = 256;
}
return size;
@@ -951,7 +975,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
while ((range = *erratum++))
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0b6124315441..baddc9ed3454 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -9,6 +9,11 @@
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
@@ -16,23 +21,25 @@
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/intel-family.h>
+
+static void __init spectre_v2_select_mitigation(void);
void __init check_bugs(void)
{
-#ifdef CONFIG_X86_32
- /*
- * Regardless of whether PCID is enumerated, the SDM says
- * that it can't be enabled in 32-bit mode.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
identify_boot_cpu();
-#ifndef CONFIG_SMP
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
+#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
*
@@ -48,4 +55,278 @@ void __init check_bugs(void)
alternative_instructions();
fpu__init_check_bugs();
+#else /* CONFIG_X86_64 */
+ alternative_instructions();
+
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+#endif
+}
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+#ifdef RETPOLINE
+static bool spectre_v2_bad_module;
+
+bool retpoline_module_ok(bool has_retpoline)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+ return true;
+
+ pr_err("System may be vulnerable to spectre v2\n");
+ spectre_v2_bad_module = true;
+ return false;
+}
+
+static inline const char *spectre_v2_module_string(void)
+{
+ return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
+}
+#else
+static inline const char *spectre_v2_module_string(void) { return ""; }
+#endif
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s selected on command line.\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s selected on command line.\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+ return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
+}
+
+static const struct {
+ const char *option;
+ enum spectre_v2_mitigation_cmd cmd;
+ bool secure;
+} mitigation_options[] = {
+ { "off", SPECTRE_V2_CMD_NONE, false },
+ { "on", SPECTRE_V2_CMD_FORCE, true },
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
+};
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+ char arg[20];
+ int ret, i;
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_NONE;
+ else {
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+ if (!match_option(arg, ret, mitigation_options[i].option))
+ continue;
+ cmd = mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ !IS_ENABLED(CONFIG_RETPOLINE)) {
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (mitigation_options[i].secure)
+ spec2_print_if_secure(mitigation_options[i].option);
+ else
+ spec2_print_if_insecure(mitigation_options[i].option);
+
+ return cmd;
+}
+
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ return true;
+ }
+ }
+ return false;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+ /*
+ * If the CPU is not affected and the command line mode is NONE or AUTO
+ * then nothing to do.
+ */
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ return;
+
+ switch (cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+ case SPECTRE_V2_CMD_AUTO:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_generic;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ }
+ pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+ SPECTRE_V2_RETPOLINE_MINIMAL;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+ * If neither SMEP nor PTI are available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+ *
+ * Skylake era CPUs have a separate issue with *underflow* of the
+ * RSB, when they will predict 'ret' targets from the generic BTB.
+ * The proper mitigation for this is IBRS. If IBRS is not supported
+ * or deactivated in favour of retpolines the RSB fill on context
+ * switch is required.
+ */
+ if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
+ }
+
+ /* Initialize Indirect Branch Prediction Barrier if supported */
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
+ }
}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_KAISER))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ spectre_v2_module_string());
+}
+#endif
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
deleted file mode 100644
index a972ac4c7e7d..000000000000
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2000 SuSE
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/alternative.h>
-#include <asm/bugs.h>
-#include <asm/processor.h>
-#include <asm/mtrr.h>
-#include <asm/cacheflush.h>
-
-void __init check_bugs(void)
-{
- identify_boot_cpu();
-#if !defined(CONFIG_SMP)
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 1661d8ec9280..4d2f61f92fed 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -134,7 +134,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
default:
name = "2";
break;
@@ -209,7 +209,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
* - Note, it seems this may only be in engineering samples.
*/
if ((c->x86 == 6) && (c->x86_model == 9) &&
- (c->x86_mask == 1) && (size == 65))
+ (c->x86_stepping == 1) && (size == 65))
size -= 1;
return size;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 918e44772b04..301bbd1f2373 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -44,6 +44,8 @@
#include <asm/pat.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -480,8 +482,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -697,7 +699,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = x86_family(tfms);
c->x86_model = x86_model(tfms);
- c->x86_mask = x86_stepping(tfms);
+ c->x86_stepping = x86_stepping(tfms);
if (cap0 & (1<<19)) {
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -706,6 +708,36 @@ void cpu_detect(struct cpuinfo_x86 *c)
}
}
+static void apply_forced_caps(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+}
+
+static void init_speculation_control(struct cpuinfo_x86 *c)
+{
+ /*
+ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+ * and they also have a different bit for STIBP support. Also,
+ * a hypervisor might have set the individual AMD bits even on
+ * Intel CPUs, for finer-grained selection of what's available.
+ *
+ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
+ * features, which are visible in /proc/cpuinfo and used by the
+ * kernel. So set those accordingly from the Intel bits.
+ */
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+ }
+ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -727,6 +759,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
+ c->x86_capability[CPUID_7_EDX] = edx;
}
/* Extended state features: level 0x0000000d */
@@ -799,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
init_scattered_cpuid_features(c);
+ init_speculation_control(c);
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -827,6 +861,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
+static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_CENTAUR, 5 },
+ { X86_VENDOR_INTEL, 5 },
+ { X86_VENDOR_NSC, 5 },
+ { X86_VENDOR_ANY, 4 },
+ {}
+};
+
+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
+ { X86_VENDOR_AMD },
+ {}
+};
+
+static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+{
+ u64 ia32_cap = 0;
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return false;
+
+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+ /* Rogue Data Cache Load? No! */
+ if (ia32_cap & ARCH_CAP_RDCL_NO)
+ return false;
+
+ return true;
+}
+
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -872,7 +941,23 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ if (!x86_match_cpu(cpu_no_speculation)) {
+ if (cpu_vulnerable_to_meltdown(c))
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ }
+
fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
}
void __init early_cpu_init(void)
@@ -1059,9 +1144,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
int i;
c->loops_per_jiffy = loops_per_jiffy;
- c->x86_cache_size = -1;
+ c->x86_cache_size = 0;
c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
@@ -1086,10 +1171,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_identify(c);
/* Clear/Set all flags overridden by options, after probe */
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
@@ -1151,10 +1233,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
* Clear/Set all flags overridden by options, need do it
* before following smp all cpus cap AND.
*/
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
@@ -1312,8 +1391,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
- if (c->x86_mask || c->cpuid_level >= 0)
- pr_cont(", stepping: 0x%x)\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
else
pr_cont(")\n");
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index bd9dcd6b712d..455d8ada9b9a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -212,7 +212,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
/* common case step number/rev -- exceptions handled below */
c->x86_model = (dir1 >> 4) + 1;
- c->x86_mask = dir1 & 0xf;
+ c->x86_stepping = dir1 & 0xf;
/* Now cook; the original recipe is by Channing Corn, from Cyrix.
* We do the same thing for each generation: we work out
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcd484d2bb03..6ed206bd9071 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -61,6 +61,56 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
}
}
+/*
+ * Early microcode releases for the Spectre v2 mitigation were broken.
+ * Information taken from;
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://kb.vmware.com/s/article/52345
+ * - Microcode revisions observed in the wild
+ * - Release note from 20180108 microcode release
+ */
+struct sku_microcode {
+ u8 model;
+ u8 stepping;
+ u32 microcode;
+};
+static const struct sku_microcode spectre_bad_microcodes[] = {
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
+ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
+ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
+ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
+ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
+ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
+ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
+ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
+ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
+ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
+ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
+ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
+ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
+ /* Observed in the wild */
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
+};
+
+static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+ if (c->x86_model == spectre_bad_microcodes[i].model &&
+ c->x86_stepping == spectre_bad_microcodes[i].stepping)
+ return (c->microcode <= spectre_bad_microcodes[i].microcode);
+ }
+ return false;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -87,6 +137,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
}
+ /* Now if any of them are set, check the blacklist and clear the lot */
+ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+ cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
+ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+ setup_clear_cpu_cap(X86_FEATURE_IBRS);
+ setup_clear_cpu_cap(X86_FEATURE_IBPB);
+ setup_clear_cpu_cap(X86_FEATURE_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ }
+
/*
* Atom erratum AAE44/AAF40/AAG38/AAH41:
*
@@ -95,7 +158,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* need the microcode to have already been loaded... so if it is
* not, recommend a BIOS update and disable large pages.
*/
- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
c->microcode < 0x20e) {
pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -111,7 +174,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
/* CPUID workaround for 0F33/0F34 CPU */
if (c->x86 == 0xF && c->x86_model == 0x3
- && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+ && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
c->x86_phys_bits = 36;
/*
@@ -226,7 +289,7 @@ int ppro_with_ram_bug(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask < 8) {
+ boot_cpu_data.x86_stepping < 8) {
pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
@@ -243,7 +306,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
* Mask B, Pentium, but not Pentium MMX
*/
if (c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
c->x86_model <= 3) {
/*
* Remember we have B step Pentia with bugs
@@ -286,7 +349,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
* model 3 mask 3
*/
- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+ if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
clear_cpu_cap(c, X86_FEATURE_SEP);
/*
@@ -304,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* P4 Xeon erratum 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
if (msr_set_bit(MSR_IA32_MISC_ENABLE,
MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@@ -319,7 +382,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* Specification Update").
*/
if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
- (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+ (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -538,7 +601,7 @@ static void init_intel(struct cpuinfo_x86 *c)
case 6:
if (l2 == 128)
p = "Celeron (Mendocino)";
- else if (c->x86_mask == 0 || c->x86_mask == 5)
+ else if (c->x86_stepping == 0 || c->x86_stepping == 5)
p = "Celeron-A";
break;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index de6626c18e42..be6337156502 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu)
ci_leaf_init(this_leaf++, &id4_regs);
__cache_cpumap_setup(cpu, idx, &id4_regs);
}
+ this_cpu_ci->cpu_map_populated = true;
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 517619ea6498..99165b206df3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -152,7 +152,6 @@ static void raise_mce(struct mce *m)
if (context == MCJ_CTX_RANDOM)
return;
-#ifdef CONFIG_X86_LOCAL_APIC
if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
unsigned long start;
int cpu;
@@ -192,9 +191,7 @@ static void raise_mce(struct mce *m)
raise_local();
put_cpu();
put_online_cpus();
- } else
-#endif
- {
+ } else {
preempt_disable();
raise_local();
preempt_enable();
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ca5f8ad008e..fe5cd6ea1f0e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1754,6 +1754,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index b74bb29db6b9..732bb03fcf91 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -135,6 +135,9 @@ static size_t compute_container_size(u8 *data, u32 total_size)
return size;
}
+static enum ucode_state
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
+
/*
* Early load occurs before we can vmalloc(). So we look for the microcode
* patch container file in initrd, traverse equivalent cpu table, look for a
@@ -451,7 +454,7 @@ int __init save_microcode_in_initrd_amd(void)
eax = cpuid_eax(0x00000001);
eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
- ret = load_microcode_amd(smp_processor_id(), eax, container, container_size);
+ ret = load_microcode_amd(true, eax, container, container_size);
if (ret != UCODE_OK)
retval = -EINVAL;
@@ -864,7 +867,8 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
return UCODE_OK;
}
-enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
+static enum ucode_state
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
{
enum ucode_state ret;
@@ -878,8 +882,8 @@ enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t s
#ifdef CONFIG_X86_32
/* save BSP's matching patch for early load */
- if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
- struct ucode_patch *p = find_patch(cpu);
+ if (save) {
+ struct ucode_patch *p = find_patch(0);
if (p) {
memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
@@ -911,11 +915,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
enum ucode_state ret = UCODE_NFOUND;
const struct firmware *fw;
/* reload ucode container only on the boot cpu */
- if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+ if (!refresh_fw || !bsp)
return UCODE_OK;
if (c->x86 >= 0x15)
@@ -932,7 +937,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
goto fw_release;
}
- ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
+ ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 5ce5155f0695..0afaf00b029b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -43,7 +43,7 @@
#define MICROCODE_VERSION "2.01"
static struct microcode_ops *microcode_ops;
-static bool dis_ucode_ldr;
+static bool dis_ucode_ldr = true;
/*
* Synchronization.
@@ -73,6 +73,7 @@ struct cpu_info_ctx {
static bool __init check_loader_disabled_bsp(void)
{
static const char *__dis_opt_str = "dis_ucode_ldr";
+ u32 a, b, c, d;
#ifdef CONFIG_X86_32
const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
@@ -85,8 +86,20 @@ static bool __init check_loader_disabled_bsp(void)
bool *res = &dis_ucode_ldr;
#endif
- if (cmdline_find_option_bool(cmdline, option))
- *res = true;
+ a = 1;
+ c = 0;
+ native_cpuid(&a, &b, &c, &d);
+
+ /*
+ * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
+ * completely accurate as xen pv guests don't see that CPUID bit set but
+ * that's good enough as they don't land on the BSP path anyway.
+ */
+ if (c & BIT(31))
+ return *res;
+
+ if (cmdline_find_option_bool(cmdline, option) <= 0)
+ *res = false;
return *res;
}
@@ -114,9 +127,7 @@ void __init load_ucode_bsp(void)
{
int vendor;
unsigned int family;
-
- if (check_loader_disabled_bsp())
- return;
+ bool intel = true;
if (!have_cpuid_p())
return;
@@ -126,16 +137,27 @@ void __init load_ucode_bsp(void)
switch (vendor) {
case X86_VENDOR_INTEL:
- if (family >= 6)
- load_ucode_intel_bsp();
+ if (family < 6)
+ return;
break;
+
case X86_VENDOR_AMD:
- if (family >= 0x10)
- load_ucode_amd_bsp(family);
+ if (family < 0x10)
+ return;
+ intel = false;
break;
+
default:
- break;
+ return;
}
+
+ if (check_loader_disabled_bsp())
+ return;
+
+ if (intel)
+ load_ucode_intel_bsp();
+ else
+ load_ucode_amd_bsp(family);
}
static bool check_loader_disabled_ap(void)
@@ -154,9 +176,6 @@ void load_ucode_ap(void)
if (check_loader_disabled_ap())
return;
- if (!have_cpuid_p())
- return;
-
vendor = x86_cpuid_vendor();
family = x86_cpuid_family();
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 13dbcc0f9d03..4bcd30c87531 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -40,6 +40,9 @@
#include <asm/setup.h>
#include <asm/msr.h>
+/* last level cache size per core */
+static int llc_size_per_core;
+
/*
* Temporary microcode blobs pointers storage. We note here during early load
* the pointers to microcode blobs we've got from whatever storage (detached
@@ -1051,8 +1054,19 @@ static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
- pr_err_once("late loading on model 79 is disabled.\n");
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * and LLC size per core bigger than 2.5MB may result in a system hang.
+ * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ c->x86_stepping == 0x01 &&
+ llc_size_per_core > 2621440 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
@@ -1071,7 +1085,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
return UCODE_NFOUND;
sprintf(name, "intel-ucode/%02x-%02x-%02x",
- c->x86, c->x86_model, c->x86_mask);
+ c->x86, c->x86_model, c->x86_stepping);
if (request_firmware_direct(&firmware, name, device)) {
pr_debug("data file %s load failed\n", name);
@@ -1116,6 +1130,15 @@ static struct microcode_ops microcode_intel_ops = {
.microcode_fini_cpu = microcode_fini_cpu,
};
+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+ u64 llc_size = c->x86_cache_size * 1024ULL;
+
+ do_div(llc_size, c->x86_max_cores);
+
+ return (int)llc_size;
+}
+
struct microcode_ops * __init init_intel_microcode(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1126,6 +1149,8 @@ struct microcode_ops * __init init_intel_microcode(void)
return NULL;
}
+ llc_size_per_core = calc_llc_size_per_core(c);
+
return &microcode_intel_ops;
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fdc55215d44d..e12ee86906c6 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
*/
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask <= 7) {
+ boot_cpu_data.x86_stepping <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 24e87e74990d..fae740c22657 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -699,8 +699,8 @@ void __init mtrr_bp_init(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 0xF &&
boot_cpu_data.x86_model == 0x3 &&
- (boot_cpu_data.x86_mask == 0x3 ||
- boot_cpu_data.x86_mask == 0x4))
+ (boot_cpu_data.x86_stepping == 0x3 ||
+ boot_cpu_data.x86_stepping == 0x4))
phys_addr = 36;
size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 18ca99f2798b..c4f772d3f35c 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -70,8 +70,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ seq_printf(m, "stepping\t: %d\n", c->x86_stepping);
else
seq_puts(m, "stepping\t: unknown\n");
if (c->microcode)
@@ -87,8 +87,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
}
/* Cache size */
- if (c->x86_cache_size >= 0)
- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+ if (c->x86_cache_size)
+ seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size);
show_cpuinfo_core(m, c, cpu);
show_cpuinfo_misc(m, c);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 1db8dc490b66..afbb52532791 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,9 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 2dabea46f039..82155d0cc310 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -35,7 +35,7 @@
#define X86 new_cpu_data+CPUINFO_x86
#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL new_cpu_data+CPUINFO_x86_model
-#define X86_MASK new_cpu_data+CPUINFO_x86_mask
+#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping
#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
@@ -441,7 +441,7 @@ enable_paging:
shrb $4,%al
movb %al,X86_MODEL
andb $0x0f,%cl # mask mask revision
- movb %cl,X86_MASK
+ movb %cl,X86_STEPPING
movl %edx,X86_CAPABILITY
is486:
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1f38d9a4d9de..2763573ee1d2 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <asm/apic.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -54,17 +55,17 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=b" (stack)
: "0" (stack),
- "D"(func)
+ [thunk_target] "D"(func)
: "memory", "cc", "edx", "ecx", "eax");
}
static inline void *current_stack(void)
{
- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
}
static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,17 +89,17 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
/* Save the next esp at the bottom of the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=a" (arg1), "=b" (isp)
: "0" (desc), "1" (isp),
- "D" (desc->handle_irq)
+ [thunk_target] "D" (desc->handle_irq)
: "memory", "cc", "ecx");
return 1;
}
@@ -139,7 +140,7 @@ void do_softirq_own_stack(void)
/* Push the previous esp onto the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
call_on_stack(__do_softirq, isp);
}
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4d74f7386a61..dc20da1c78f0 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,6 +37,7 @@
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include <asm/nospec-branch.h>
#include "common.h"
@@ -192,7 +193,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
}
/* Check whether insn is indirect jump */
-static int insn_is_indirect_jump(struct insn *insn)
+static int __insn_is_indirect_jump(struct insn *insn)
{
return ((insn->opcode.bytes[0] == 0xff &&
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -226,6 +227,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
return (start <= target && target <= start + len);
}
+static int insn_is_indirect_jump(struct insn *insn)
+{
+ int ret = __insn_is_indirect_jump(insn);
+
+#ifdef CONFIG_RETPOLINE
+ /*
+ * Jump to x86_indirect_thunk_* is treated as an indirect jump.
+ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
+ * older gcc may use indirect jump. So we add this check instead of
+ * replace indirect-jump check.
+ */
+ if (!ret)
+ ret = insn_jump_into_range(insn,
+ (unsigned long)__indirect_thunk_start,
+ (unsigned long)__indirect_thunk_end -
+ (unsigned long)__indirect_thunk_start);
+#endif
+ return ret;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 7b0d3da52fb4..287ec3bc141f 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -8,7 +8,7 @@
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
.code64
.section .entry.text, "ax"
@@ -290,8 +290,9 @@ trace:
* ip and parent ip are used and the list function is called when
* function tracing is enabled.
*/
- call *ftrace_trace_function
+ movq ftrace_trace_function, %r8
+ CALL_NOSPEC %r8
restore_mcount_regs
jmp fgraph_trace
@@ -334,5 +335,5 @@ GLOBAL(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ JMP_NOSPEC %rdi
#endif
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0f8d20497383..d0fb941330c6 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -406,7 +406,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
processor.cpuflag = CPU_ENABLED;
processor.cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;
processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
processor.reserved[0] = 0;
processor.reserved[1] = 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0887d2ae3797..dffe81d3c261 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -538,7 +538,7 @@ void set_personality_ia32(bool x32)
current->personality &= ~READ_IMPLIES_EXEC;
/* in_compat_syscall() uses the presence of the x32
syscall bit flag to determine compat status */
- current->thread.status &= ~TS_COMPAT;
+ current_thread_info()->status &= ~TS_COMPAT;
} else {
set_thread_flag(TIF_IA32);
clear_thread_flag(TIF_X32);
@@ -546,7 +546,7 @@ void set_personality_ia32(bool x32)
current->mm->context.ia32_compat = TIF_IA32;
current->personality |= force_personality32;
/* Prepare the first "return" to user space */
- current->thread.status |= TS_COMPAT;
+ current_thread_info()->status |= TS_COMPAT;
}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0e63c0267f99..e497d374412a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
- child->thread.status |= TS_I386_REGS_POKED;
+ child->thread_info.status |= TS_I386_REGS_POKED;
break;
case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 763af1d0de64..b1a5d252d482 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
* than the tracee.
*/
#ifdef CONFIG_IA32_EMULATION
- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 8402907825b0..21454e254a4c 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
pte_unmap(pte);
+
+ /*
+ * PTI poisons low addresses in the kernel page tables in the
+ * name of making them unusable for userspace. To execute
+ * code at such a low address, the poison must be cleared.
+ *
+ * Note: 'pgd' actually gets set in pud_alloc().
+ */
+ pgd->pgd &= ~_PAGE_NX;
+
return 0;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bd4e3d4d3625..322f433fbc76 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* from double_fault.
*/
BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer()) >= THREAD_SIZE);
+ current_stack_pointer) >= THREAD_SIZE);
preempt_enable_no_resched();
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 44bf5cf417d3..d07a9390023e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -693,7 +693,6 @@ unsigned long native_calibrate_tsc(void)
case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_DENVERTON:
crystal_khz = 25000; /* 25.0 MHz */
break;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 4b3012888ada..8a1d63591399 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -160,11 +160,12 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
static void mark_screen_rdonly(struct mm_struct *mm)
{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- spinlock_t *ptl;
int i;
down_write(&mm->mmap_sem);
@@ -177,7 +178,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
pmd = pmd_offset(pud, 0xA0000);
if (pmd_trans_huge(*pmd)) {
- struct vm_area_struct *vma = find_vma(mm, 0xA0000);
+ vma = find_vma(mm, 0xA0000);
split_huge_pmd(vma, pmd, 0xA0000);
}
if (pmd_none_or_clear_bad(pmd))
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index dbf67f64d5ec..c7194e97c3d4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -105,6 +105,13 @@ SECTIONS
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
+
+#ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+ *(.text.__x86.indirect_thunk)
+ __indirect_thunk_end = .;
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ab8e32f7b9a8..9150e09773cb 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,7 +22,8 @@ config KVM
depends on HAVE_KVM
depends on HIGH_RES_TIMERS
# for TASKSTATS/TASK_DELAY_ACCT:
- depends on NET
+ depends on NET && MULTIUSER
+ depends on X86_LOCAL_APIC
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 91af75e37306..93f924de06cf 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ /* cpuid 0x80000008.ebx */
+ const u32 kvm_cpuid_8000_0008_ebx_x86_features =
+ F(IBPB) | F(IBRS);
+
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
@@ -376,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+ F(SPEC_CTRL) | F(ARCH_CAPABILITIES);
+
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -458,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
+ entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+ cpuid_mask(&entry->edx, CPUID_7_EDX);
} else {
entry->ebx = 0;
entry->ecx = 0;
+ entry->edx = 0;
}
entry->eax = 0;
- entry->edx = 0;
break;
}
case 9:
@@ -607,7 +617,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!g_phys_as)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
- entry->ebx = entry->edx = 0;
+ entry->edx = 0;
+ /* IBRS and IBPB aren't necessarily present in hardware cpuid */
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ entry->ebx |= F(IBPB);
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ entry->ebx |= F(IBRS);
+ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
break;
}
case 0x80000019:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9368fecca3ee..d1beb7156704 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -160,6 +160,37 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
return best && (best->edx & bit(X86_FEATURE_RDTSCP));
}
+static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best && (best->ebx & bit(X86_FEATURE_IBPB)))
+ return true;
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
+}
+
+static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best && (best->ebx & bit(X86_FEATURE_IBRS)))
+ return true;
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
+}
+
+static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES));
+}
+
+
/*
* NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
*/
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8f8dd8ca0a1..c8d573822e60 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -25,6 +25,7 @@
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
+#include <asm/nospec-branch.h>
#include "x86.h"
#include "tss.h"
@@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
- asm("push %[flags]; popf; call *%[fastop]"
- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
+ asm("push %[flags]; popf; " CALL_NOSPEC
+ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
return rc;
}
@@ -4990,6 +4991,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
bool op_prefix = false;
bool has_seg_override = false;
struct opcode opcode;
+ u16 dummy;
+ struct desc_struct desc;
ctxt->memop.type = OP_NONE;
ctxt->memopp = NULL;
@@ -5008,6 +5011,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
switch (mode) {
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_VM86:
+ def_op_bytes = def_ad_bytes = 2;
+ ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
+ if (desc.d)
+ def_op_bytes = def_ad_bytes = 4;
+ break;
case X86EMUL_MODE_PROT16:
def_op_bytes = def_ad_bytes = 2;
break;
@@ -5299,15 +5307,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
- register void *__sp asm(_ASM_SP);
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
if (!(ctxt->d & ByteOp))
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
+ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), "+r"(__sp)
+ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
: "c"(ctxt->src2.val));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 6e219e5c07d2..5f810bb80802 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
index == RTC_GSI) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode) ||
- (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
- kvm_apic_pending_eoi(vcpu, e->fields.vector)))
+ kvm_apic_pending_eoi(vcpu, e->fields.vector))
__set_bit(e->fields.vector,
ioapic_handled_vectors);
}
@@ -279,6 +278,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
bool mask_before, mask_after;
+ int old_remote_irr, old_delivery_status;
union kvm_ioapic_redirect_entry *e;
switch (ioapic->ioregsel) {
@@ -301,14 +301,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
return;
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
+ /* Preserve read-only fields */
+ old_remote_irr = e->fields.remote_irr;
+ old_delivery_status = e->fields.delivery_status;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
} else {
e->bits &= ~0xffffffffULL;
e->bits |= (u32) val;
- e->fields.remote_irr = 0;
}
+ e->fields.remote_irr = old_remote_irr;
+ e->fields.delivery_status = old_delivery_status;
+
+ /*
+ * Some OSes (Linux, Xen) assume that Remote IRR bit will
+ * be cleared by IOAPIC hardware when the entry is configured
+ * as edge-triggered. This behavior is used to simulate an
+ * explicit EOI on IOAPICs that don't have the EOI register.
+ */
+ if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
+ e->fields.remote_irr = 0;
+
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0a324e120942..a16c06604a56 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4640,7 +4640,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
/* The caller should hold mmu-lock before calling this function. */
-static bool
+static __always_inline bool
slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, int start_level, int end_level,
gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
@@ -4670,7 +4670,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
return flush;
}
-static bool
+static __always_inline bool
slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, int start_level, int end_level,
bool lock_flush_tlb)
@@ -4681,7 +4681,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
@@ -4689,7 +4689,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
@@ -4697,7 +4697,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
-static bool
+static __always_inline bool
slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8148d8ca7930..be644afab1bb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -44,6 +44,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -182,6 +183,8 @@ struct vcpu_svm {
u64 gs_base;
} host;
+ u64 spec_ctrl;
+
u32 *msrpm;
ulong nmi_iret_rip;
@@ -247,6 +250,8 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_CSTAR, .always = true },
{ .index = MSR_SYSCALL_MASK, .always = true },
#endif
+ { .index = MSR_IA32_SPEC_CTRL, .always = false },
+ { .index = MSR_IA32_PRED_CMD, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -509,6 +514,7 @@ struct svm_cpu_data {
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+ struct vmcb *current_vmcb;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -860,6 +866,25 @@ static bool valid_msr_intercept(u32 index)
return false;
}
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
+{
+ u8 bit_write;
+ unsigned long tmp;
+ u32 offset;
+ u32 *msrpm;
+
+ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
+ to_svm(vcpu)->msrpm;
+
+ offset = svm_msrpm_offset(msr);
+ bit_write = 2 * (msr & 0x0f) + 1;
+ tmp = msrpm[offset];
+
+ BUG_ON(offset == MSR_INVALID);
+
+ return !!test_bit(bit_write, &tmp);
+}
+
static void set_msr_interception(u32 *msrpm, unsigned msr,
int read, int write)
{
@@ -1534,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
+ svm->spec_ctrl = 0;
+
if (!init_event) {
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
MSR_IA32_APICBASE_ENABLE;
@@ -1643,11 +1670,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So do a full IBPB now.
+ */
+ indirect_branch_prediction_barrier();
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int i;
if (unlikely(cpu != vcpu->cpu)) {
@@ -1676,6 +1709,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ if (sd->current_vmcb != svm->vmcb) {
+ sd->current_vmcb = svm->vmcb;
+ indirect_branch_prediction_barrier();
+ }
avic_vcpu_load(vcpu, cpu);
}
@@ -3507,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_VM_CR:
msr_info->data = svm->nested.vm_cr_msr;
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ msr_info->data = svm->spec_ctrl;
+ break;
case MSR_IA32_UCODE_REV:
msr_info->data = 0x01000065;
break;
@@ -3598,6 +3642,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ return 1;
+
+ svm->spec_ctrl = data;
+
+ if (!data)
+ break;
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_svm_vmrun_msrpm.
+ * We update the L1 MSR bit as well since it will end up
+ * touching the MSR anyway now.
+ */
+ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has_ibpb(vcpu))
+ return 1;
+
+ if (data & ~PRED_CMD_IBPB)
+ return 1;
+
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ if (is_guest_mode(vcpu))
+ break;
+ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
+ break;
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -4825,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
local_irq_enable();
+ /*
+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+ * it's non-zero. Since vmentry is serialising on affected CPUs, there
+ * is no need to worry about the conditional branch over the wrmsr
+ * being speculatively taken.
+ */
+ if (svm->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -4869,6 +4965,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
+ /*
+ * Clear host registers marked as clobbered to prevent
+ * speculative use.
+ */
+ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
+ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
+ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
+ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
+#ifdef CONFIG_X86_64
+ "xor %%r8, %%r8 \n\t"
+ "xor %%r9, %%r9 \n\t"
+ "xor %%r10, %%r10 \n\t"
+ "xor %%r11, %%r11 \n\t"
+ "xor %%r12, %%r12 \n\t"
+ "xor %%r13, %%r13 \n\t"
+ "xor %%r14, %%r14 \n\t"
+ "xor %%r15, %%r15 \n\t"
+#endif
"pop %%" _ASM_BP
:
: [svm]"a"(svm),
@@ -4898,6 +5013,30 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /*
+ * We do not use IBRS in the kernel. If this vCPU has used the
+ * SPEC_CTRL MSR it may have left it on; save the value and
+ * turn it off. This is much more efficient than blindly adding
+ * it to the atomic save/restore list. Especially as the former
+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ *
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
+ if (svm->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 263e56059fd5..c51aaac953b4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
+#include <linux/nospec.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -48,6 +49,7 @@
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include "trace.h"
#include "pmu.h"
@@ -108,6 +110,14 @@ static u64 __read_mostly host_xss;
static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+#define MSR_TYPE_RW 3
+
+#define MSR_BITMAP_MODE_X2APIC 1
+#define MSR_BITMAP_MODE_X2APIC_APICV 2
+#define MSR_BITMAP_MODE_LM 4
+
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
@@ -132,6 +142,12 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+#define VMX_VPID_EXTENT_SUPPORTED_MASK \
+ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
+ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -172,7 +188,6 @@ module_param(ple_window_max, int, S_IRUGO);
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
struct vmcs {
u32 revision_id;
@@ -190,6 +205,7 @@ struct loaded_vmcs {
struct vmcs *shadow_vmcs;
int cpu;
int launched;
+ unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -206,7 +222,7 @@ struct shared_msr_entry {
* stored in guest memory specified by VMPTRLD, but is opaque to the guest,
* which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
* More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
* underlying hardware which will be used to run L2.
* This structure is packed to ensure that its layout is identical across
* machines (necessary for live migration).
@@ -385,13 +401,6 @@ struct __packed vmcs12 {
*/
#define VMCS12_SIZE 0x1000
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
- struct list_head list;
- gpa_t vmptr;
- struct loaded_vmcs vmcs02;
-};
-
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -418,15 +427,15 @@ struct nested_vmx {
*/
bool sync_shadow_vmcs;
- /* vmcs02_list cache of VMCSs recently used to run L2 guests */
- struct list_head vmcs02_pool;
- int vmcs02_num;
bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
+
+ struct loaded_vmcs vmcs02;
+
/*
- * Guest pages referred to in vmcs02 with host-physical pointers, so
- * we must keep them pinned while L2 runs.
+ * Guest pages referred to in the vmcs02 with host-physical
+ * pointers, so we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
struct page *virtual_apic_page;
@@ -435,8 +444,6 @@ struct nested_vmx {
bool pi_pending;
u16 posted_intr_nv;
- unsigned long *msr_bitmap;
-
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -537,6 +544,7 @@ struct vcpu_vmx {
unsigned long host_rsp;
u8 fail;
bool nmi_known_unmasked;
+ u8 msr_bitmap_mode;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
@@ -548,6 +556,10 @@ struct vcpu_vmx {
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
+
+ u64 arch_capabilities;
+ u64 spec_ctrl;
+
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
/*
@@ -855,13 +867,18 @@ static const unsigned short vmcs_field_to_offset_table[] = {
static inline short vmcs_field_to_offset(unsigned long field)
{
- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
+ unsigned short offset;
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- vmcs_field_to_offset_table[field] == 0)
+ BUILD_BUG_ON(size > SHRT_MAX);
+ if (field >= size)
return -ENOENT;
- return vmcs_field_to_offset_table[field];
+ field = array_index_nospec(field, size);
+ offset = vmcs_field_to_offset_table[field];
+ if (offset == 0)
+ return -ENOENT;
+ return offset;
}
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
@@ -903,6 +920,9 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static int alloc_identity_pagetable(struct kvm *kvm);
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -922,12 +942,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;
@@ -1844,6 +1858,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
vmcs_write32(EXCEPTION_BITMAP, eb);
}
+/*
+ * Check if MSR is intercepted for currently loaded MSR bitmap.
+ */
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+{
+ unsigned long *msr_bitmap;
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return true;
+
+ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
+
+ if (msr <= 0x1fff) {
+ return !!test_bit(msr, msr_bitmap + 0x800 / f);
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+ }
+
+ return true;
+}
+
+/*
+ * Check if MSR is intercepted for L01 MSR bitmap.
+ */
+static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+{
+ unsigned long *msr_bitmap;
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return true;
+
+ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
+
+ if (msr <= 0x1fff) {
+ return !!test_bit(msr, msr_bitmap + 0x800 / f);
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+ }
+
+ return true;
+}
+
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
@@ -2253,6 +2313,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
+ indirect_branch_prediction_barrier();
}
if (!already_loaded) {
@@ -2521,36 +2582,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
vmx->guest_msrs[from] = tmp;
}
-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
-{
- unsigned long *msr_bitmap;
-
- if (is_guest_mode(vcpu))
- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
- else if (cpu_has_secondary_exec_ctrls() &&
- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
- else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
- } else {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
- else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
- }
- } else {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode;
- else
- msr_bitmap = vmx_msr_bitmap_legacy;
- }
-
- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-}
-
/*
* Set up the vmcs to automatically save and restore system
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
@@ -2591,7 +2622,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx->save_nmsrs = save_nmsrs;
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(&vmx->vcpu);
+ vmx_update_msr_bitmap(&vmx->vcpu);
}
/*
@@ -2814,8 +2845,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
*/
if (enable_vpid)
vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
- VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
- VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+ VMX_VPID_EXTENT_SUPPORTED_MASK;
else
vmx->nested.nested_vmx_vpid_caps = 0;
@@ -2980,6 +3010,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
msr_info->data = guest_read_tsc(vcpu);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ msr_info->data = to_vmx(vcpu)->spec_ctrl;
+ break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_arch_capabilities(vcpu))
+ return 1;
+ msr_info->data = to_vmx(vcpu)->arch_capabilities;
+ break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
break;
@@ -3084,6 +3127,68 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ return 1;
+
+ vmx->spec_ctrl = data;
+
+ if (!data)
+ break;
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_vmx_merge_msr_bitmap. We should not touch the
+ * vmcs02.msr_bitmap here since it gets completely overwritten
+ * in the merging. We update the vmcs01 here for L1 as well
+ * since it will end up touching the MSR anyway now.
+ */
+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+ MSR_IA32_SPEC_CTRL,
+ MSR_TYPE_RW);
+ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibpb(vcpu))
+ return 1;
+
+ if (data & ~PRED_CMD_IBPB)
+ return 1;
+
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_vmx_merge_msr_bitmap. We should not touch the
+ * vmcs02.msr_bitmap here since it gets completely overwritten
+ * in the merging.
+ */
+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
+ MSR_TYPE_W);
+ break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated)
+ return 1;
+ vmx->arch_capabilities = data;
+ break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -3523,11 +3628,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
return vmcs;
}
-static struct vmcs *alloc_vmcs(void)
-{
- return alloc_vmcs_cpu(raw_smp_processor_id());
-}
-
static void free_vmcs(struct vmcs *vmcs)
{
free_pages((unsigned long)vmcs, vmcs_config.order);
@@ -3543,9 +3643,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
loaded_vmcs_clear(loaded_vmcs);
free_vmcs(loaded_vmcs->vmcs);
loaded_vmcs->vmcs = NULL;
+ if (loaded_vmcs->msr_bitmap)
+ free_page((unsigned long)loaded_vmcs->msr_bitmap);
WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
}
+static struct vmcs *alloc_vmcs(void)
+{
+ return alloc_vmcs_cpu(raw_smp_processor_id());
+}
+
+static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+{
+ loaded_vmcs->vmcs = alloc_vmcs();
+ if (!loaded_vmcs->vmcs)
+ return -ENOMEM;
+
+ loaded_vmcs->shadow_vmcs = NULL;
+ loaded_vmcs_init(loaded_vmcs);
+
+ if (cpu_has_vmx_msr_bitmap()) {
+ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!loaded_vmcs->msr_bitmap)
+ goto out_vmcs;
+ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+ }
+ return 0;
+
+out_vmcs:
+ free_loaded_vmcs(loaded_vmcs);
+ return -ENOMEM;
+}
+
static void free_kvm_area(void)
{
int cpu;
@@ -4552,10 +4681,8 @@ static void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
-#define MSR_TYPE_R 1
-#define MSR_TYPE_W 2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -4589,8 +4716,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
+static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -4624,6 +4751,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
+static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type, bool value)
+{
+ if (value)
+ vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
+ else
+ vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
+}
+
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -4670,58 +4806,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
{
- if (!longmode_only)
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
- msr, MSR_TYPE_R | MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
- msr, MSR_TYPE_R | MSR_TYPE_W);
-}
+ u8 mode = 0;
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
+ if (cpu_has_secondary_exec_ctrls() &&
+ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
+ mode |= MSR_BITMAP_MODE_X2APIC;
+ if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
+ mode |= MSR_BITMAP_MODE_X2APIC_APICV;
}
+
+ if (is_long_mode(vcpu))
+ mode |= MSR_BITMAP_MODE_LM;
+
+ return mode;
}
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+
+static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
+ u8 mode)
{
- if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
+ int msr;
+
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ }
+
+ if (mode & MSR_BITMAP_MODE_X2APIC) {
+ /*
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
+ */
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
+ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
+ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
+ }
}
}
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
{
- if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_W);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- }
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+ u8 mode = vmx_msr_bitmap_mode(vcpu);
+ u8 changed = mode ^ vmx->msr_bitmap_mode;
+
+ if (!changed)
+ return;
+
+ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
+ !(mode & MSR_BITMAP_MODE_LM));
+
+ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
+ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
+
+ vmx->msr_bitmap_mode = mode;
}
static bool vmx_get_enable_apicv(void)
@@ -4729,30 +4875,45 @@ static bool vmx_get_enable_apicv(void)
return enable_apicv;
}
-static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ gfn_t gfn;
+
+ /*
+ * Don't need to mark the APIC access page dirty; it is never
+ * written to by the CPU during APIC virtualization.
+ */
+
+ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
+ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
+ }
+
+ if (nested_cpu_has_posted_intr(vmcs12)) {
+ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
+ }
+}
+
+
+static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
void *vapic_page;
u16 status;
- if (vmx->nested.pi_desc &&
- vmx->nested.pi_pending) {
- vmx->nested.pi_pending = false;
- if (!pi_test_and_clear_on(vmx->nested.pi_desc))
- return 0;
-
- max_irr = find_last_bit(
- (unsigned long *)vmx->nested.pi_desc->pir, 256);
+ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
+ return;
- if (max_irr == 256)
- return 0;
+ vmx->nested.pi_pending = false;
+ if (!pi_test_and_clear_on(vmx->nested.pi_desc))
+ return;
+ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
+ if (max_irr != 256) {
vapic_page = kmap(vmx->nested.virtual_apic_page);
- if (!vapic_page) {
- WARN_ON(1);
- return -ENOMEM;
- }
__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
kunmap(vmx->nested.virtual_apic_page);
@@ -4763,7 +4924,8 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
vmcs_write16(GUEST_INTR_STATUS, status);
}
}
- return 0;
+
+ nested_mark_vmcs12_pages_dirty(vcpu);
}
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -4810,14 +4972,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
- /* the PIR and ON have been set by L1. */
- kvm_vcpu_trigger_posted_interrupt(vcpu);
/*
* If a posted intr is not recognized by hardware,
* we will accomplish it in the next vmentry.
*/
vmx->nested.pi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ /* the PIR and ON have been set by L1. */
+ if (!kvm_vcpu_trigger_posted_interrupt(vcpu))
+ kvm_vcpu_kick(vcpu);
return 0;
}
return -1;
@@ -4950,7 +5113,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
}
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
}
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -5039,7 +5202,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
}
if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
+ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
@@ -5113,6 +5276,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
++vmx->nmsrs;
}
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
@@ -5141,6 +5306,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u64 cr0;
vmx->rmode.vm86_active = 0;
+ vmx->spec_ctrl = 0;
vmx->soft_vnmi_blocked = 0;
@@ -5185,7 +5351,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}
- vmcs_writel(GUEST_RFLAGS, 0x02);
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -6248,7 +6414,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
return 1;
- err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
+ err = emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@@ -6370,7 +6536,7 @@ static void wakeup_handler(void)
static __init int hardware_setup(void)
{
- int r = -ENOMEM, i, msr;
+ int r = -ENOMEM, i;
rdmsrl_safe(MSR_EFER, &host_efer);
@@ -6385,41 +6551,13 @@ static __init int hardware_setup(void)
if (!vmx_io_bitmap_b)
goto out;
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy)
- goto out1;
-
- vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic)
- goto out2;
-
- vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
- goto out3;
-
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode)
- goto out4;
-
- vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic)
- goto out5;
-
- vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
- goto out6;
-
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
- goto out7;
+ goto out1;
vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmwrite_bitmap)
- goto out8;
+ goto out2;
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -6428,12 +6566,9 @@ static __init int hardware_setup(void)
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
- goto out9;
+ goto out3;
}
if (boot_cpu_has(X86_FEATURE_NX))
@@ -6490,47 +6625,8 @@ static __init int hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 48;
}
- vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-
- memcpy(vmx_msr_bitmap_legacy_x2apic,
- vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
- vmx_msr_bitmap_longmode, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- vmx_msr_bitmap_longmode, PAGE_SIZE);
-
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
- /*
- * enable_apicv && kvm_vcpu_apicv_active()
- */
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr, true);
-
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839, true);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808, true);
- /* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b, true);
- /* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f, true);
-
- /*
- * (enable_apicv && !kvm_vcpu_apicv_active()) ||
- * !enable_apicv
- */
- /* TPR */
- vmx_disable_intercept_msr_read_x2apic(0x808, false);
- vmx_disable_intercept_msr_write_x2apic(0x808, false);
-
if (enable_ept) {
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
@@ -6576,22 +6672,10 @@ static __init int hardware_setup(void)
return alloc_kvm_area();
-out9:
- free_page((unsigned long)vmx_vmwrite_bitmap);
-out8:
- free_page((unsigned long)vmx_vmread_bitmap);
-out7:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-out6:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
-out4:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+ free_page((unsigned long)vmx_vmwrite_bitmap);
out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
+ free_page((unsigned long)vmx_vmread_bitmap);
out1:
free_page((unsigned long)vmx_io_bitmap_b);
out:
@@ -6602,12 +6686,6 @@ out:
static __exit void hardware_unsetup(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
@@ -6655,94 +6733,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
}
/*
- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
- * We could reuse a single VMCS for all the L2 guests, but we also want the
- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
- * allows keeping them loaded on the processor, and in the future will allow
- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
- * every entry if they never change.
- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
- *
- * The following functions allocate and free a vmcs02 in this pool.
- */
-
-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
-{
- struct vmcs02_list *item;
- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
- if (item->vmptr == vmx->nested.current_vmptr) {
- list_move(&item->list, &vmx->nested.vmcs02_pool);
- return &item->vmcs02;
- }
-
- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
- /* Recycle the least recently used VMCS. */
- item = list_last_entry(&vmx->nested.vmcs02_pool,
- struct vmcs02_list, list);
- item->vmptr = vmx->nested.current_vmptr;
- list_move(&item->list, &vmx->nested.vmcs02_pool);
- return &item->vmcs02;
- }
-
- /* Create a new VMCS */
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
- if (!item)
- return NULL;
- item->vmcs02.vmcs = alloc_vmcs();
- item->vmcs02.shadow_vmcs = NULL;
- if (!item->vmcs02.vmcs) {
- kfree(item);
- return NULL;
- }
- loaded_vmcs_init(&item->vmcs02);
- item->vmptr = vmx->nested.current_vmptr;
- list_add(&(item->list), &(vmx->nested.vmcs02_pool));
- vmx->nested.vmcs02_num++;
- return &item->vmcs02;
-}
-
-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
-{
- struct vmcs02_list *item;
- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
- if (item->vmptr == vmptr) {
- free_loaded_vmcs(&item->vmcs02);
- list_del(&item->list);
- kfree(item);
- vmx->nested.vmcs02_num--;
- return;
- }
-}
-
-/*
- * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
- * must be &vmx->vmcs01.
- */
-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
-{
- struct vmcs02_list *item, *n;
-
- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
- /*
- * Something will leak if the above WARN triggers. Better than
- * a use-after-free.
- */
- if (vmx->loaded_vmcs == &item->vmcs02)
- continue;
-
- free_loaded_vmcs(&item->vmcs02);
- list_del(&item->list);
- kfree(item);
- vmx->nested.vmcs02_num--;
- }
-}
-
-/*
* The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
* set the success or error code of an emulated VMX instruction, as specified
* by Vol 2B, VMX Instruction Reference, "Conventions".
@@ -7016,6 +7006,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
struct vmcs *shadow_vmcs;
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ int r;
/* The Intel VMX Instruction Reference lists a bunch of bits that
* are prerequisite to running VMXON, most notably cr4.VMXE must be
@@ -7055,12 +7046,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
- if (cpu_has_vmx_msr_bitmap()) {
- vmx->nested.msr_bitmap =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx->nested.msr_bitmap)
- goto out_msr_bitmap;
- }
+ r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
+ if (r < 0)
+ goto out_vmcs02;
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
@@ -7077,9 +7065,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
vmx->vmcs01.shadow_vmcs = shadow_vmcs;
}
- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
- vmx->nested.vmcs02_num = 0;
-
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7094,9 +7079,9 @@ out_shadow_vmcs:
kfree(vmx->nested.cached_vmcs12);
out_cached_vmcs12:
- free_page((unsigned long)vmx->nested.msr_bitmap);
+ free_loaded_vmcs(&vmx->nested.vmcs02);
-out_msr_bitmap:
+out_vmcs02:
return -ENOMEM;
}
@@ -7172,17 +7157,13 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
- if (vmx->nested.msr_bitmap) {
- free_page((unsigned long)vmx->nested.msr_bitmap);
- vmx->nested.msr_bitmap = NULL;
- }
if (enable_shadow_vmcs) {
vmcs_clear(vmx->vmcs01.shadow_vmcs);
free_vmcs(vmx->vmcs01.shadow_vmcs);
vmx->vmcs01.shadow_vmcs = NULL;
}
kfree(vmx->nested.cached_vmcs12);
- /* Unpin physical memory we referred to in current vmcs02 */
+ /* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
@@ -7198,7 +7179,7 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.pi_desc = NULL;
}
- nested_free_all_saved_vmcss(vmx);
+ free_loaded_vmcs(&vmx->nested.vmcs02);
}
/* Emulate the VMXOFF instruction */
@@ -7232,8 +7213,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
vmptr + offsetof(struct vmcs12, launch_state),
&zero, sizeof(zero));
- nested_free_vmcs02(vmx, vmptr);
-
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
return 1;
@@ -7711,7 +7690,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
- types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
+ types = (vmx->nested.nested_vmx_vpid_caps &
+ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
@@ -7733,21 +7713,27 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
}
switch (type) {
+ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
- /*
- * Old versions of KVM use the single-context version so we
- * have to support it; just treat it the same as all-context.
- */
+ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
+ if (!vpid) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ skip_emulated_instruction(vcpu);
+ return 1;
+ }
+ break;
case VMX_VPID_EXTENT_ALL_CONTEXT:
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
- nested_vmx_succeed(vcpu);
break;
default:
- /* Trap individual address invalidation invvpid calls */
- BUG_ON(1);
- break;
+ WARN_ON_ONCE(1);
+ skip_emulated_instruction(vcpu);
+ return 1;
}
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+ nested_vmx_succeed(vcpu);
+
skip_emulated_instruction(vcpu);
return 1;
}
@@ -8020,6 +8006,19 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
KVM_ISA_VMX);
+ /*
+ * The host physical addresses of some pages of guest memory
+ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+ * Page). The CPU may write to these pages via their host
+ * physical address while L2 is running, bypassing any
+ * address-translation-based dirty tracking (e.g. EPT write
+ * protection).
+ *
+ * Mark them dirty on every exit from L2 to prevent them from
+ * getting out of sync with dirty tracking.
+ */
+ nested_mark_vmcs12_pages_dirty(vcpu);
+
if (vmx->nested.nested_run_pending)
return false;
@@ -8511,7 +8510,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
}
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
@@ -8667,14 +8666,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
#endif
"pushf\n\t"
__ASM_SIZE(push) " $%c[cs]\n\t"
- "call *%[entry]\n\t"
+ CALL_NOSPEC
:
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
#endif
"+r"(__sp)
:
- [entry]"r"(entry),
+ THUNK_TARGET(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
@@ -8900,6 +8899,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_arm_hv_timer(vcpu);
+ /*
+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+ * it's non-zero. Since vmentry is serialising on affected CPUs, there
+ * is no need to worry about the conditional branch over the wrmsr
+ * being speculatively taken.
+ */
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -8948,6 +8956,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
"pop %0 \n\t"
+ "setbe %c[fail](%0)\n\t"
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -8964,12 +8973,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
#endif
"mov %%cr2, %%" _ASM_AX " \n\t"
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ "xor %%eax, %%eax \n\t"
+ "xor %%ebx, %%ebx \n\t"
+ "xor %%esi, %%esi \n\t"
+ "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
- "setbe %c[fail](%0) \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9006,6 +9026,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /*
+ * We do not use IBRS in the kernel. If this vCPU has used the
+ * SPEC_CTRL MSR it may have left it on; save the value and
+ * turn it off. This is much more efficient than blindly adding
+ * it to the atomic save/restore list. Especially as the former
+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ *
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);
@@ -9116,6 +9160,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ unsigned long *msr_bitmap;
int cpu;
if (!vmx)
@@ -9148,17 +9193,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!vmx->guest_msrs)
goto free_pml;
- vmx->loaded_vmcs = &vmx->vmcs01;
- vmx->loaded_vmcs->vmcs = alloc_vmcs();
- vmx->loaded_vmcs->shadow_vmcs = NULL;
- if (!vmx->loaded_vmcs->vmcs)
- goto free_msrs;
if (!vmm_exclusive)
kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
- loaded_vmcs_init(vmx->loaded_vmcs);
+ err = alloc_loaded_vmcs(&vmx->vmcs01);
if (!vmm_exclusive)
kvm_cpu_vmxoff();
+ if (err < 0)
+ goto free_msrs;
+
+ msr_bitmap = vmx->vmcs01.msr_bitmap;
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+ vmx->msr_bitmap_mode = 0;
+ vmx->loaded_vmcs = &vmx->vmcs01;
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
vmx->vcpu.cpu = cpu;
@@ -9552,21 +9604,31 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
int msr;
struct page *page;
unsigned long *msr_bitmap_l1;
- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+ /*
+ * pred_cmd & spec_ctrl are trying to verify two things:
+ *
+ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+ * ensures that we do not accidentally generate an L02 MSR bitmap
+ * from the L12 MSR bitmap that is too permissive.
+ * 2. That L1 or L2s have actually used the MSR. This avoids
+ * unnecessarily merging of the bitmap if the MSR is unused. This
+ * works properly because we only update the L01 MSR bitmap lazily.
+ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+ * updated to reflect this when L1 (or its L2s) actually write to
+ * the MSR.
+ */
+ bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+ bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
- /* This shortcut is ok because we support only x2APIC MSRs so far. */
- if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
+ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+ !pred_cmd && !spec_ctrl)
return false;
page = nested_get_page(vcpu, vmcs12->msr_bitmap);
if (!page)
return false;
msr_bitmap_l1 = (unsigned long *)kmap(page);
- if (!msr_bitmap_l1) {
- nested_release_page_clean(page);
- WARN_ON(1);
- return false;
- }
memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
@@ -9593,6 +9655,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
MSR_TYPE_W);
}
}
+
+ if (spec_ctrl)
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_SPEC_CTRL,
+ MSR_TYPE_R | MSR_TYPE_W);
+
+ if (pred_cmd)
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PRED_CMD,
+ MSR_TYPE_W);
+
kunmap(page);
nested_release_page_clean(page);
@@ -10072,6 +10147,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+
if (enable_vpid) {
/*
* There is no direct mapping between vpid02 and vpid12, the
@@ -10167,7 +10245,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
- struct loaded_vmcs *vmcs02;
bool ia32e;
u32 msr_entry_idx;
@@ -10307,17 +10384,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* the nested entry.
*/
- vmcs02 = nested_get_current_vmcs02(vmx);
- if (!vmcs02)
- return -ENOMEM;
-
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
cpu = get_cpu();
- vmx->loaded_vmcs = vmcs02;
+ vmx->loaded_vmcs = &vmx->nested.vmcs02;
vmx_vcpu_put(vcpu);
vmx_vcpu_load(vcpu, cpu);
vcpu->cpu = cpu;
@@ -10469,7 +10542,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
- return vmx_complete_nested_posted_interrupt(vcpu);
+ vmx_complete_nested_posted_interrupt(vcpu);
+ return 0;
}
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
@@ -10780,7 +10854,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count))
@@ -10831,10 +10905,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
- /* if no vmcs02 cache requested, remove the one we used */
- if (VMCS02_POOL_SIZE == 0)
- nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
-
load_vmcs12_host_state(vcpu, vmcs12);
/* Update any VMCS fields that might have changed while L2 ran */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 73304b1a03cc..4b19ec1da22d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -975,6 +975,7 @@ static u32 msrs_to_save[] = {
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
};
static unsigned num_msrs_to_save;
@@ -1751,10 +1752,13 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
- kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
- &hv_clock.tsc_shift,
- &hv_clock.tsc_to_system_mul);
- ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ if (__this_cpu_read(cpu_tsc_khz)) {
+ kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+ &hv_clock.tsc_shift,
+ &hv_clock.tsc_to_system_mul);
+ ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ } else
+ ret = ktime_get_boot_ns() + ka->kvmclock_offset;
put_cpu();
@@ -2843,6 +2847,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
+ /*
+ * If userspace has set any breakpoints or watchpoints, dr6 is restored
+ * on every vmexit, but if not, we might have a stale dr6 from the
+ * guest. do_debug expects dr6 to be cleared after it runs, do the same.
+ */
+ set_debugreg(0, 6);
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -4264,7 +4274,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
handled += n;
addr += n;
len -= n;
@@ -4517,7 +4527,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4539,14 +4549,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
return X86EMUL_IO_NEEDED;
}
@@ -5308,7 +5318,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
- r = EMULATE_FAIL;
+ r = EMULATE_USER_EXIT;
}
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -8417,6 +8427,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
sizeof(val));
}
+static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
+{
+
+ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
+ sizeof(u32));
+}
+
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
@@ -8443,6 +8460,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
struct x86_exception fault;
+ u32 val;
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
@@ -8450,14 +8468,24 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
- fault.vector = PF_VECTOR;
- fault.error_code_valid = true;
- fault.error_code = 0;
- fault.nested_page_fault = false;
- fault.address = work->arch.token;
- kvm_inject_page_fault(vcpu, &fault);
+ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
+ !apf_get_user(vcpu, &val)) {
+ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
+ vcpu->arch.exception.pending &&
+ vcpu->arch.exception.nr == PF_VECTOR &&
+ !apf_put_user(vcpu, 0)) {
+ vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.nr = 0;
+ vcpu->arch.exception.has_error_code = false;
+ vcpu->arch.exception.error_code = 0;
+ } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+ fault.vector = PF_VECTOR;
+ fault.error_code_valid = true;
+ fault.error_code = 0;
+ fault.nested_page_fault = false;
+ fault.address = work->arch.token;
+ kvm_inject_page_fault(vcpu, &fault);
+ }
}
vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 34a74131a12c..4ad7c4dd311c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -25,6 +25,8 @@ lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
+OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4d34bb548b41..46e71a74e612 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
index d6f848d1211d..2dd1fe13a37b 100644
--- a/arch/x86/lib/cpu.c
+++ b/arch/x86/lib/cpu.c
@@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig)
{
unsigned int fam, model;
- fam = x86_family(sig);
+ fam = x86_family(sig);
model = (sig >> 4) & 0xf;
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 073d1f1a620b..9758524ee99f 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -93,6 +93,13 @@ static void delay_mwaitx(unsigned long __loops)
{
u64 start, end, delay, loops = __loops;
+ /*
+ * Timer value of 0 causes MWAITX to wait indefinitely, unless there
+ * is a store on the memory monitored by MONITORX.
+ */
+ if (loops == 0)
+ return;
+
start = rdtsc_ordered();
for (;;) {
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 37b62d412148..b12b214713a6 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -39,6 +39,8 @@ ENTRY(__get_user_1)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
@@ -53,6 +55,8 @@ ENTRY(__get_user_2)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
2: movzwl -1(%_ASM_AX),%edx
xor %eax,%eax
@@ -67,6 +71,8 @@ ENTRY(__get_user_4)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
3: movl -3(%_ASM_AX),%edx
xor %eax,%eax
@@ -82,6 +88,8 @@ ENTRY(__get_user_8)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movq -7(%_ASM_AX),%rdx
xor %eax,%eax
@@ -93,6 +101,8 @@ ENTRY(__get_user_8)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user_8
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movl -7(%_ASM_AX),%edx
5: movl -3(%_ASM_AX),%ecx
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 000000000000..480edc3a5e03
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+#include <asm/bitsperlong.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk
+
+ENTRY(__x86_indirect_thunk_\reg)
+ CFI_STARTPROC
+ JMP_NOSPEC %\reg
+ CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+.macro STUFF_RSB nr:req sp:req
+ mov $(\nr / 2), %_ASM_BX
+ .align 16
+771:
+ call 772f
+773: /* speculation trap */
+ pause
+ lfence
+ jmp 773b
+ .align 16
+772:
+ call 774f
+775: /* speculation trap */
+ pause
+ lfence
+ jmp 775b
+ .align 16
+774:
+ dec %_ASM_BX
+ jnz 771b
+ add $((BITS_PER_LONG/8) * \nr), \sp
+.endm
+
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+ENTRY(__fill_rsb)
+ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
+ ret
+END(__fill_rsb)
+EXPORT_SYMBOL_GPL(__fill_rsb)
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+
+ENTRY(__clear_rsb)
+ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
+ ret
+END(__clear_rsb)
+EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 3bc7baf2a711..5c06dbffc52f 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -570,12 +570,12 @@ do { \
unsigned long __copy_to_user_ll(void __user *to, const void *from,
unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
n = __copy_user_intel(to, from, n);
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_to_user_ll);
@@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache);
unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_intel_nocache(to, from, n);
@@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
#else
__copy_user(to, from, n);
#endif
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile
index 9b0c63b60302..1b2dac174321 100644
--- a/arch/x86/math-emu/Makefile
+++ b/arch/x86/math-emu/Makefile
@@ -5,8 +5,8 @@
#DEBUG = -DDEBUGGING
DEBUG =
PARANOID = -DPARANOID
-EXTRA_CFLAGS := $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
-EXTRA_AFLAGS := $(PARANOID)
+ccflags-y += $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
+asflags-y += $(PARANOID)
# From 'C' language sources:
C_OBJS =fpu_entry.o errors.o \
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index b77360fdbf4a..19b33b50adfa 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -168,7 +168,7 @@ static int compare(FPU_REG const *b, int tagb)
/* This function requires that st(0) is not empty */
int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
{
- int f = 0, c;
+ int f, c;
c = compare(loaded_data, loaded_tag);
@@ -189,12 +189,12 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
case COMP_No_Comp:
f = SW_C3 | SW_C2 | SW_C0;
break;
-#ifdef PARANOID
default:
+#ifdef PARANOID
EXCEPTION(EX_INTERNAL | 0x121);
+#endif /* PARANOID */
f = SW_C3 | SW_C2 | SW_C0;
break;
-#endif /* PARANOID */
}
setcc(f);
if (c & COMP_Denormal) {
@@ -205,7 +205,7 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
static int compare_st_st(int nr)
{
- int f = 0, c;
+ int f, c;
FPU_REG *st_ptr;
if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
@@ -235,12 +235,12 @@ static int compare_st_st(int nr)
case COMP_No_Comp:
f = SW_C3 | SW_C2 | SW_C0;
break;
-#ifdef PARANOID
default:
+#ifdef PARANOID
EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
f = SW_C3 | SW_C2 | SW_C0;
break;
-#endif /* PARANOID */
}
setcc(f);
if (c & COMP_Denormal) {
@@ -283,12 +283,12 @@ static int compare_i_st_st(int nr)
case COMP_No_Comp:
f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
break;
-#ifdef PARANOID
default:
+#ifdef PARANOID
EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
f = 0;
break;
-#endif /* PARANOID */
}
FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
if (c & COMP_Denormal) {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8b5ff88aa4f8..74dea7f14c20 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -191,14 +191,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+ u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return;
/* Fault not from Protection Keys: nothing to do */
- if (si_code != SEGV_PKUERR)
+ if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
return;
/*
* force_sig_info_fault() is called from a number of
@@ -237,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, pkey);
+ fill_sig_info_pkey(si_signo, si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 7aaa2635862d..ecae9ac216fa 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -347,11 +347,11 @@ void iounmap(volatile void __iomem *addr)
(void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
return;
+ mmiotrace_iounmap(addr);
+
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
- mmiotrace_iounmap(addr);
-
/* Use the vm area unlocked, assuming the caller
ensures there isn't another iounmap for the same address
in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index 8f8e5e03d083..ec678aafa3f8 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -197,6 +197,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
* requires that not to be #defined to 0): so mask it off here.
*/
flags &= ~_PAGE_GLOBAL;
+ if (!(__supported_pte_mask & _PAGE_NX))
+ flags &= ~_PAGE_NX;
for (; address < end_addr; address += PAGE_SIZE) {
target_address = get_pa_from_mapping(address);
@@ -342,7 +344,7 @@ void __init kaiser_init(void)
if (vsyscall_enabled())
kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
PAGE_SIZE,
- __PAGE_KERNEL_VSYSCALL);
+ vsyscall_pgprot);
for_each_possible_cpu(cpu) {
void *percpu_vaddr = __per_cpu_user_mapped_start +
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index afc47f5c9531..cadb82be5f36 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -434,17 +434,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
unsigned long flags;
int ret = 0;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
unsigned int l;
pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
- if (get_kmmio_probe(p->addr)) {
+ if (get_kmmio_probe(addr)) {
ret = -EEXIST;
goto out;
}
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte) {
ret = -EINVAL;
goto out;
@@ -453,7 +454,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
- if (add_kmmio_fault_page(p->addr + size))
+ if (add_kmmio_fault_page(addr + size))
pr_err("Unable to set page fault.\n");
size += page_level_size(l);
}
@@ -527,19 +528,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
{
unsigned long flags;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
unsigned int l;
pte_t *pte;
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte)
return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
- release_kmmio_fault_page(p->addr + size, &release_list);
+ release_kmmio_fault_page(addr + size, &release_list);
size += page_level_size(l);
}
list_del_rcu(&p->list);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5aaec8effc5f..209b9465e97a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd)
}
#else
-/*
- * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
- * both 8k in size and 8k-aligned. That lets us just flip bit 12
- * in a pointer to swap between the two 4k halves.
- */
-#define PGD_ALLOCATION_ORDER kaiser_enabled
-
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 41205de487e7..578973ade71b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+ unsigned int stack_pgd_index = pgd_index(current_stack_pointer);
pgd_t *pgd = next->pgd + stack_pgd_index;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 15f743615923..7840331d3056 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -278,10 +278,10 @@ static void emit_bpf_tail_call(u8 **pprog)
/* if (index >= array->map.max_entries)
* goto out;
*/
- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
+ EMIT2(0x89, 0xD2); /* mov edx, edx */
+ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
-#define OFFSET1 47 /* number of bytes to jump */
+#define OFFSET1 43 /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -290,21 +290,20 @@ static void emit_bpf_tail_call(u8 **pprog)
*/
EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 36
+#define OFFSET2 32
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
/* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
- EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */
/* if (prog == NULL)
* goto out;
*/
- EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */
+ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
#define OFFSET3 10
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 2f25a363068c..dcb2d9d185a2 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void)
return 0;
gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
- efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd)
return -ENOMEM;
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h
index b39531babec0..72bfc1cbc2b5 100644
--- a/arch/xtensa/include/asm/futex.h
+++ b/arch/xtensa/include/asm/futex.h
@@ -109,7 +109,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
int ret = 0;
- u32 prev;
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
@@ -120,26 +119,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
__asm__ __volatile__ (
" # futex_atomic_cmpxchg_inatomic\n"
- "1: l32i %1, %3, 0\n"
- " mov %0, %5\n"
- " wsr %1, scompare1\n"
- "2: s32c1i %0, %3, 0\n"
- "3:\n"
+ " wsr %5, scompare1\n"
+ "1: s32c1i %1, %4, 0\n"
+ " s32i %1, %6, 0\n"
+ "2:\n"
" .section .fixup,\"ax\"\n"
" .align 4\n"
- "4: .long 3b\n"
- "5: l32r %1, 4b\n"
- " movi %0, %6\n"
+ "3: .long 2b\n"
+ "4: l32r %1, 3b\n"
+ " movi %0, %7\n"
" jx %1\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- " .long 1b,5b,2b,5b\n"
+ " .long 1b,4b\n"
" .previous\n"
- : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
- : "r" (uaddr), "r" (oldval), "r" (newval), "I" (-EFAULT)
+ : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval)
+ : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT)
: "memory");
- *uval = prev;
return ret;
}