diff options
Diffstat (limited to 'arch')
160 files changed, 2421 insertions, 1537 deletions
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a07e6ecafbd..71d3efff99d3 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -117,7 +117,7 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a208bfe367b5..61a0cb15067e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -380,7 +380,7 @@ config ARCH_EP93XX bool "EP93xx-based" select ARCH_HAS_HOLES_MEMORYMODEL select ARM_AMBA - select ARM_PATCH_PHYS_VIRT + imply ARM_PATCH_PHYS_VIRT select ARM_VIC select AUTO_ZRELADDR select CLKDEV_LOOKUP diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index 895fa6cfa15a..563901e0ec07 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -75,7 +75,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pca0_pins>; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; @@ -87,7 +87,7 @@ compatible = "nxp,pca9555"; pinctrl-names = "default"; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index a423e8ebfb37..67e72bc72e80 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -301,25 +301,4 @@ pinctrl-names = "default"; pinctrl-0 = <&vpif_capture_pins>, <&vpif_display_pins>; status = "okay"; - - /* VPIF capture port */ - port@0 { - vpif_input_ch0: endpoint@0 { - reg = <0>; - bus-width = <8>; - }; - - vpif_input_ch1: endpoint@1 { - reg = <1>; - bus-width = <8>; - data-shift = <8>; - }; - }; - - /* VPIF display port */ - port@1 { - vpif_output_ch0: endpoint { - bus-width = <8>; - }; - }; }; diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index b837fec70eec..a0f0916156e6 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -318,11 +318,4 @@ pinctrl-names = "default"; pinctrl-0 = <&vpif_capture_pins>; status = "okay"; - - /* VPIF capture port */ - port { - vpif_ch0: endpoint { - bus-width = <8>; - }; - }; }; diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index 1865976db5f9..c72a2132aa82 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -68,6 +68,34 @@ DM816X_IOPAD(0x0d08, MUX_MODE0) /* USB1_DRVVBUS */ >; }; + + nandflash_pins: nandflash_pins { + pinctrl-single,pins = < + DM816X_IOPAD(0x0b38, PULL_UP | MUX_MODE0) /* PINCTRL207 GPMC_CS0*/ + DM816X_IOPAD(0x0b60, PULL_ENA | MUX_MODE0) /* PINCTRL217 GPMC_ADV_ALE */ + DM816X_IOPAD(0x0b54, PULL_UP | PULL_ENA | MUX_MODE0) /* PINCTRL214 GPMC_OE_RE */ + DM816X_IOPAD(0x0b58, PULL_ENA | MUX_MODE0) /* PINCTRL215 GPMC_BE0_CLE */ + DM816X_IOPAD(0x0b50, PULL_UP | MUX_MODE0) /* PINCTRL213 GPMC_WE */ + DM816X_IOPAD(0x0b6c, MUX_MODE0) /* PINCTRL220 GPMC_WAIT */ + DM816X_IOPAD(0x0be4, PULL_ENA | MUX_MODE0) /* PINCTRL250 GPMC_CLK */ + DM816X_IOPAD(0x0ba4, MUX_MODE0) /* PINCTRL234 GPMC_D0 */ + DM816X_IOPAD(0x0ba8, MUX_MODE0) /* PINCTRL234 GPMC_D1 */ + DM816X_IOPAD(0x0bac, MUX_MODE0) /* PINCTRL234 GPMC_D2 */ + DM816X_IOPAD(0x0bb0, MUX_MODE0) /* PINCTRL234 GPMC_D3 */ + DM816X_IOPAD(0x0bb4, MUX_MODE0) /* PINCTRL234 GPMC_D4 */ + DM816X_IOPAD(0x0bb8, MUX_MODE0) /* PINCTRL234 GPMC_D5 */ + DM816X_IOPAD(0x0bbc, MUX_MODE0) /* PINCTRL234 GPMC_D6 */ + DM816X_IOPAD(0x0bc0, MUX_MODE0) /* PINCTRL234 GPMC_D7 */ + DM816X_IOPAD(0x0bc4, MUX_MODE0) /* PINCTRL234 GPMC_D8 */ + DM816X_IOPAD(0x0bc8, MUX_MODE0) /* PINCTRL234 GPMC_D9 */ + DM816X_IOPAD(0x0bcc, MUX_MODE0) /* PINCTRL234 GPMC_D10 */ + DM816X_IOPAD(0x0bd0, MUX_MODE0) /* PINCTRL234 GPMC_D11 */ + DM816X_IOPAD(0x0bd4, MUX_MODE0) /* PINCTRL234 GPMC_D12 */ + DM816X_IOPAD(0x0bd8, MUX_MODE0) /* PINCTRL234 GPMC_D13 */ + DM816X_IOPAD(0x0bdc, MUX_MODE0) /* PINCTRL234 GPMC_D14 */ + DM816X_IOPAD(0x0be0, MUX_MODE0) /* PINCTRL234 GPMC_D15 */ + >; + }; }; &i2c1 { @@ -90,6 +118,8 @@ &gpmc { ranges = <0 0 0x04000000 0x01000000>; /* CS0: 16MB for NAND */ + pinctrl-names = "default"; + pinctrl-0 = <&nandflash_pins>; nand@0,0 { compatible = "ti,omap2-nand"; @@ -98,9 +128,11 @@ interrupt-parent = <&gpmc>; interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ <1 IRQ_TYPE_NONE>; /* termcount */ + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ #address-cells = <1>; #size-cells = <1>; ti,nand-ecc-opt = "bch8"; + ti,elm-id = <&elm>; nand-bus-width = <16>; gpmc,device-width = <2>; gpmc,sync-clk-ps = <0>; @@ -164,7 +196,7 @@ vmmc-supply = <&vmmcsd_fixed>; bus-width = <4>; cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>; }; /* At least dm8168-evm rev c won't support multipoint, later may */ diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 59cbf958fcc3..566b2a8c8b96 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -145,7 +145,7 @@ }; elm: elm@48080000 { - compatible = "ti,816-elm"; + compatible = "ti,am3352-elm"; ti,hwmods = "elm"; reg = <0x48080000 0x2000>; interrupts = <4>; diff --git a/arch/arm/boot/dts/dra71-evm.dts b/arch/arm/boot/dts/dra71-evm.dts index 4d57a55473af..a6298eb56978 100644 --- a/arch/arm/boot/dts/dra71-evm.dts +++ b/arch/arm/boot/dts/dra71-evm.dts @@ -190,7 +190,7 @@ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>; ti,tx-internal-delay = <DP83867_RGMIIDCTL_250_PS>; ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_8_B_NIB>; - ti,impedance-control = <0x1f>; + ti,min-output-impedance; }; dp83867_1: ethernet-phy@3 { @@ -198,7 +198,7 @@ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>; ti,tx-internal-delay = <DP83867_RGMIIDCTL_250_PS>; ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_8_B_NIB>; - ti,impedance-control = <0x1f>; + ti,min-output-impedance; }; }; diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index 497a9470c888..5739389f5bb8 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -59,6 +59,9 @@ compatible = "samsung,exynos4210-audss-clock"; reg = <0x03810000 0x0C>; #clock-cells = <1>; + clocks = <&clock CLK_FIN_PLL>, <&clock CLK_FOUT_EPLL>, + <&clock CLK_SCLK_AUDIO0>, <&clock CLK_SCLK_AUDIO0>; + clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in"; }; i2s0: i2s@03830000 { diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 2484f11761ea..858e1fed762a 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1126,8 +1126,8 @@ }; }; - gpu: mali@ffa30000 { - compatible = "rockchip,rk3288-mali", "arm,mali-t760", "arm,mali-midgard"; + gpu: gpu@ffa30000 { + compatible = "rockchip,rk3288-mali", "arm,mali-t760"; reg = <0xffa30000 0x10000>; interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi index 8923ba625b76..19a8f4fcfab5 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -44,7 +44,9 @@ #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/clock/sun8i-a83t-ccu.h> #include <dt-bindings/clock/sun8i-r-ccu.h> +#include <dt-bindings/reset/sun8i-a83t-ccu.h> / { interrupt-parent = <&gic>; @@ -175,8 +177,8 @@ compatible = "allwinner,sun8i-a83t-dma"; reg = <0x01c02000 0x1000>; interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&ccu 21>; - resets = <&ccu 7>; + clocks = <&ccu CLK_BUS_DMA>; + resets = <&ccu RST_BUS_DMA>; #dma-cells = <1>; }; @@ -195,7 +197,7 @@ <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; reg = <0x01c20800 0x400>; - clocks = <&ccu 45>, <&osc24M>, <&osc16Md512>; + clocks = <&ccu CLK_BUS_PIO>, <&osc24M>, <&osc16Md512>; clock-names = "apb", "hosc", "losc"; gpio-controller; interrupt-controller; @@ -247,8 +249,8 @@ "allwinner,sun8i-h3-spdif"; reg = <0x01c21000 0x400>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&ccu 44>, <&ccu 76>; - resets = <&ccu 32>; + clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>; + resets = <&ccu RST_BUS_SPDIF>; clock-names = "apb", "spdif"; dmas = <&dma 2>; dma-names = "tx"; @@ -263,8 +265,8 @@ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>; reg-shift = <2>; reg-io-width = <4>; - clocks = <&ccu 53>; - resets = <&ccu 40>; + clocks = <&ccu CLK_BUS_UART0>; + resets = <&ccu RST_BUS_UART0>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 6f2162608006..d38282b9e5d4 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -394,7 +394,7 @@ emac: ethernet@1c30000 { compatible = "allwinner,sun8i-h3-emac"; syscon = <&syscon>; - reg = <0x01c30000 0x104>; + reg = <0x01c30000 0x10000>; interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; resets = <&ccu RST_BUS_EMAC>; diff --git a/arch/arm/boot/dts/tango4-vantage-1172.dts b/arch/arm/boot/dts/tango4-vantage-1172.dts index 86d8df98802f..13bcc460bcb2 100644 --- a/arch/arm/boot/dts/tango4-vantage-1172.dts +++ b/arch/arm/boot/dts/tango4-vantage-1172.dts @@ -22,7 +22,7 @@ }; ð0 { - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; phy-handle = <ð0_phy>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index 1869af6bac5c..25021b798a1e 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -19,6 +19,11 @@ #ifndef __ASSEMBLY__ +#define ARCH_HAS_KIMAGE_ARCH +struct kimage_arch { + u32 kernel_r2; +}; + /** * crash_setup_regs() - save registers for the panic kernel * @newregs: registers are saved here diff --git a/arch/arm/include/asm/ucontext.h b/arch/arm/include/asm/ucontext.h index 14749aec94bf..921d8274855c 100644 --- a/arch/arm/include/asm/ucontext.h +++ b/arch/arm/include/asm/ucontext.h @@ -35,6 +35,12 @@ struct ucontext { * bytes, to prevent unpredictable padding in the signal frame. */ +/* + * Dummy padding block: if this magic is encountered, the block should + * be skipped using the corresponding size field. + */ +#define DUMMY_MAGIC 0xb0d9ed01 + #ifdef CONFIG_CRUNCH #define CRUNCH_MAGIC 0x5065cf03 #define CRUNCH_STORAGE_SIZE (CRUNCH_SIZE + 8) diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 15495887ca14..fe1419eeb932 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -30,7 +30,6 @@ extern unsigned long kexec_boot_atags; static atomic_t waiting_for_crash_ipi; -static unsigned long dt_mem; /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -42,6 +41,9 @@ int machine_kexec_prepare(struct kimage *image) __be32 header; int i, err; + image->arch.kernel_r2 = image->start - KEXEC_ARM_ZIMAGE_OFFSET + + KEXEC_ARM_ATAGS_OFFSET; + /* * Validate that if the current HW supports SMP, then the SW supports * and implements CPU hotplug for the current HW. If not, we won't be @@ -66,8 +68,8 @@ int machine_kexec_prepare(struct kimage *image) if (err) return err; - if (be32_to_cpu(header) == OF_DT_HEADER) - dt_mem = current_segment->mem; + if (header == cpu_to_be32(OF_DT_HEADER)) + image->arch.kernel_r2 = current_segment->mem; } return 0; } @@ -165,8 +167,7 @@ void machine_kexec(struct kimage *image) kexec_start_address = image->start; kexec_indirection_page = page_list; kexec_mach_type = machine_arch_type; - kexec_boot_atags = dt_mem ?: image->start - KEXEC_ARM_ZIMAGE_OFFSET - + KEXEC_ARM_ATAGS_OFFSET; + kexec_boot_atags = image->arch.kernel_r2; /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 4e80bf7420d4..8e9a3e40d949 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -987,6 +987,9 @@ static void __init reserve_crashkernel(void) if (crash_base <= 0) { unsigned long long crash_max = idmap_to_phys((u32)~0); + unsigned long long lowmem_max = __pa(high_memory - 1) + 1; + if (crash_max > lowmem_max) + crash_max = lowmem_max; crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max, crash_size, CRASH_ALIGN); if (!crash_base) { diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 7b8f2141427b..5814298ef0b7 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -40,8 +40,10 @@ static int preserve_crunch_context(struct crunch_sigframe __user *frame) return __copy_to_user(frame, kframe, sizeof(*frame)); } -static int restore_crunch_context(struct crunch_sigframe __user *frame) +static int restore_crunch_context(char __user **auxp) { + struct crunch_sigframe __user *frame = + (struct crunch_sigframe __user *)*auxp; char kbuf[sizeof(*frame) + 8]; struct crunch_sigframe *kframe; @@ -52,6 +54,7 @@ static int restore_crunch_context(struct crunch_sigframe __user *frame) if (kframe->magic != CRUNCH_MAGIC || kframe->size != CRUNCH_STORAGE_SIZE) return -1; + *auxp += CRUNCH_STORAGE_SIZE; crunch_task_restore(current_thread_info(), &kframe->storage); return 0; } @@ -59,21 +62,39 @@ static int restore_crunch_context(struct crunch_sigframe __user *frame) #ifdef CONFIG_IWMMXT -static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame) +static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame) { char kbuf[sizeof(*frame) + 8]; struct iwmmxt_sigframe *kframe; + int err = 0; /* the iWMMXt context must be 64 bit aligned */ kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7); - kframe->magic = IWMMXT_MAGIC; - kframe->size = IWMMXT_STORAGE_SIZE; - iwmmxt_task_copy(current_thread_info(), &kframe->storage); - return __copy_to_user(frame, kframe, sizeof(*frame)); + + if (test_thread_flag(TIF_USING_IWMMXT)) { + kframe->magic = IWMMXT_MAGIC; + kframe->size = IWMMXT_STORAGE_SIZE; + iwmmxt_task_copy(current_thread_info(), &kframe->storage); + + err = __copy_to_user(frame, kframe, sizeof(*frame)); + } else { + /* + * For bug-compatibility with older kernels, some space + * has to be reserved for iWMMXt even if it's not used. + * Set the magic and size appropriately so that properly + * written userspace can skip it reliably: + */ + __put_user_error(DUMMY_MAGIC, &frame->magic, err); + __put_user_error(IWMMXT_STORAGE_SIZE, &frame->size, err); + } + + return err; } -static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) +static int restore_iwmmxt_context(char __user **auxp) { + struct iwmmxt_sigframe __user *frame = + (struct iwmmxt_sigframe __user *)*auxp; char kbuf[sizeof(*frame) + 8]; struct iwmmxt_sigframe *kframe; @@ -81,10 +102,28 @@ static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7); if (__copy_from_user(kframe, frame, sizeof(*frame))) return -1; - if (kframe->magic != IWMMXT_MAGIC || - kframe->size != IWMMXT_STORAGE_SIZE) + + /* + * For non-iWMMXt threads: a single iwmmxt_sigframe-sized dummy + * block is discarded for compatibility with setup_sigframe() if + * present, but we don't mandate its presence. If some other + * magic is here, it's not for us: + */ + if (!test_thread_flag(TIF_USING_IWMMXT) && + kframe->magic != DUMMY_MAGIC) + return 0; + + if (kframe->size != IWMMXT_STORAGE_SIZE) return -1; - iwmmxt_task_restore(current_thread_info(), &kframe->storage); + + if (test_thread_flag(TIF_USING_IWMMXT)) { + if (kframe->magic != IWMMXT_MAGIC) + return -1; + + iwmmxt_task_restore(current_thread_info(), &kframe->storage); + } + + *auxp += IWMMXT_STORAGE_SIZE; return 0; } @@ -107,8 +146,10 @@ static int preserve_vfp_context(struct vfp_sigframe __user *frame) return vfp_preserve_user_clear_hwstate(&frame->ufp, &frame->ufp_exc); } -static int restore_vfp_context(struct vfp_sigframe __user *frame) +static int restore_vfp_context(char __user **auxp) { + struct vfp_sigframe __user *frame = + (struct vfp_sigframe __user *)*auxp; unsigned long magic; unsigned long size; int err = 0; @@ -121,6 +162,7 @@ static int restore_vfp_context(struct vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; + *auxp += size; return vfp_restore_user_hwstate(&frame->ufp, &frame->ufp_exc); } @@ -141,7 +183,7 @@ struct rt_sigframe { static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) { - struct aux_sigframe __user *aux; + char __user *aux; sigset_t set; int err; @@ -169,18 +211,18 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) err |= !valid_user_regs(regs); - aux = (struct aux_sigframe __user *) sf->uc.uc_regspace; + aux = (char __user *) sf->uc.uc_regspace; #ifdef CONFIG_CRUNCH if (err == 0) - err |= restore_crunch_context(&aux->crunch); + err |= restore_crunch_context(&aux); #endif #ifdef CONFIG_IWMMXT - if (err == 0 && test_thread_flag(TIF_USING_IWMMXT)) - err |= restore_iwmmxt_context(&aux->iwmmxt); + if (err == 0) + err |= restore_iwmmxt_context(&aux); #endif #ifdef CONFIG_VFP if (err == 0) - err |= restore_vfp_context(&aux->vfp); + err |= restore_vfp_context(&aux); #endif return err; @@ -286,7 +328,7 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) err |= preserve_crunch_context(&aux->crunch); #endif #ifdef CONFIG_IWMMXT - if (err == 0 && test_thread_flag(TIF_USING_IWMMXT)) + if (err == 0) err |= preserve_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 54442e375354..cf8bf6bf87c4 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { trace_kvm_wfx(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index b5625d009288..e568c8c6f69c 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1166,7 +1166,7 @@ static struct tvp514x_platform_data tvp5146_pdata = { #define TVP514X_STD_ALL (V4L2_STD_NTSC | V4L2_STD_PAL) -static const struct vpif_input da850_ch0_inputs[] = { +static struct vpif_input da850_ch0_inputs[] = { { .input = { .index = 0, @@ -1181,7 +1181,7 @@ static const struct vpif_input da850_ch0_inputs[] = { }, }; -static const struct vpif_input da850_ch1_inputs[] = { +static struct vpif_input da850_ch1_inputs[] = { { .input = { .index = 0, diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c index f5dce9b4e617..f77a4f766050 100644 --- a/arch/arm/mach-davinci/clock.c +++ b/arch/arm/mach-davinci/clock.c @@ -218,6 +218,15 @@ int clk_set_parent(struct clk *clk, struct clk *parent) } EXPORT_SYMBOL(clk_set_parent); +struct clk *clk_get_parent(struct clk *clk) +{ + if (!clk) + return NULL; + + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); + int clk_register(struct clk *clk) { if (clk == NULL || IS_ERR(clk)) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 39ef3b613912..beec5f16443a 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -475,6 +475,26 @@ int clk_set_rate(struct clk *clk, unsigned long rate) } EXPORT_SYMBOL(clk_set_rate); +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); + static char fclk_divisors[] = { 1, 2, 4, 8, 16, 1, 1, 1 }; static char hclk_divisors[] = { 1, 2, 4, 5, 6, 8, 16, 32 }; diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h index 7a0c13bf4269..844e8ac593e2 100644 --- a/arch/arm/mach-ixp4xx/include/mach/io.h +++ b/arch/arm/mach-ixp4xx/include/mach/io.h @@ -95,8 +95,10 @@ static inline void __indirect_writeb(u8 value, volatile void __iomem *p) } static inline void __indirect_writesb(volatile void __iomem *bus_addr, - const u8 *vaddr, int count) + const void *p, int count) { + const u8 *vaddr = p; + while (count--) writeb(*vaddr++, bus_addr); } @@ -118,8 +120,10 @@ static inline void __indirect_writew(u16 value, volatile void __iomem *p) } static inline void __indirect_writesw(volatile void __iomem *bus_addr, - const u16 *vaddr, int count) + const void *p, int count) { + const u16 *vaddr = p; + while (count--) writew(*vaddr++, bus_addr); } @@ -137,8 +141,9 @@ static inline void __indirect_writel(u32 value, volatile void __iomem *p) } static inline void __indirect_writesl(volatile void __iomem *bus_addr, - const u32 *vaddr, int count) + const void *p, int count) { + const u32 *vaddr = p; while (count--) writel(*vaddr++, bus_addr); } @@ -160,8 +165,10 @@ static inline u8 __indirect_readb(const volatile void __iomem *p) } static inline void __indirect_readsb(const volatile void __iomem *bus_addr, - u8 *vaddr, u32 count) + void *p, u32 count) { + u8 *vaddr = p; + while (count--) *vaddr++ = readb(bus_addr); } @@ -183,8 +190,10 @@ static inline u16 __indirect_readw(const volatile void __iomem *p) } static inline void __indirect_readsw(const volatile void __iomem *bus_addr, - u16 *vaddr, u32 count) + void *p, u32 count) { + u16 *vaddr = p; + while (count--) *vaddr++ = readw(bus_addr); } @@ -204,8 +213,10 @@ static inline u32 __indirect_readl(const volatile void __iomem *p) } static inline void __indirect_readsl(const volatile void __iomem *bus_addr, - u32 *vaddr, u32 count) + void *p, u32 count) { + u32 *vaddr = p; + while (count--) *vaddr++ = readl(bus_addr); } @@ -523,8 +534,15 @@ static inline void iowrite32_rep(void __iomem *addr, const void *vaddr, #endif } -#define ioport_map(port, nr) ((void __iomem*)(port + PIO_OFFSET)) -#define ioport_unmap(addr) +#define ioport_map(port, nr) ioport_map(port, nr) +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return ((void __iomem*)((port) + PIO_OFFSET)); +} +#define ioport_unmap(addr) ioport_unmap(addr) +static inline void ioport_unmap(void __iomem *addr) +{ +} #endif /* CONFIG_PCI */ #endif /* __ASM_ARM_ARCH_IO_H */ diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c index 3330ac7cfbef..671c7a09ab3d 100644 --- a/arch/arm/mach-mmp/devices.c +++ b/arch/arm/mach-mmp/devices.c @@ -238,7 +238,7 @@ void pxa_usb_phy_deinit(void __iomem *phy_reg) #endif #if IS_ENABLED(CONFIG_USB_SUPPORT) -static u64 usb_dma_mask = ~(u32)0; +static u64 __maybe_unused usb_dma_mask = ~(u32)0; #if IS_ENABLED(CONFIG_USB_MV_UDC) struct resource pxa168_u2o_resources[] = { diff --git a/arch/arm/mach-mvebu/platsmp.c b/arch/arm/mach-mvebu/platsmp.c index e62273aacb43..4ffbbd217e82 100644 --- a/arch/arm/mach-mvebu/platsmp.c +++ b/arch/arm/mach-mvebu/platsmp.c @@ -211,7 +211,7 @@ static int mv98dx3236_resume_set_cpu_boot_addr(int hw_cpu, void *boot_addr) return PTR_ERR(base); writel(0, base + MV98DX3236_CPU_RESUME_CTRL_REG); - writel(virt_to_phys(boot_addr), base + MV98DX3236_CPU_RESUME_ADDR_REG); + writel(__pa_symbol(boot_addr), base + MV98DX3236_CPU_RESUME_ADDR_REG); iounmap(base); diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 6613a6ff5dbc..6cbc69c92913 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -510,6 +510,7 @@ static void __init ams_delta_init(void) static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) { struct modem_private_data *priv = port->private_data; + int ret; if (IS_ERR(priv->regulator)) return; @@ -518,9 +519,16 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) return; if (state == 0) - regulator_enable(priv->regulator); + ret = regulator_enable(priv->regulator); else if (old == 0) - regulator_disable(priv->regulator); + ret = regulator_disable(priv->regulator); + else + ret = 0; + + if (ret) + dev_warn(port->dev, + "ams_delta modem_pm: failed to %sable regulator: %d\n", + state ? "dis" : "en", ret); } static struct plat_serial8250_port ams_delta_modem_ports[] = { diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 4dfb99504810..95ac1929aede 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -441,13 +441,11 @@ static struct spi_board_info __initdata mistral_boardinfo[] = { { .chip_select = 0, } }; -#ifdef CONFIG_PM static irqreturn_t osk_mistral_wake_interrupt(int irq, void *ignored) { return IRQ_HANDLED; } -#endif static void __init osk_mistral_init(void) { @@ -515,7 +513,6 @@ static void __init osk_mistral_init(void) gpio_direction_input(OMAP_MPUIO(2)); irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING); -#ifdef CONFIG_PM /* share the IRQ in case someone wants to use the * button for more than wakeup from system sleep. */ @@ -529,7 +526,6 @@ static void __init osk_mistral_init(void) ret); } else enable_irq_wake(irq); -#endif } else printk(KERN_ERR "OSK+Mistral: wakeup button is awol\n"); diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index dc9e34e670a2..b1e661bb5521 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -28,7 +28,7 @@ static const struct of_device_id omap_dt_match_table[] __initconst = { { } }; -static void __init omap_generic_init(void) +static void __init __maybe_unused omap_generic_init(void) { pdata_quirks_init(omap_dt_match_table); diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c index be517b048762..5b614388d72f 100644 --- a/arch/arm/mach-omap2/hsmmc.c +++ b/arch/arm/mach-omap2/hsmmc.c @@ -32,120 +32,6 @@ static u16 control_devconf1_offset; #define HSMMC_NAME_LEN 9 -static void omap_hsmmc1_before_set_reg(struct device *dev, - int power_on, int vdd) -{ - u32 reg, prog_io; - struct omap_hsmmc_platform_data *mmc = dev->platform_data; - - if (mmc->remux) - mmc->remux(dev, power_on); - - /* - * Assume we power both OMAP VMMC1 (for CMD, CLK, DAT0..3) and the - * card with Vcc regulator (from twl4030 or whatever). OMAP has both - * 1.8V and 3.0V modes, controlled by the PBIAS register. - * - * In 8-bit modes, OMAP VMMC1A (for DAT4..7) needs a supply, which - * is most naturally TWL VSIM; those pins also use PBIAS. - * - * FIXME handle VMMC1A as needed ... - */ - if (power_on) { - if (cpu_is_omap2430()) { - reg = omap_ctrl_readl(OMAP243X_CONTROL_DEVCONF1); - if ((1 << vdd) >= MMC_VDD_30_31) - reg |= OMAP243X_MMC1_ACTIVE_OVERWRITE; - else - reg &= ~OMAP243X_MMC1_ACTIVE_OVERWRITE; - omap_ctrl_writel(reg, OMAP243X_CONTROL_DEVCONF1); - } - - if (mmc->internal_clock) { - reg = omap_ctrl_readl(OMAP2_CONTROL_DEVCONF0); - reg |= OMAP2_MMCSDIO1ADPCLKISEL; - omap_ctrl_writel(reg, OMAP2_CONTROL_DEVCONF0); - } - - reg = omap_ctrl_readl(control_pbias_offset); - if (cpu_is_omap3630()) { - /* Set MMC I/O to 52MHz */ - prog_io = omap_ctrl_readl(OMAP343X_CONTROL_PROG_IO1); - prog_io |= OMAP3630_PRG_SDMMC1_SPEEDCTRL; - omap_ctrl_writel(prog_io, OMAP343X_CONTROL_PROG_IO1); - } else { - reg |= OMAP2_PBIASSPEEDCTRL0; - } - reg &= ~OMAP2_PBIASLITEPWRDNZ0; - omap_ctrl_writel(reg, control_pbias_offset); - } else { - reg = omap_ctrl_readl(control_pbias_offset); - reg &= ~OMAP2_PBIASLITEPWRDNZ0; - omap_ctrl_writel(reg, control_pbias_offset); - } -} - -static void omap_hsmmc1_after_set_reg(struct device *dev, int power_on, int vdd) -{ - u32 reg; - - /* 100ms delay required for PBIAS configuration */ - msleep(100); - - if (power_on) { - reg = omap_ctrl_readl(control_pbias_offset); - reg |= (OMAP2_PBIASLITEPWRDNZ0 | OMAP2_PBIASSPEEDCTRL0); - if ((1 << vdd) <= MMC_VDD_165_195) - reg &= ~OMAP2_PBIASLITEVMODE0; - else - reg |= OMAP2_PBIASLITEVMODE0; - omap_ctrl_writel(reg, control_pbias_offset); - } else { - reg = omap_ctrl_readl(control_pbias_offset); - reg |= (OMAP2_PBIASSPEEDCTRL0 | OMAP2_PBIASLITEPWRDNZ0 | - OMAP2_PBIASLITEVMODE0); - omap_ctrl_writel(reg, control_pbias_offset); - } -} - -static void hsmmc2_select_input_clk_src(struct omap_hsmmc_platform_data *mmc) -{ - u32 reg; - - reg = omap_ctrl_readl(control_devconf1_offset); - if (mmc->internal_clock) - reg |= OMAP2_MMCSDIO2ADPCLKISEL; - else - reg &= ~OMAP2_MMCSDIO2ADPCLKISEL; - omap_ctrl_writel(reg, control_devconf1_offset); -} - -static void hsmmc2_before_set_reg(struct device *dev, int power_on, int vdd) -{ - struct omap_hsmmc_platform_data *mmc = dev->platform_data; - - if (mmc->remux) - mmc->remux(dev, power_on); - - if (power_on) - hsmmc2_select_input_clk_src(mmc); -} - -static int am35x_hsmmc2_set_power(struct device *dev, int power_on, int vdd) -{ - struct omap_hsmmc_platform_data *mmc = dev->platform_data; - - if (power_on) - hsmmc2_select_input_clk_src(mmc); - - return 0; -} - -static int nop_mmc_set_power(struct device *dev, int power_on, int vdd) -{ - return 0; -} - static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c, struct omap_hsmmc_platform_data *mmc) { @@ -157,101 +43,11 @@ static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c, return -ENOMEM; } - if (c->name) - strncpy(hc_name, c->name, HSMMC_NAME_LEN); - else - snprintf(hc_name, (HSMMC_NAME_LEN + 1), "mmc%islot%i", - c->mmc, 1); + snprintf(hc_name, (HSMMC_NAME_LEN + 1), "mmc%islot%i", c->mmc, 1); mmc->name = hc_name; mmc->caps = c->caps; - mmc->internal_clock = !c->ext_clock; mmc->reg_offset = 0; - if (c->cover_only) { - /* detect if mobile phone cover removed */ - mmc->gpio_cd = -EINVAL; - mmc->gpio_cod = c->gpio_cd; - } else { - /* card detect pin on the mmc socket itself */ - mmc->gpio_cd = c->gpio_cd; - mmc->gpio_cod = -EINVAL; - } - mmc->gpio_wp = c->gpio_wp; - - mmc->remux = c->remux; - mmc->init_card = c->init_card; - - if (c->nonremovable) - mmc->nonremovable = 1; - - /* - * NOTE: MMC slots should have a Vcc regulator set up. - * This may be from a TWL4030-family chip, another - * controllable regulator, or a fixed supply. - * - * temporary HACK: ocr_mask instead of fixed supply - */ - if (soc_is_am35xx()) - mmc->ocr_mask = MMC_VDD_165_195 | - MMC_VDD_26_27 | - MMC_VDD_27_28 | - MMC_VDD_29_30 | - MMC_VDD_30_31 | - MMC_VDD_31_32; - else - mmc->ocr_mask = c->ocr_mask; - - if (!soc_is_am35xx()) - mmc->features |= HSMMC_HAS_PBIAS; - - switch (c->mmc) { - case 1: - if (mmc->features & HSMMC_HAS_PBIAS) { - /* on-chip level shifting via PBIAS0/PBIAS1 */ - mmc->before_set_reg = - omap_hsmmc1_before_set_reg; - mmc->after_set_reg = - omap_hsmmc1_after_set_reg; - } - - if (soc_is_am35xx()) - mmc->set_power = nop_mmc_set_power; - - /* OMAP3630 HSMMC1 supports only 4-bit */ - if (cpu_is_omap3630() && - (c->caps & MMC_CAP_8_BIT_DATA)) { - c->caps &= ~MMC_CAP_8_BIT_DATA; - c->caps |= MMC_CAP_4_BIT_DATA; - mmc->caps = c->caps; - } - break; - case 2: - if (soc_is_am35xx()) - mmc->set_power = am35x_hsmmc2_set_power; - - if (c->ext_clock) - c->transceiver = 1; - if (c->transceiver && (c->caps & MMC_CAP_8_BIT_DATA)) { - c->caps &= ~MMC_CAP_8_BIT_DATA; - c->caps |= MMC_CAP_4_BIT_DATA; - } - if (mmc->features & HSMMC_HAS_PBIAS) { - /* off-chip level shifting, or none */ - mmc->before_set_reg = hsmmc2_before_set_reg; - mmc->after_set_reg = NULL; - } - break; - case 3: - case 4: - case 5: - mmc->before_set_reg = NULL; - mmc->after_set_reg = NULL; - break; - default: - pr_err("MMC%d configuration not supported!\n", c->mmc); - kfree(hc_name); - return -ENODEV; - } return 0; } @@ -260,7 +56,6 @@ static int omap_hsmmc_done; void omap_hsmmc_late_init(struct omap2_hsmmc_info *c) { struct platform_device *pdev; - struct omap_hsmmc_platform_data *mmc_pdata; int res; if (omap_hsmmc_done != 1) @@ -269,32 +64,12 @@ void omap_hsmmc_late_init(struct omap2_hsmmc_info *c) omap_hsmmc_done++; for (; c->mmc; c++) { - if (!c->deferred) - continue; - pdev = c->pdev; if (!pdev) continue; - - mmc_pdata = pdev->dev.platform_data; - if (!mmc_pdata) - continue; - - if (c->cover_only) { - /* detect if mobile phone cover removed */ - mmc_pdata->gpio_cd = -EINVAL; - mmc_pdata->gpio_cod = c->gpio_cd; - } else { - /* card detect pin on the mmc socket itself */ - mmc_pdata->gpio_cd = c->gpio_cd; - mmc_pdata->gpio_cod = -EINVAL; - } - mmc_pdata->gpio_wp = c->gpio_wp; - res = omap_device_register(pdev); if (res) - pr_err("Could not late init MMC %s\n", - c->name); + pr_err("Could not late init MMC\n"); } } @@ -336,13 +111,6 @@ static void __init omap_hsmmc_init_one(struct omap2_hsmmc_info *hsmmcinfo, if (oh->dev_attr != NULL) { mmc_dev_attr = oh->dev_attr; mmc_data->controller_flags = mmc_dev_attr->flags; - /* - * erratum 2.1.1.128 doesn't apply if board has - * a transceiver is attached - */ - if (hsmmcinfo->transceiver) - mmc_data->controller_flags &= - ~OMAP_HSMMC_BROKEN_MULTIBLOCK_READ; } pdev = platform_device_alloc(name, ctrl_nr - 1); @@ -367,9 +135,6 @@ static void __init omap_hsmmc_init_one(struct omap2_hsmmc_info *hsmmcinfo, hsmmcinfo->pdev = pdev; - if (hsmmcinfo->deferred) - goto free_mmc; - res = omap_device_register(pdev); if (res) { pr_err("Could not register od for %s\n", name); diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h index 69b619ddc765..af9af5094ec3 100644 --- a/arch/arm/mach-omap2/hsmmc.h +++ b/arch/arm/mach-omap2/hsmmc.h @@ -12,18 +12,9 @@ struct omap2_hsmmc_info { u8 mmc; /* controller 1/2/3 */ u32 caps; /* 4/8 wires and any additional host * capabilities OR'd (ref. linux/mmc/host.h) */ - bool transceiver; /* MMC-2 option */ - bool ext_clock; /* use external pin for input clock */ - bool cover_only; /* No card detect - just cover switch */ - bool nonremovable; /* Nonremovable e.g. eMMC */ - bool deferred; /* mmc needs a deferred probe */ int gpio_cd; /* or -EINVAL */ int gpio_wp; /* or -EINVAL */ - char *name; /* or NULL for default */ struct platform_device *pdev; /* mmc controller instance */ - int ocr_mask; /* temporary HACK */ - /* Remux (pad configuration) when powering on/off */ - void (*remux)(struct device *dev, int power_on); /* init some special card */ void (*init_card)(struct mmc_card *card); }; diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 1d739d1a0a65..1cd20e4d56b0 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -410,7 +410,7 @@ static int _set_hwmod_postsetup_state(struct omap_hwmod *oh, void *data) return omap_hwmod_set_postsetup_state(oh, *(u8 *)data); } -static void __init omap_hwmod_init_postsetup(void) +static void __init __maybe_unused omap_hwmod_init_postsetup(void) { u8 postsetup_state; diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index d44e0e2f1106..841ba19d64a6 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -486,7 +486,6 @@ int __init omap3_pm_init(void) ret = request_irq(omap_prcm_event_to_irq("io"), _prcm_int_handle_io, IRQF_SHARED | IRQF_NO_SUSPEND, "pm_io", omap3_pm_init); - enable_irq(omap_prcm_event_to_irq("io")); if (ret) { pr_err("pm: Failed to request pm_io irq\n"); diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 382e236fbfd9..64f6451499a7 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -692,7 +692,6 @@ static int omap3xxx_prm_late_init(void) { struct device_node *np; int irq_num; - int ret; if (!(prm_features & PRM_HAS_IO_WAKEUP)) return 0; @@ -712,12 +711,8 @@ static int omap3xxx_prm_late_init(void) } omap3xxx_prm_enable_io_wakeup(); - ret = omap_prcm_register_chain_handler(&omap3_prcm_irq_setup); - if (!ret) - irq_set_status_flags(omap_prcm_event_to_irq("io"), - IRQ_NOAUTOEN); - return ret; + return omap_prcm_register_chain_handler(&omap3_prcm_irq_setup); } static void __exit omap3xxx_prm_exit(void) diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 87e86a4a9ead..3ab5df1ce900 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -337,6 +337,27 @@ static void omap44xx_prm_reconfigure_io_chain(void) } /** + * omap44xx_prm_enable_io_wakeup - enable wakeup events from I/O wakeup latches + * + * Activates the I/O wakeup event latches and allows events logged by + * those latches to signal a wakeup event to the PRCM. For I/O wakeups + * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and + * omap44xx_prm_reconfigure_io_chain() must be called. No return value. + */ +static void __init omap44xx_prm_enable_io_wakeup(void) +{ + s32 inst = omap4_prmst_get_prm_dev_inst(); + + if (inst == PRM_INSTANCE_UNKNOWN) + return; + + omap4_prm_rmw_inst_reg_bits(OMAP4430_GLOBAL_WUEN_MASK, + OMAP4430_GLOBAL_WUEN_MASK, + inst, + omap4_prcm_irq_setup.pm_ctrl); +} + +/** * omap44xx_prm_read_reset_sources - return the last SoC reset source * * Return a u32 representing the last reset sources of the SoC. The @@ -668,6 +689,8 @@ struct pwrdm_ops omap4_pwrdm_operations = { .pwrdm_has_voltdm = omap4_check_vcvp, }; +static int omap44xx_prm_late_init(void); + /* * XXX document */ @@ -675,6 +698,7 @@ static struct prm_ll_data omap44xx_prm_ll_data = { .read_reset_sources = &omap44xx_prm_read_reset_sources, .was_any_context_lost_old = &omap44xx_prm_was_any_context_lost_old, .clear_context_loss_flags_old = &omap44xx_prm_clear_context_loss_flags_old, + .late_init = &omap44xx_prm_late_init, .assert_hardreset = omap4_prminst_assert_hardreset, .deassert_hardreset = omap4_prminst_deassert_hardreset, .is_hardreset_asserted = omap4_prminst_is_hardreset_asserted, @@ -711,6 +735,37 @@ int __init omap44xx_prm_init(const struct omap_prcm_init_data *data) return prm_register(&omap44xx_prm_ll_data); } +static int omap44xx_prm_late_init(void) +{ + int irq_num; + + if (!(prm_features & PRM_HAS_IO_WAKEUP)) + return 0; + + irq_num = of_irq_get(prm_init_data->np, 0); + /* + * Already have OMAP4 IRQ num. For all other platforms, we need + * IRQ numbers from DT + */ + if (irq_num < 0 && !(prm_init_data->flags & PRM_IRQ_DEFAULT)) { + if (irq_num == -EPROBE_DEFER) + return irq_num; + + /* Have nothing to do */ + return 0; + } + + /* Once OMAP4 DT is filled as well */ + if (irq_num >= 0) { + omap4_prcm_irq_setup.irq = irq_num; + omap4_prcm_irq_setup.xlate_irq = NULL; + } + + omap44xx_prm_enable_io_wakeup(); + + return omap_prcm_register_chain_handler(&omap4_prcm_irq_setup); +} + static void __exit omap44xx_prm_exit(void) { prm_unregister(&omap44xx_prm_ll_data); diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c index 8cadb302a7d2..ffe05c27087e 100644 --- a/arch/arm/mach-prima2/common.c +++ b/arch/arm/mach-prima2/common.c @@ -15,7 +15,7 @@ #include <linux/of_platform.h> #include "common.h" -static void __init sirfsoc_init_late(void) +static void __init __maybe_unused sirfsoc_init_late(void) { sirfsoc_pm_init(); } diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 76fbc115ec33..ce7d97babb0f 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -566,6 +566,7 @@ config MACH_ICONTROL config ARCH_PXA_ESERIES bool "PXA based Toshiba e-series PDAs" select FB_W100 + select FB select PXA25x config MACH_E330 diff --git a/arch/arm/mach-pxa/include/mach/mtd-xip.h b/arch/arm/mach-pxa/include/mach/mtd-xip.h index 990d2bf2fb45..9bf4ea6a6f74 100644 --- a/arch/arm/mach-pxa/include/mach/mtd-xip.h +++ b/arch/arm/mach-pxa/include/mach/mtd-xip.h @@ -17,11 +17,15 @@ #include <mach/regs-ost.h> -#define xip_irqpending() (ICIP & ICMR) +/* restored July 2017, this did not build since 2011! */ + +#define ICIP io_p2v(0x40d00000) +#define ICMR io_p2v(0x40d00004) +#define xip_irqpending() (readl(ICIP) & readl(ICMR)) /* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ -#define xip_currtime() (OSCR) -#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) +#define xip_currtime() readl(OSCR) +#define xip_elapsed_since(x) (signed)((readl(OSCR) - (x)) / 4) /* * xip_cpu_idle() is used when waiting for a delay equal or larger than diff --git a/arch/arm/mach-rpc/include/mach/hardware.h b/arch/arm/mach-rpc/include/mach/hardware.h index aa79fa47373a..622d4e5df029 100644 --- a/arch/arm/mach-rpc/include/mach/hardware.h +++ b/arch/arm/mach-rpc/include/mach/hardware.h @@ -25,8 +25,8 @@ * *_SIZE is the size of the region * *_BASE is the virtual address */ -#define RAM_SIZE 0x10000000 -#define RAM_START 0x10000000 +#define RPC_RAM_SIZE 0x10000000 +#define RPC_RAM_START 0x10000000 #define EASI_SIZE 0x08000000 /* EASI I/O */ #define EASI_START 0x08000000 diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c index 0db46895c82a..7d52cd97d96e 100644 --- a/arch/arm/mach-sa1100/clock.c +++ b/arch/arm/mach-sa1100/clock.c @@ -35,6 +35,31 @@ struct clk clk_##_name = { \ static DEFINE_SPINLOCK(clocks_lock); +/* Dummy clk routine to build generic kernel parts that may be using them */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + return clk_get_rate(clk); +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + static void clk_gpio27_enable(struct clk *clk) { /* diff --git a/arch/arm/mach-sa1100/include/mach/mtd-xip.h b/arch/arm/mach-sa1100/include/mach/mtd-xip.h index b3d684098fbf..cb76096a2e36 100644 --- a/arch/arm/mach-sa1100/include/mach/mtd-xip.h +++ b/arch/arm/mach-sa1100/include/mach/mtd-xip.h @@ -20,7 +20,7 @@ #define xip_irqpending() (ICIP & ICMR) /* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ -#define xip_currtime() (OSCR) -#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) +#define xip_currtime() readl_relaxed(OSCR) +#define xip_elapsed_since(x) (signed)((readl_relaxed(OSCR) - (x)) / 4) #endif /* __ARCH_SA1100_MTD_XIP_H__ */ diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index 73e3adbc1330..44438f344dc8 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -67,8 +67,12 @@ static int regulator_quirk_notify(struct notifier_block *nb, { struct device *dev = data; struct i2c_client *client; + static bool done; u32 mon; + if (done) + return 0; + mon = ioread32(irqc + IRQC_MONITOR); dev_dbg(dev, "%s: %ld, IRQC_MONITOR = 0x%x\n", __func__, action, mon); if (mon & REGULATOR_IRQ_MASK) @@ -99,7 +103,7 @@ static int regulator_quirk_notify(struct notifier_block *nb, remove: dev_info(dev, "IRQ2 is not asserted, removing quirk\n"); - bus_unregister_notifier(&i2c_bus_type, nb); + done = true; iounmap(irqc); return 0; } diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 28083ef72819..71a34e8c345a 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -133,6 +133,7 @@ static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) static struct arm_pmu_platdata db8500_pmu_platdata = { .handle_irq = db8500_pmu_handler, + .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD, }; static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { diff --git a/arch/arm/mach-w90x900/clock.c b/arch/arm/mach-w90x900/clock.c index ac6fd1a2cb59..3f93fac98d97 100644 --- a/arch/arm/mach-w90x900/clock.c +++ b/arch/arm/mach-w90x900/clock.c @@ -93,3 +93,32 @@ void nuc900_subclk_enable(struct clk *clk, int enable) __raw_writel(clken, W90X900_VA_CLKPWR + SUBCLK); } + +/* dummy functions, should not be called */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + WARN_ON(clk); + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 90ee354d803e..6db5fc26d154 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -40,9 +40,21 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, { const struct dma_map_ops *ops = &dma_noop_ops; + void *ret; /* - * We are here because: + * Try generic allocator first if we are advertised that + * consistency is not required. + */ + + if (attrs & DMA_ATTR_NON_CONSISTENT) + return ops->alloc(dev, size, dma_handle, gfp, attrs); + + ret = dma_alloc_from_global_coherent(size, dma_handle); + + /* + * dma_alloc_from_global_coherent() may fail because: + * * - no consistent DMA region has been defined, so we can't * continue. * - there is no space left in consistent DMA region, so we @@ -50,11 +62,8 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, * advertised that consistency is not required. */ - if (attrs & DMA_ATTR_NON_CONSISTENT) - return ops->alloc(dev, size, dma_handle, gfp, attrs); - - WARN_ON_ONCE(1); - return NULL; + WARN_ON_ONCE(ret == NULL); + return ret; } static void arm_nommu_dma_free(struct device *dev, size_t size, @@ -63,14 +72,31 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, { const struct dma_map_ops *ops = &dma_noop_ops; - if (attrs & DMA_ATTR_NON_CONSISTENT) + if (attrs & DMA_ATTR_NON_CONSISTENT) { ops->free(dev, size, cpu_addr, dma_addr, attrs); - else - WARN_ON_ONCE(1); + } else { + int ret = dma_release_from_global_coherent(get_order(size), + cpu_addr); + + WARN_ON_ONCE(ret == 0); + } return; } +static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + int ret; + + if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) + return ret; + + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + + static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -173,6 +199,7 @@ static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist const struct dma_map_ops arm_nommu_dma_ops = { .alloc = arm_nommu_dma_alloc, .free = arm_nommu_dma_free, + .mmap = arm_nommu_dma_mmap, .map_page = arm_nommu_dma_map_page, .unmap_page = arm_nommu_dma_unmap_page, .map_sg = arm_nommu_dma_map_sg, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e7380bafbfa6..fcf1473d6fed 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -851,7 +851,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_pfn(dev, dma_addr); unsigned long off = vma->vm_pgoff; - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 9d00622ce845..bd0f33b77f57 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -452,7 +452,7 @@ emac: ethernet@1c30000 { compatible = "allwinner,sun50i-a64-emac"; syscon = <&syscon>; - reg = <0x01c30000 0x100>; + reg = <0x01c30000 0x10000>; interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; resets = <&ccu RST_BUS_EMAC>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 35b8c88c3220..738ed689ff69 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -400,7 +400,7 @@ }; pwm_AO_ab: pwm@550 { - compatible = "amlogic,meson-gx-pwm", "amlogic,meson-gxbb-pwm"; + compatible = "amlogic,meson-gx-ao-pwm", "amlogic,meson-gxbb-ao-pwm"; reg = <0x0 0x00550 0x0 0x10>; #pwm-cells = <3>; status = "disabled"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index 72c5a9f64ca8..94567eb17875 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -109,8 +109,8 @@ status = "okay"; pinctrl-0 = <&pwm_ao_a_3_pins>, <&pwm_ao_b_pins>; pinctrl-names = "default"; - clocks = <&clkc CLKID_FCLK_DIV4>; - clock-names = "clkin0"; + clocks = <&xtal> , <&xtal>; + clock-names = "clkin0", "clkin1" ; }; &pwm_ef { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 890821d6e52b..266fbcf3e47f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -10,12 +10,20 @@ #include <dt-bindings/input/input.h> -#include "meson-gxl-s905x-p212.dtsi" +#include "meson-gxl-s905x.dtsi" / { compatible = "libretech,cc", "amlogic,s905x", "amlogic,meson-gxl"; model = "Libre Technology CC"; + aliases { + serial0 = &uart_AO; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + cvbs-connector { compatible = "composite-video-connector"; @@ -26,6 +34,11 @@ }; }; + emmc_pwrseq: emmc-pwrseq { + compatible = "mmc-pwrseq-emmc"; + reset-gpios = <&gpio BOOT_9 GPIO_ACTIVE_LOW>; + }; + hdmi-connector { compatible = "hdmi-connector"; type = "a"; @@ -53,6 +66,39 @@ linux,default-trigger = "heartbeat"; }; }; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x0 0x0 0x80000000>; + }; + + vcc_3v3: regulator-vcc_3v3 { + compatible = "regulator-fixed"; + regulator-name = "VCC_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + vcc_card: regulator-vcc-card { + compatible = "regulator-gpio"; + + regulator-name = "VCC_CARD"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + + gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_HIGH>; + gpios-states = <0>; + + states = <3300000 0>, + <1800000 1>; + }; + + vddio_boot: regulator-vddio_boot { + compatible = "regulator-fixed"; + regulator-name = "VDDIO_BOOT"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; }; &cvbs_vdac_port { @@ -61,6 +107,16 @@ }; }; +ðmac { + status = "okay"; +}; + +&ir { + status = "okay"; + pinctrl-0 = <&remote_input_ao_pins>; + pinctrl-names = "default"; +}; + &hdmi_tx { status = "okay"; pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>; @@ -73,20 +129,43 @@ }; }; -/* - * The following devices exists but are exposed on the general - * purpose GPIO header. End user may well decide to use those pins - * for another purpose - */ +/* SD card */ +&sd_emmc_b { + status = "okay"; + pinctrl-0 = <&sdcard_pins>; + pinctrl-names = "default"; + + bus-width = <4>; + cap-sd-highspeed; + max-frequency = <100000000>; + disable-wp; + + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; + cd-inverted; -&sd_emmc_a { - status = "disabled"; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vcc_card>; }; -&uart_A { - status = "disabled"; +/* eMMC */ +&sd_emmc_c { + status = "okay"; + pinctrl-0 = <&emmc_pins>; + pinctrl-names = "default"; + + bus-width = <8>; + cap-mmc-highspeed; + max-frequency = <50000000>; + non-removable; + disable-wp; + + mmc-pwrseq = <&emmc_pwrseq>; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vddio_boot>; }; -&wifi32k { - status = "disabled"; +&uart_AO { + status = "okay"; + pinctrl-0 = <&uart_ao_a_pins>; + pinctrl-names = "default"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index dbcc3d4e2ed5..51763d674050 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -219,7 +219,7 @@ reg = <0x18800 0x100>, <0x18C00 0x20>; gpiosb: gpio { #gpio-cells = <2>; - gpio-ranges = <&pinctrl_sb 0 0 29>; + gpio-ranges = <&pinctrl_sb 0 0 30>; gpio-controller; interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index 726528ce54e9..4c68605675a8 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -270,6 +270,7 @@ interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3", "eip"; clocks = <&cpm_clk 1 26>; + dma-coherent; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 95f8e5f607f6..923f354b02f0 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -64,7 +64,7 @@ compatible = "marvell,armada-8k-rtc"; reg = <0x284000 0x20>, <0x284080 0x24>; reg-names = "rtc", "rtc-soc"; - interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <ICU_GRP_NSR 77 IRQ_TYPE_LEVEL_HIGH>; }; cps_ethernet: ethernet@0 { @@ -261,6 +261,7 @@ interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3", "eip"; clocks = <&cps_clk 1 26>; + dma-coherent; /* * The cryptographic engine found on the cp110 * master is enabled by default at the SoC diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index aef35e0b685a..a451996f590a 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -508,7 +508,7 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; - clock-frequency = <11289600 12288000>; + clock-frequency = <12288000 11289600>; status = "okay"; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index b5c6ee07d7f9..d1a3f3b7a0ab 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -281,7 +281,7 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; - clock-frequency = <11289600 12288000>; + clock-frequency = <12288000 11289600>; status = "okay"; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6c7d147eed54..b4ca115b3be1 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -476,6 +476,7 @@ CONFIG_QCOM_CLK_SMD_RPM=y CONFIG_MSM_GCC_8916=y CONFIG_MSM_GCC_8994=y CONFIG_MSM_MMCC_8996=y +CONFIG_HWSPINLOCK=y CONFIG_HWSPINLOCK_QCOM=y CONFIG_ARM_MHU=y CONFIG_PLATFORM_MHU=y diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 99fa69c9c3cf..9ef0797380cb 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+&r" (x0), [v] "+Q" (v->counter) + : [ret] "+r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 32f82723338a..ef39dcb9ca6a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -64,8 +64,10 @@ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) -#define VA_START (UL(0xffffffffffffffff) << VA_BITS) -#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) +#define VA_START (UL(0xffffffffffffffff) - \ + (UL(1) << VA_BITS) + 1) +#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ + (UL(1) << (VA_BITS - 1)) + 1) #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 16e44fa9b3b6..248339e4aaf5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -492,7 +492,7 @@ asm( * the "%x0" template means XZR. */ #define write_sysreg(v, r) do { \ - u64 __val = (u64)v; \ + u64 __val = (u64)(v); \ asm volatile("msr " __stringify(r) ", %x0" \ : : "rZ" (__val)); \ } while (0) @@ -508,7 +508,7 @@ asm( }) #define write_sysreg_s(v, r) do { \ - u64 __val = (u64)v; \ + u64 __val = (u64)(v); \ asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ } while (0) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8f0a1de11e4a..fab46a0ea223 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -69,7 +69,7 @@ static inline void set_fs(mm_segment_t fs) */ #define __range_ok(addr, size) \ ({ \ - unsigned long __addr = (unsigned long __force)(addr); \ + unsigned long __addr = (unsigned long)(addr); \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index e137ceaf5016..d16978213c5b 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -82,8 +82,8 @@ static const char *__init cpu_read_enable_method(int cpu) * Don't warn spuriously. */ if (cpu != 0) - pr_err("%s: missing enable-method property\n", - dn->full_name); + pr_err("%pOF: missing enable-method property\n", + dn); } } else { enable_method = acpi_get_enable_method(cpu); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 321119881abf..dc66e6ec3a99 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -469,7 +469,7 @@ static u64 __init of_get_cpu_mpidr(struct device_node *dn) */ cell = of_get_property(dn, "reg", NULL); if (!cell) { - pr_err("%s: missing reg property\n", dn->full_name); + pr_err("%pOF: missing reg property\n", dn); return INVALID_HWID; } @@ -478,7 +478,7 @@ static u64 __init of_get_cpu_mpidr(struct device_node *dn) * Non affinity bits must be set to 0 in the DT */ if (hwid & ~MPIDR_HWID_BITMASK) { - pr_err("%s: invalid reg property\n", dn->full_name); + pr_err("%pOF: invalid reg property\n", dn); return INVALID_HWID; } return hwid; @@ -627,8 +627,8 @@ static void __init of_parse_and_init_cpus(void) goto next; if (is_mpidr_duplicate(cpu_count, hwid)) { - pr_err("%s: duplicate cpu reg properties in the DT\n", - dn->full_name); + pr_err("%pOF: duplicate cpu reg properties in the DT\n", + dn); goto next; } @@ -640,8 +640,8 @@ static void __init of_parse_and_init_cpus(void) */ if (hwid == cpu_logical_map(0)) { if (bootcpu_valid) { - pr_err("%s: duplicate boot cpu reg property in DT\n", - dn->full_name); + pr_err("%pOF: duplicate boot cpu reg property in DT\n", + dn); goto next; } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 79244c75eaec..8d48b233e6ce 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -45,7 +45,7 @@ static int __init get_cpu_for_node(struct device_node *node) } } - pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + pr_crit("Unable to find CPU node for %pOF\n", cpu_node); of_node_put(cpu_node); return -1; @@ -71,8 +71,8 @@ static int __init parse_core(struct device_node *core, int cluster_id, cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else { - pr_err("%s: Can't get CPU for thread\n", - t->full_name); + pr_err("%pOF: Can't get CPU for thread\n", + t); of_node_put(t); return -EINVAL; } @@ -84,15 +84,15 @@ static int __init parse_core(struct device_node *core, int cluster_id, cpu = get_cpu_for_node(core); if (cpu >= 0) { if (!leaf) { - pr_err("%s: Core has both threads and CPU\n", - core->full_name); + pr_err("%pOF: Core has both threads and CPU\n", + core); return -EINVAL; } cpu_topology[cpu].cluster_id = cluster_id; cpu_topology[cpu].core_id = core_id; } else if (leaf) { - pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + pr_err("%pOF: Can't get CPU for leaf core\n", core); return -EINVAL; } @@ -137,8 +137,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth) has_cores = true; if (depth == 0) { - pr_err("%s: cpu-map children should be clusters\n", - c->full_name); + pr_err("%pOF: cpu-map children should be clusters\n", + c); of_node_put(c); return -EINVAL; } @@ -146,8 +146,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth) if (leaf) { ret = parse_core(c, cluster_id, core_id++); } else { - pr_err("%s: Non-leaf cluster with core %s\n", - cluster->full_name, name); + pr_err("%pOF: Non-leaf cluster with core %s\n", + cluster, name); ret = -EINVAL; } @@ -159,7 +159,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } while (c); if (leaf && !has_cores) - pr_warn("%s: empty cluster\n", cluster->full_name); + pr_warn("%pOF: empty cluster\n", cluster); if (leaf) cluster_id++; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c7c7088097be..8a62648848e5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -274,10 +274,12 @@ static DEFINE_RAW_SPINLOCK(die_lock); void die(const char *str, struct pt_regs *regs, int err) { int ret; + unsigned long flags; + + raw_spin_lock_irqsave(&die_lock, flags); oops_enter(); - raw_spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); ret = __die(str, err, regs); @@ -287,13 +289,15 @@ void die(const char *str, struct pt_regs *regs, int err) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - raw_spin_unlock_irq(&die_lock); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); + + raw_spin_unlock_irqrestore(&die_lock, flags); + if (ret != NOTIFY_STOP) do_exit(SIGSEGV); } @@ -519,7 +523,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; - pt_regs_write_reg(regs, rt, read_sysreg(cntfrq_el0)); + pt_regs_write_reg(regs, rt, arch_timer_get_rate()); regs->pc += 4; } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 17d8a1677a0b..7debb74843a0 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 77862881ae86..2e070d3baf9f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - kvm_pmu_overflow_set(vcpu, p->regval & mask); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); else /* accessing PMOVSCLR_EL0 */ vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index c3cd65e31814..076c43715e64 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -30,9 +30,10 @@ */ ENTRY(copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH - # Prefetch two cache lines ahead. - prfm pldl1strm, [x1, #128] - prfm pldl1strm, [x1, #256] + // Prefetch three cache lines ahead. + prfm pldl1strm, [x1, #128] + prfm pldl1strm, [x1, #256] + prfm pldl1strm, [x1, #384] alternative_else_nop_endif ldp x2, x3, [x1] @@ -50,7 +51,7 @@ alternative_else_nop_endif subs x18, x18, #128 alternative_if ARM64_HAS_NO_HW_PREFETCH - prfm pldl1strm, [x1, #384] + prfm pldl1strm, [x1, #384] alternative_else_nop_endif stnp x2, x3, [x0] diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e90cd1db42a8..f27d4dd04384 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -329,7 +329,7 @@ static int __swiotlb_mmap(struct device *dev, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; return __swiotlb_mmap_pfn(vma, pfn, size); @@ -706,7 +706,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c7861c9864e6..2509e4fe6992 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -163,26 +163,27 @@ int ptep_set_access_flags(struct vm_area_struct *vma, /* only preserve the access flags and write permission */ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; - /* - * PTE_RDONLY is cleared by default in the asm below, so set it in - * back if necessary (read-only or clean PTE). - */ + /* set PTE_RDONLY if actual read-only or clean PTE */ if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* * Setting the flags must be done atomically to avoid racing with the - * hardware update of the access/dirty state. + * hardware update of the access/dirty state. The PTE_RDONLY bit must + * be set to the most permissive (lowest value) of *ptep and entry + * (calculated as: a & b == ~(~a | ~b)). */ + pte_val(entry) ^= PTE_RDONLY; asm volatile("// ptep_set_access_flags\n" " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" - " and %0, %0, %3 // clear PTE_RDONLY\n" + " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n" " orr %0, %0, %4 // set flags\n" + " eor %0, %0, %3 // negate final PTE_RDONLY\n" " stxr %w1, %0, %2\n" " cbnz %w1, 1b\n" : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "L" (~PTE_RDONLY), "r" (pte_val(entry))); + : "L" (PTE_RDONLY), "r" (pte_val(entry))); flush_tlb_fix_spurious_fault(vma, address); return 1; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 23c2d89a362e..f1eb15e0e864 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -496,7 +496,7 @@ void mark_rodata_ro(void) static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, pgprot_t prot, struct vm_struct *vma, - int flags) + int flags, unsigned long vm_flags) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; @@ -507,10 +507,13 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, early_pgtable_alloc, flags); + if (!(vm_flags & VM_NO_GUARD)) + size += PAGE_SIZE; + vma->addr = va_start; vma->phys_addr = pa_start; vma->size = size; - vma->flags = VM_MAP; + vma->flags = VM_MAP | vm_flags; vma->caller = __builtin_return_address(0); vm_area_add_early(vma); @@ -541,14 +544,15 @@ static void __init map_kernel(pgd_t *pgd) * Only rodata will be remapped with different permissions later on, * all other segments are allowed to use contiguous mappings. */ - map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0); + map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0, + VM_NO_GUARD); map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, - &vmlinux_rodata, NO_CONT_MAPPINGS); + &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, - &vmlinux_inittext, 0); + &vmlinux_inittext, 0, VM_NO_GUARD); map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, - &vmlinux_initdata, 0); - map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0); + &vmlinux_initdata, 0, VM_NO_GUARD); + map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { /* diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index b388a99fea7b..dad128ba98bf 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -208,8 +208,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) } node_set(nid, numa_nodes_parsed); - pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n", - start, (end - 1), nid); return ret; } @@ -223,10 +221,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - if (start_pfn < end_pfn) - pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, - start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); - else + if (start_pfn >= end_pfn) pr_info("Initmem setup node %d [<memory-less node>]\n", nid); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); diff --git a/arch/mips/include/asm/mach-ralink/ralink_regs.h b/arch/mips/include/asm/mach-ralink/ralink_regs.h index 9df1a53bcb36..b4e7dfa214eb 100644 --- a/arch/mips/include/asm/mach-ralink/ralink_regs.h +++ b/arch/mips/include/asm/mach-ralink/ralink_regs.h @@ -13,6 +13,8 @@ #ifndef _RALINK_REGS_H_ #define _RALINK_REGS_H_ +#include <linux/io.h> + enum ralink_soc_type { RALINK_UNKNOWN = 0, RT2880_SOC, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d4b2ad18eef2..bce2a6431430 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return !!(vcpu->arch.pending_exceptions); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index e08598c70b3e..8e78251eccc2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -232,7 +232,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma, else vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 094a0ee4af46..9be8b08ae46b 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/bug.h> #include <asm/mipsregs.h> #include <asm/mach-ralink/ralink_regs.h> diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 531da9eb8f43..dda1f558ef35 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,6 +47,9 @@ config PARISC and later HP3000 series). The PA-RISC Linux project home page is at <http://www.parisc-linux.org/>. +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig index 143d02652792..ccc109761f44 100644 --- a/arch/parisc/configs/712_defconfig +++ b/arch/parisc/configs/712_defconfig @@ -1,11 +1,9 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -14,7 +12,6 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PA7100LC=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_GSC_LASI=y @@ -32,11 +29,9 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_INET_AH=m CONFIG_INET_ESP=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m # CONFIG_IPV6 is not set CONFIG_NETFILTER=y -CONFIG_IP_NF_QUEUE=m CONFIG_LLC2=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -65,21 +60,20 @@ CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=m CONFIG_LASI_82596=y CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m # CONFIG_KEYBOARD_HIL_OLD is not set CONFIG_MOUSE_SERIAL=m +CONFIG_LEGACY_PTY_COUNT=64 CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=17 @@ -88,22 +82,17 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_SERIAL_MUX is not set CONFIG_PDC_CONSOLE=y -CONFIG_LEGACY_PTY_COUNT=64 CONFIG_PRINTER=m CONFIG_PPDEV=m # CONFIG_HW_RANDOM is not set CONFIG_RAW_DRIVER=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_DUMMY_CONSOLE_COLUMNS=128 CONFIG_DUMMY_CONSOLE_ROWS=48 CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -111,13 +100,9 @@ CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_HARMONY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_AUTOFS4_FS=y @@ -130,14 +115,10 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V4=y -CONFIG_RPCSEC_GSS_SPKM3=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_737=m @@ -177,21 +158,16 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m @@ -200,6 +176,7 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_LIBCRC32C=m +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig index 1a4f776b49b8..5acb93dcaabf 100644 --- a/arch/parisc/configs/a500_defconfig +++ b/arch/parisc/configs/a500_defconfig @@ -1,13 +1,10 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -16,7 +13,6 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PA8X00=y CONFIG_64BIT=y CONFIG_SMP=y @@ -43,21 +39,17 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_INET_AH=m CONFIG_INET_ESP=m -# CONFIG_INET_LRO is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_MATCH_DCCP is not set -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_RAW=m @@ -70,7 +62,6 @@ CONFIG_IP6_NF_MATCH_OPTS=m CONFIG_IP6_NF_MATCH_HL=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_LOG=m CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m @@ -94,7 +85,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_QLOGIC_1280=m @@ -106,43 +96,38 @@ CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_FUSION=y CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m CONFIG_FUSION_CTL=m CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y +CONFIG_PCMCIA_3C574=m +CONFIG_PCMCIA_3C589=m CONFIG_VORTEX=m CONFIG_TYPHOON=m +CONFIG_ACENIC=m +CONFIG_ACENIC_OMIT_TIGON_I=y +CONFIG_PCNET32=m +CONFIG_TIGON3=m CONFIG_NET_TULIP=y CONFIG_DE2104X=m CONFIG_TULIP=y CONFIG_TULIP_MMIO=y CONFIG_PCMCIA_XIRCOM=m CONFIG_HP100=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m CONFIG_E100=m -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_E1000=m -CONFIG_TIGON3=m -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m CONFIG_PCMCIA_SMC91C92=m CONFIG_PCMCIA_XIRC2PS=m CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -# CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m @@ -151,7 +136,6 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_PDC_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_RAW_DRIVER=y # CONFIG_HWMON is not set @@ -160,7 +144,6 @@ CONFIG_AGP_PARISC=y # CONFIG_STI_CONSOLE is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_AUTOFS4_FS=y @@ -173,13 +156,9 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_UFS_FS=m CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V4=y -CONFIG_RPCSEC_GSS_SPKM3=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -187,17 +166,12 @@ CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_BLOWFISH=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_LIBCRC32C=m diff --git a/arch/parisc/configs/b180_defconfig b/arch/parisc/configs/b180_defconfig index f1a0c25bef8d..83ffd161aec5 100644 --- a/arch/parisc/configs/b180_defconfig +++ b/arch/parisc/configs/b180_defconfig @@ -3,7 +3,6 @@ CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -25,8 +24,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set -CONFIG_IPV6=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -53,10 +50,9 @@ CONFIG_MD_LINEAR=y CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_LASI_82596=y CONFIG_NET_TULIP=y CONFIG_TULIP=y +CONFIG_LASI_82596=y CONFIG_PPP=y CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_HIL_OLD is not set @@ -71,40 +67,31 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_PRINTER=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_HARMONY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NFSD_V3=y -CONFIG_SMB_FS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_HEADERS_CHECK=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SECURITY=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig index 8e8f0e34f817..0764d3971cf6 100644 --- a/arch/parisc/configs/c3000_defconfig +++ b/arch/parisc/configs/c3000_defconfig @@ -1,12 +1,9 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -15,7 +12,6 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PA8X00=y CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_GSC is not set @@ -31,13 +27,11 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y CONFIG_NETFILTER_DEBUG=y -CONFIG_IP_NF_QUEUE=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y @@ -50,13 +44,11 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_NS87415=y -CONFIG_PATA_SIL680=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 @@ -76,28 +68,23 @@ CONFIG_FUSION=y CONFIG_FUSION_SPI=m CONFIG_FUSION_CTL=m CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y +CONFIG_ACENIC=m +CONFIG_TIGON3=m CONFIG_NET_TULIP=y CONFIG_DE2104X=m CONFIG_TULIP=y CONFIG_TULIP_MMIO=y -CONFIG_NET_PCI=y CONFIG_E100=m -CONFIG_ACENIC=m CONFIG_E1000=m -CONFIG_TIGON3=m CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPPOE=m -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1600 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1200 +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_MOUSE_PS2 is not set CONFIG_SERIO=m @@ -111,7 +98,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_HW_RANDOM is not set CONFIG_RAW_DRIVER=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -121,9 +107,6 @@ CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_AD1889=y CONFIG_USB_HIDDEV=y CONFIG_USB=y @@ -139,7 +122,6 @@ CONFIG_USB_MICROTEK=m CONFIG_USB_LEGOTOWER=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=y @@ -149,7 +131,6 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NFSD_V3=y @@ -159,18 +140,13 @@ CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_MD5=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_DES=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_LIBCRC32C=m diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig index f6a4c016304b..088ab948a5ca 100644 --- a/arch/parisc/configs/c8000_defconfig +++ b/arch/parisc/configs/c8000_defconfig @@ -1,16 +1,13 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_LZO=y CONFIG_EXPERT=y CONFIG_SYSCTL_SYSCALL=y CONFIG_SLAB=y @@ -23,7 +20,6 @@ CONFIG_PA8X00=y CONFIG_64BIT=y CONFIG_SMP=y CONFIG_PREEMPT=y -# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_IOMMU_CCIO=y CONFIG_PCI=y CONFIG_PCI_LBA=y @@ -146,7 +142,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y # CONFIG_FB_STI is not set -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set # CONFIG_BACKLIGHT_GENERIC is not set CONFIG_FRAMEBUFFER_CONSOLE=y @@ -157,12 +152,9 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_SOUND=m CONFIG_SND=m +CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_SEQUENCER=m CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_AD1889=m # CONFIG_SND_USB is not set # CONFIG_SND_GSC is not set @@ -174,8 +166,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT4_FS=m CONFIG_REISERFS_FS=m CONFIG_REISERFS_PROC_INFO=y CONFIG_XFS_FS=m @@ -238,11 +228,8 @@ CONFIG_DEBUG_SLAB=y CONFIG_DEBUG_SLAB_LEAK=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_PROVE_RCU_DELAY=y CONFIG_DEBUG_BLOCK_EXT_DEVT=y CONFIG_LATENCYTOP=y CONFIG_KEYS=y diff --git a/arch/parisc/configs/default_defconfig b/arch/parisc/configs/default_defconfig index 310b6657e4ac..52c9050a7c5c 100644 --- a/arch/parisc/configs/default_defconfig +++ b/arch/parisc/configs/default_defconfig @@ -1,11 +1,9 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -41,9 +39,7 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_INET_AH=m CONFIG_INET_ESP=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m -CONFIG_IPV6=y CONFIG_INET6_AH=y CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y @@ -82,26 +78,23 @@ CONFIG_MD_RAID1=y CONFIG_MD_RAID10=y CONFIG_BLK_DEV_DM=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=m -CONFIG_LASI_82596=y -CONFIG_NET_TULIP=y -CONFIG_TULIP=y -CONFIG_NET_PCI=y CONFIG_ACENIC=y CONFIG_TIGON3=y -CONFIG_NET_PCMCIA=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=y +CONFIG_LASI_82596=y CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m # CONFIG_KEYBOARD_HIL_OLD is not set CONFIG_MOUSE_SERIAL=y +CONFIG_LEGACY_PTY_COUNT=64 CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=y @@ -109,31 +102,24 @@ CONFIG_SERIAL_8250_NR_UARTS=17 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_LEGACY_PTY_COUNT=64 CONFIG_PRINTER=m CONFIG_PPDEV=m # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_DUMMY_CONSOLE_COLUMNS=128 CONFIG_DUMMY_CONSOLE_ROWS=48 CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_SOUND=y CONFIG_SND=y -CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SEQUENCER=y CONFIG_SND_AD1889=y CONFIG_SND_HARMONY=y CONFIG_HID_GYRATION=y @@ -141,7 +127,6 @@ CONFIG_HID_NTRIG=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y CONFIG_USB=y @@ -150,21 +135,15 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set -CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NFSD_V4=y -CONFIG_RPCSEC_GSS_SPKM3=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m @@ -204,30 +183,24 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KEYS=y -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_LIBCRC32C=m +CONFIG_FONTS=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 8688ba7f5966..37ae4b57c001 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -2,15 +2,11 @@ CONFIG_LOCALVERSION="-32bit" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_LZO=y CONFIG_EXPERT=y CONFIG_SYSCTL_SYSCALL=y CONFIG_PERF_EVENTS=y @@ -49,7 +45,6 @@ CONFIG_INET_ESP=m # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_LLC2=m # CONFIG_WIRELESS is not set @@ -149,10 +144,8 @@ CONFIG_PRINTER=m CONFIG_PPDEV=m # CONFIG_HW_RANDOM is not set CONFIG_I2C=y -CONFIG_POWER_SUPPLY=y # CONFIG_HWMON is not set CONFIG_AGP=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_MODE_HELPERS=y @@ -169,11 +162,8 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SEQUENCER=m CONFIG_SND_AD1889=m CONFIG_SND_HARMONY=m CONFIG_HIDRAW=y @@ -223,12 +213,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_RT=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V2=y @@ -293,15 +278,12 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_RCU_CPU_STALL_INFO=y CONFIG_LATENCYTOP=y CONFIG_LKDTM=m CONFIG_KEYS=y -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y @@ -320,7 +302,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_FONTS=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index c564e6e1fa23..d39e7f821aba 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -8,10 +8,11 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -# CONFIG_UTS_NS is not set -# CONFIG_IPC_NS is not set -# CONFIG_PID_NS is not set -# CONFIG_NET_NS is not set +CONFIG_CGROUPS=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_CGROUP_PIDS=y +CONFIG_CPUSETS=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -52,7 +53,6 @@ CONFIG_INET_ESP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_INET_LRO=m CONFIG_INET_DIAG=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set @@ -84,7 +84,6 @@ CONFIG_PATA_SIL680=y CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_RAID=m CONFIG_DM_UEVENT=y @@ -138,21 +137,21 @@ CONFIG_QLGE=m # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_MDIO_BITBANG=m CONFIG_PHYLIB=y -CONFIG_MARVELL_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_LXT_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_SMSC_PHY=m CONFIG_BROADCOM_PHY=m +CONFIG_CICADA_PHY=m +CONFIG_DAVICOM_PHY=m CONFIG_ICPLUS_PHY=m -CONFIG_REALTEK_PHY=m +CONFIG_LSI_ET1011C_PHY=m +CONFIG_LXT_PHY=m +CONFIG_MARVELL_PHY=m CONFIG_NATIONAL_PHY=m +CONFIG_QSEMI_PHY=m +CONFIG_REALTEK_PHY=m +CONFIG_SMSC_PHY=m CONFIG_STE10XP=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_MDIO_BITBANG=m +CONFIG_VITESSE_PHY=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y @@ -166,10 +165,8 @@ CONFIG_INPUT_MISC=y CONFIG_SERIO_SERPORT=m # CONFIG_HP_SDC is not set CONFIG_SERIO_RAW=m -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y # CONFIG_LEGACY_PTYS is not set CONFIG_NOZOMI=m -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y @@ -207,10 +204,8 @@ CONFIG_AGP=y CONFIG_AGP_PARISC=y CONFIG_DRM=y CONFIG_DRM_RADEON=y -CONFIG_DRM_RADEON_UMS=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_BACKLIGHT_GENERIC is not set CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y @@ -246,8 +241,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_BTRFS_FS=m CONFIG_QUOTA=y @@ -286,27 +279,16 @@ CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y -CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y # CONFIG_SCHED_DEBUG is not set -CONFIG_TIMER_STATS=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_DEFLATE=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=m CONFIG_LIBCRC32C=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h index 32e105fb8adb..e3c0586260d8 100644 --- a/arch/parisc/include/asm/pdcpat.h +++ b/arch/parisc/include/asm/pdcpat.h @@ -150,7 +150,7 @@ #define PDC_PAT_MEM_SETGM 9L /* Set Good Memory value */ #define PDC_PAT_MEM_ADD_PAGE 10L /* ADDs a page to the cell */ #define PDC_PAT_MEM_ADDRESS 11L /* Get Physical Location From */ - /* Memory Address */ + /* Memory Address */ #define PDC_PAT_MEM_GET_TXT_SIZE 12L /* Get Formatted Text Size */ #define PDC_PAT_MEM_GET_PD_TXT 13L /* Get PD Formatted Text */ #define PDC_PAT_MEM_GET_CELL_TXT 14L /* Get Cell Formatted Text */ @@ -228,6 +228,17 @@ struct pdc_pat_mem_read_pd_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_READ */ unsigned long pdt_entries; }; +struct pdc_pat_mem_phys_mem_location { /* PDC_PAT_MEM/PDC_PAT_MEM_ADDRESS */ + u64 cabinet:8; + u64 ign1:8; + u64 ign2:8; + u64 cell_slot:8; + u64 ign3:8; + u64 dimm_slot:8; /* DIMM slot, e.g. 0x1A, 0x2B, show user hex value! */ + u64 ign4:8; + u64 source:4; /* for mem: always 0x07 */ + u64 source_detail:4; /* for mem: always 0x04 (SIMM or DIMM) */ +}; struct pdc_pat_pd_addr_map_entry { unsigned char entry_type; /* 1 = Memory Descriptor Entry Type */ @@ -319,6 +330,9 @@ extern int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, extern int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, unsigned long *pdt_entries_ptr, unsigned long count, unsigned long offset); +extern int pdc_pat_mem_get_dimm_phys_location( + struct pdc_pat_mem_phys_mem_location *pret, + unsigned long phys_addr); #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 88fe0aad4390..bc208136bbb2 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -34,7 +34,7 @@ struct thread_info { /* thread information allocation */ -#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */ +#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ /* Be sure to hunt all references to this down when you change the size of * the kernel stack */ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c32a09095216..19c0c141bc3f 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -453,8 +453,8 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, before it can be accessed through the kernel mapping. */ preempt_disable(); flush_dcache_page_asm(__pa(vfrom), vaddr); - preempt_enable(); copy_page_asm(vto, vfrom); + preempt_enable(); } EXPORT_SYMBOL(copy_user_page); @@ -539,6 +539,10 @@ void flush_cache_mm(struct mm_struct *mm) struct vm_area_struct *vma; pgd_t *pgd; + /* Flush the TLB to avoid speculation if coherency is required. */ + if (parisc_requires_coherency()) + flush_tlb_all(); + /* Flushing the whole cache on each cpu takes forever on rp3440, etc. So, avoid it if the mm isn't too big. */ if (mm_total_size(mm) >= parisc_cache_flush_threshold) { @@ -577,33 +581,21 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long addr; - pgd_t *pgd; - BUG_ON(!vma->vm_mm->context); - if ((end - start) >= parisc_cache_flush_threshold) { - flush_cache_all(); - return; - } + /* Flush the TLB to avoid speculation if coherency is required. */ + if (parisc_requires_coherency()) + flush_tlb_range(vma, start, end); - if (vma->vm_mm->context == mfsp(3)) { - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); + if ((end - start) >= parisc_cache_flush_threshold + || vma->vm_mm->context != mfsp(3)) { + flush_cache_all(); return; } - pgd = vma->vm_mm->pgd; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long pfn; - pte_t *ptep = get_ptep(pgd, addr); - if (!ptep) - continue; - pfn = pte_pfn(*ptep); - if (pfn_valid(pfn)) - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); - } + flush_user_dcache_range_asm(start, end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(start, end); } void @@ -612,7 +604,8 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long BUG_ON(!vma->vm_mm->context); if (pfn_valid(pfn)) { - flush_tlb_page(vma, vmaddr); + if (parisc_requires_coherency()) + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 98190252c12f..f622a311d04a 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1481,12 +1481,44 @@ int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, unsigned long offset) { int retval; - unsigned long flags; + unsigned long flags, entries; spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_PD_READ, - __pa(&pret), __pa(pdt_entries_ptr), + __pa(&pdc_result), __pa(pdt_entries_ptr), count, offset); + + if (retval == PDC_OK) { + entries = min(pdc_result[0], count); + pret->actual_count_bytes = entries; + pret->pdt_entries = entries / sizeof(unsigned long); + } + + spin_unlock_irqrestore(&pdc_lock, flags); + + return retval; +} + +/** + * pdc_pat_mem_get_dimm_phys_location - Get physical DIMM slot via PAT firmware + * @pret: ptr to hold returned information + * @phys_addr: physical address to examine + * + */ +int pdc_pat_mem_get_dimm_phys_location( + struct pdc_pat_mem_phys_mem_location *pret, + unsigned long phys_addr) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_ADDRESS, + __pa(&pdc_result), phys_addr); + + if (retval == PDC_OK) + memcpy(pret, &pdc_result, sizeof(*pret)); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index ba5e1c7b1f17..0ca254085a66 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -380,7 +380,7 @@ static inline int eirr_to_irq(unsigned long eirr) /* * IRQ STACK - used for irq handler */ -#define IRQ_STACK_SIZE (4096 << 2) /* 16k irq stack size */ +#define IRQ_STACK_SIZE (4096 << 3) /* 32k irq stack size */ union irq_stack_union { unsigned long stack[IRQ_STACK_SIZE/sizeof(unsigned long)]; @@ -413,6 +413,10 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sr[7]) return; + /* exit if already in panic */ + if (sysctl_panic_on_stackoverflow < 0) + return; + /* calculate kernel stack usage */ stack_usage = sp - stack_start; #ifdef CONFIG_IRQSTACKS @@ -454,8 +458,10 @@ check_kernel_stack: #ifdef CONFIG_IRQSTACKS panic_check: #endif - if (sysctl_panic_on_stackoverflow) + if (sysctl_panic_on_stackoverflow) { + sysctl_panic_on_stackoverflow = -1; /* disable further checks */ panic("low stack detected by irq handler - check messages\n"); + } #endif } diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index f3a797e670b0..d02874ecb94d 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -112,10 +112,12 @@ void __init pdc_pdt_init(void) #ifdef CONFIG_64BIT struct pdc_pat_mem_read_pd_retinfo pat_pret; + /* try old obsolete PAT firmware function first */ + pdt_type = PDT_PAT_OLD; ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry, MAX_PDT_ENTRIES); if (ret != PDC_OK) { - pdt_type = PDT_PAT_OLD; + pdt_type = PDT_PAT_NEW; ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry, MAX_PDT_TABLE_SIZE, 0); } @@ -131,11 +133,20 @@ void __init pdc_pdt_init(void) } for (i = 0; i < pdt_status.pdt_entries; i++) { - if (i < 20) - pr_warn("PDT: BAD PAGE #%d at 0x%08lx (error_type = %lu)\n", - i, - pdt_entry[i] & PAGE_MASK, - pdt_entry[i] & 1); + struct pdc_pat_mem_phys_mem_location loc; + + /* get DIMM slot number */ + loc.dimm_slot = 0xff; +#ifdef CONFIG_64BIT + pdc_pat_mem_get_dimm_phys_location(&loc, pdt_entry[i]); +#endif + + pr_warn("PDT: BAD PAGE #%d at 0x%08lx, " + "DIMM slot %02x (error_type = %lu)\n", + i, + pdt_entry[i] & PAGE_MASK, + loc.dimm_slot, + pdt_entry[i] & 1); /* mark memory page bad */ memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index b64d7d21646e..a45a67d526f8 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -53,6 +53,7 @@ #include <linux/uaccess.h> #include <linux/rcupdate.h> #include <linux/random.h> +#include <linux/nmi.h> #include <asm/io.h> #include <asm/asm-offsets.h> @@ -145,6 +146,7 @@ void machine_power_off(void) /* prevent soft lockup/stalled CPU messages for endless loop. */ rcu_sysrq_start(); + lockup_detector_suspend(); for (;;); } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 3d6ef1b29c6a..ffe2cbf52d1a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -78,6 +78,8 @@ SECTIONS *(.text.sys_exit) *(.text.do_sigaltstack) *(.text.do_fork) + *(.text.div) + *($$*) /* millicode routines */ *(.text.*) *(.fixup) *(.lock.text) /* out-of-line lock text */ diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 8d4ed73d5490..e2b3e7a00c9e 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -59,6 +59,19 @@ machine-$(CONFIG_PPC64) += 64 machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le UTS_MACHINE := $(subst $(space),,$(machine-y)) +# XXX This needs to be before we override LD below +ifdef CONFIG_PPC32 +KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o +else +ifeq ($(call ld-ifversion, -ge, 225000000, y),y) +# Have the linker provide sfpr if possible. +# There is a corresponding test in arch/powerpc/lib/Makefile +KBUILD_LDFLAGS_MODULE += --save-restore-funcs +else +KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o +endif +endif + ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) override LD += -EL LDEMULATION := lppc @@ -190,18 +203,6 @@ else CHECKFLAGS += -D__LITTLE_ENDIAN__ endif -ifdef CONFIG_PPC32 -KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o -else -ifeq ($(call ld-ifversion, -ge, 225000000, y),y) -# Have the linker provide sfpr if possible. -# There is a corresponding test in arch/powerpc/lib/Makefile -KBUILD_LDFLAGS_MODULE += --save-restore-funcs -else -KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o -endif -endif - ifeq ($(CONFIG_476FPE_ERR46),y) KBUILD_LDFLAGS_MODULE += --ppc476-workaround \ -T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7814a7b1523..6f952fe1f084 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -25,12 +25,20 @@ compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -Os -msoft-float -pipe \ -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -isystem $(shell $(CROSS32CC) -print-file-name=include) \ -D$(compress-y) +BOOTCC := $(CC) ifdef CONFIG_PPC64_BOOT_WRAPPER BOOTCFLAGS += -m64 +else +BOOTCFLAGS += -m32 +ifdef CROSS32_COMPILE + BOOTCC := $(CROSS32_COMPILE)gcc +endif endif + +BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) + ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian else @@ -183,10 +191,10 @@ clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \ empty.c zImage.coff.lds zImage.ps3.lds zImage.lds quiet_cmd_bootcc = BOOTCC $@ - cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< + cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< quiet_cmd_bootas = BOOTAS $@ - cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< quiet_cmd_bootar = BOOTAR $@ cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 77529a3e3811..5b4023c616f7 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb; #define PRTS_MASK 0x1f /* process table size field */ #define PRTB_MASK 0x0ffffffffffff000UL -/* - * Limit process table to PAGE_SIZE table. This - * also limit the max pid we can support. - * MAX_USER_CONTEXT * 16 bytes of space. - */ -#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4) -#define PRTB_ENTRIES (1ul << CONTEXT_BITS) +/* Number of supported PID bits */ +extern unsigned int mmu_pid_bits; + +/* Base PID to allocate from */ +extern unsigned int mmu_base_pid; + +#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) +#define PRTB_ENTRIES (1ul << mmu_pid_bits) /* * Power9 currently only support 64K partition table size. diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index d1da415e283c..818a58fc3f4f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -608,9 +608,17 @@ static inline pte_t pte_mkdevmap(pte_t pte) return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP); } +/* + * This is potentially called with a pmd as the argument, in which case it's not + * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set. + * That's because the bit we use for _PAGE_DEVMAP is not reserved for software + * use in page directory entries (ie. non-ptes). + */ static inline int pte_devmap(pte_t pte) { - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP)); + u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE); + + return (pte_raw(pte) & mask) == mask; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index da7e9432fa8f..0c76675394c5 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd); #ifdef CONFIG_PPC_BOOK3S_64 extern void radix__switch_mmu_context(struct mm_struct *prev, - struct mm_struct *next); + struct mm_struct *next); static inline void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) @@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id); extern void mmu_context_init(void); #endif +#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) +extern void radix_kvm_prefetch_workaround(struct mm_struct *mm); +#else +static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { } +#endif + extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); @@ -79,9 +85,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + bool new_on_cpu = false; + /* Mark this context has been used on the new CPU */ - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + new_on_cpu = true; + } /* 32-bit keeps track of the current PGDIR in the thread struct */ #ifdef CONFIG_PPC32 @@ -109,6 +119,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, if (cpu_has_feature(CPU_FTR_ALTIVEC)) asm volatile ("dssall"); #endif /* CONFIG_ALTIVEC */ + + if (new_on_cpu) + radix_kvm_prefetch_workaround(next); + /* * The actual HW switching method differs between the various * sub architectures. Out of line for now diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9029afd1fa2a..f14f3c04ec7e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1325,10 +1325,18 @@ EXC_VIRT_NONE(0x5800, 0x100) std r10,PACA_EXGEN+EX_R13(r13); \ EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H) +/* + * Branch to soft_nmi_interrupt using the emergency stack. The emergency + * stack is one that is usable by maskable interrupts so long as MSR_EE + * remains off. It is used for recovery when something has corrupted the + * normal kernel stack, for example. The "soft NMI" must not use the process + * stack because we want irq disabled sections to avoid touching the stack + * at all (other than PMU interrupts), so use the emergency stack for this, + * and run it entirely with interrupts hard disabled. + */ EXC_COMMON_BEGIN(soft_nmi_common) mr r10,r1 ld r1,PACAEMERGSP(r13) - ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900, system_reset, soft_nmi_interrupt, diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 516ebef905c0..e6252c5a57a4 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -460,11 +460,17 @@ pnv_restore_hyp_resource_arch300: /* * Workaround for POWER9, if we lost resources, the ERAT * might have been mixed up and needs flushing. We also need - * to reload MMCR0 (see comment above). + * to reload MMCR0 (see comment above). We also need to set + * then clear bit 60 in MMCRA to ensure the PMU starts running. */ blt cr3,1f PPC_INVALIDATE_ERAT ld r1,PACAR1(r13) + mfspr r4,SPRN_MMCRA + ori r4,r4,(1 << (63-60)) + mtspr SPRN_MMCRA,r4 + xori r4,r4,(1 << (63-60)) + mtspr SPRN_MMCRA,r4 ld r4,_MMCR0(r1) mtspr SPRN_MMCR0,r4 1: diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0bcec745a672..f291f7826abc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -145,6 +145,19 @@ notrace unsigned int __check_irq_replay(void) /* Clear bit 0 which we wouldn't clear otherwise */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + if (happened & PACA_IRQ_HARD_DIS) { + /* + * We may have missed a decrementer interrupt if hard disabled. + * Check the decrementer register in case we had a rollover + * while hard disabled. + */ + if (!(happened & PACA_IRQ_DEC)) { + if (decrementer_check_overflow()) { + local_paca->irq_happened |= PACA_IRQ_DEC; + happened |= PACA_IRQ_DEC; + } + } + } /* * Force the delivery of pending soft-disabled interrupts on PS3. @@ -170,7 +183,7 @@ notrace unsigned int __check_irq_replay(void) * in case we also had a rollover while hard disabled */ local_paca->irq_happened &= ~PACA_IRQ_DEC; - if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow()) + if (happened & PACA_IRQ_DEC) return 0x900; /* Finally check if an external interrupt happened */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 925a4ef90559..660ed39e9c9a 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) * If task is not current, it will have been flushed already to * it's thread_struct during __switch_to(). * - * A reclaim flushes ALL the state. + * A reclaim flushes ALL the state or if not in TM save TM SPRs + * in the appropriate thread structures from live. */ - if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) - tm_reclaim_current(TM_CAUSE_SIGNAL); + if (tsk != current) + return; + if (MSR_TM_SUSPENDED(mfmsr())) { + tm_reclaim_current(TM_CAUSE_SIGNAL); + } else { + tm_enable(); + tm_save_sprs(&(tsk->thread)); + } } #else static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 997c88d54acf..cf0e1245b8cc 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1003,21 +1003,13 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -static __init long smp_setup_cpu_workfn(void *data __always_unused) -{ - smp_ops->setup_cpu(boot_cpuid); - return 0; -} - void __init smp_cpus_done(unsigned int max_cpus) { /* - * We want the setup_cpu() here to be called on the boot CPU, but - * init might run on any CPU, so make sure it's invoked on the boot - * CPU. + * We are running pinned to the boot CPU, see rest_init(). */ if (smp_ops && smp_ops->setup_cpu) - work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL); + smp_ops->setup_cpu(boot_cpuid); if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8cb0190e2a73..b42812e014c0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -164,8 +164,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) goto out; } - if (kvm->arch.hpt.virt) + if (kvm->arch.hpt.virt) { kvmppc_free_hpt(&kvm->arch.hpt); + kvmppc_rmap_reset(kvm); + } err = kvmppc_allocate_hpt(&info, order); if (err < 0) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0b436df746fc..359c79cdf0cc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3211,6 +3211,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } + /* Enable TM so we can read the TM SPRs */ + mtmsr(mfmsr() | MSR_TM); current->thread.tm_tfhar = mfspr(SPRN_TFHAR); current->thread.tm_tfiar = mfspr(SPRN_TFIAR); current->thread.tm_texasr = mfspr(SPRN_TEXASR); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index cb44065e2946..c52184a8efdf 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1443,12 +1443,14 @@ mc_cont: ori r6,r6,1 mtspr SPRN_CTRLT,r6 4: - /* Read the guest SLB and save it away */ + /* Check if we are running hash or radix and store it in cr2 */ ld r5, VCPU_KVM(r9) lbz r0, KVM_RADIX(r5) - cmpwi r0, 0 + cmpwi cr2,r0,0 + + /* Read the guest SLB and save it away */ li r5, 0 - bne 3f /* for radix, save 0 entries */ + bne cr2, 3f /* for radix, save 0 entries */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 @@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96) END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: - /* Clear out SLB */ - li r5,0 - slbmte r5,r5 - slbia - ptesync /* Restore host values of some registers */ BEGIN_FTR_SECTION @@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION mtspr SPRN_PID, r7 mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + +#ifdef CONFIG_PPC_RADIX_MMU + /* + * Are we running hash or radix ? + */ + beq cr2,3f + + /* Radix: Handle the case where the guest used an illegal PID */ + LOAD_REG_ADDR(r4, mmu_base_pid) + lwz r3, VCPU_GUEST_PID(r9) + lwz r5, 0(r4) + cmpw cr0,r3,r5 + blt 2f + + /* + * Illegal PID, the HW might have prefetched and cached in the TLB + * some translations for the LPID 0 / guest PID combination which + * Linux doesn't know about, so we need to flush that PID out of + * the TLB. First we need to set LPIDR to 0 so tlbiel applies to + * the right context. + */ + li r0,0 + mtspr SPRN_LPID,r0 + isync + + /* Then do a congruence class local flush */ + ld r6,VCPU_KVM(r9) + lwz r0,KVM_TLB_SETS(r6) + mtctr r0 + li r7,0x400 /* IS field = 0b01 */ + ptesync + sldi r0,r3,32 /* RS has PID */ +1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */ + addi r7,r7,0x1000 + bdnz 1b + ptesync + +2: /* Flush the ERAT on radix P9 DD1 guest exit */ BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) + b 4f +#endif /* CONFIG_PPC_RADIX_MMU */ + /* Hash: clear out SLB */ +3: li r5,0 + slbmte r5,r5 + slbia + ptesync +4: /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1a75c0b5f4ca..3480faaf1ef8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index abed1fe6992f..a75f63833284 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -126,9 +126,10 @@ static int hash__init_new_context(struct mm_struct *mm) static int radix__init_new_context(struct mm_struct *mm) { unsigned long rts_field; - int index; + int index, max_id; - index = alloc_context_id(1, PRTB_ENTRIES - 1); + max_id = (1 << mmu_pid_bits) - 1; + index = alloc_context_id(mmu_base_pid, max_id); if (index < 0) return index; diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 5cc50d47ce3f..671a45d86c18 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -25,6 +25,9 @@ #include <trace/events/thp.h> +unsigned int mmu_pid_bits; +unsigned int mmu_base_pid; + static int native_register_process_table(unsigned long base, unsigned long pg_sz, unsigned long table_size) { @@ -261,11 +264,34 @@ static void __init radix_init_pgtable(void) for_each_memblock(memory, reg) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size)); + + /* Find out how many PID bits are supported */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (!mmu_pid_bits) + mmu_pid_bits = 20; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * When KVM is possible, we only use the top half of the + * PID space to avoid collisions between host and guest PIDs + * which can cause problems due to prefetch when exiting the + * guest with AIL=3 + */ + mmu_base_pid = 1 << (mmu_pid_bits - 1); +#else + mmu_base_pid = 1; +#endif + } else { + /* The guest uses the bottom half of the PID space */ + if (!mmu_pid_bits) + mmu_pid_bits = 19; + mmu_base_pid = 1; + } + /* * Allocate Partition table and process table for the * host. */ - BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); + BUG_ON(PRTB_SIZE_SHIFT > 36); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. @@ -339,6 +365,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + /* Find MMU PID size */ + prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); + if (prop && size == 4) + mmu_pid_bits = be32_to_cpup(prop); + + /* Grab page size encodings */ prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size); if (!prop) return 0; diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index e94fbd4c8845..781532d7bc4d 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -36,7 +36,7 @@ void subpage_prot_free(struct mm_struct *mm) } } addr = 0; - for (i = 0; i < 2; ++i) { + for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) { p = spt->protptrs[i]; if (!p) continue; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 744e0164ecf5..16ae1bbe13f0 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -12,12 +12,12 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/memblock.h> -#include <asm/ppc-opcode.h> +#include <asm/ppc-opcode.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/trace.h> - +#include <asm/cputhreads.h> #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -454,3 +454,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, else radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); } + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) +{ + unsigned int pid = mm->context.id; + + if (unlikely(pid == MMU_NO_CONTEXT)) + return; + + /* + * If this context hasn't run on that CPU before and KVM is + * around, there's a slim chance that the guest on another + * CPU just brought in obsolete translation into the TLB of + * this CPU due to a bad prefetch using the guest PID on + * the way into the hypervisor. + * + * We work around this here. If KVM is possible, we check if + * any sibling thread is in KVM. If it is, the window may exist + * and thus we flush that PID from the core. + * + * A potential future improvement would be to mark which PIDs + * have never been used on the system and avoid it if the PID + * is new and the process has no other cpumask bit set. + */ + if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) { + int cpu = smp_processor_id(); + int sib = cpu_first_thread_sibling(cpu); + bool flush = false; + + for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { + if (sib == cpu) + continue; + if (paca[sib].kvm_hstate.kvm_vcpu) + flush = true; + } + if (flush) + _tlbiel_pid(pid, RIC_FLUSH_ALL); + } +} +EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index d7c9b186954d..763ffca9628d 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -89,7 +89,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, goto err; ret = of_irq_to_resource(np, 0, &res[1]); - if (!ret) + if (ret <= 0) goto err; pdev = platform_device_alloc("mpc83xx_spi", i); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 437613588df1..b900eb1d5e17 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1852,6 +1852,14 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) /* 4GB offset bypasses 32-bit space */ set_dma_offset(&pdev->dev, (1ULL << 32)); set_dma_ops(&pdev->dev, &dma_direct_ops); + } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) { + /* + * Fail the request if a DMA mask between 32 and 64 bits + * was requested but couldn't be fulfilled. Ideally we + * would do this for 64-bits but historically we have + * always fallen back to 32-bits. + */ + return -ENOMEM; } else { dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); set_dma_ops(&pdev->dev, &dma_iommu_ops); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index e5bf1e84047f..011ef2180fe6 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np) of_detach_node(np); of_node_put(parent); - of_node_put(np); /* Must decrement the refcount */ return 0; } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0c82f7903fc7..c1bf75ffb875 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -998,7 +998,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) psw_bits(regs.psw).ia = sfr->basic.ia; psw_bits(regs.psw).dat = sfr->basic.T; psw_bits(regs.psw).wait = sfr->basic.W; - psw_bits(regs.psw).per = sfr->basic.P; + psw_bits(regs.psw).pstate = sfr->basic.P; psw_bits(regs.psw).as = sfr->basic.AS; /* diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index ce865bd4f81d..5ee90020382d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, true); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 39115f5a38df..40d0a1a97889 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2527,6 +2527,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_s390_vcpu_has_irq(vcpu, 0); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); +} + void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) { atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 459716de5318..ae677f814bc0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -677,11 +677,11 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) unsigned long ptev; pgste_t pgste; - /* Clear storage key */ + /* Clear storage key ACC and F, but set R/C */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | - PGSTE_GR_BIT | PGSTE_GC_BIT); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index c74d3701ad68..207a43a2d8b3 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 @@ -23,7 +22,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m @@ -69,7 +67,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=m CONFIG_PROC_KCORE=y @@ -82,7 +79,6 @@ CONFIG_NLS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KGDB=y CONFIG_KGDB_TESTS=y CONFIG_CRYPTO_NULL=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index b2e650d1764f..ca8609d7292f 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -1,5 +1,4 @@ CONFIG_64BIT=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -184,7 +183,6 @@ CONFIG_HID_TOPSEED=y CONFIG_HID_THRUSTMASTER=y CONFIG_HID_ZEROPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_TT_NEWSCHED is not set CONFIG_USB_OHCI_HCD=y @@ -210,8 +208,6 @@ CONFIG_LOCKUP_DETECTOR=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_UPROBE_EVENTS=y CONFIG_KEYS=y diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 2cddcda4f85f..87841d687f8d 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -27,9 +27,11 @@ void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, - unsigned long tsb_descr_pa); + unsigned long tsb_descr_pa, + unsigned long secondary_ctx); -static inline void tsb_context_switch(struct mm_struct *mm) +static inline void tsb_context_switch_ctx(struct mm_struct *mm, + unsigned long ctx) { __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[MM_TSB_BASE], @@ -40,9 +42,12 @@ static inline void tsb_context_switch(struct mm_struct *mm) #else NULL #endif - , __pa(&mm->context.tsb_descr[MM_TSB_BASE])); + , __pa(&mm->context.tsb_descr[MM_TSB_BASE]), + ctx); } +#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) + void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); @@ -112,8 +117,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * cpu0 to update it's TSB because at that point the cpu_vm_mask * only had cpu1 set in it. */ - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); /* Any time a processor runs a context on an address space * for the first time, we must flush that context out of the diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 07c0df924960..db872dbfafe9 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -360,6 +360,7 @@ tsb_flush: * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none * %o3: Hypervisor TSB descriptor physical address + * %o4: Secondary context to load, if non-zero * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -372,6 +373,17 @@ __tsb_context_switch: rdpr %pstate, %g1 wrpr %g1, PSTATE_IE, %pstate + brz,pn %o4, 1f + mov SECONDARY_CONTEXT, %o5 + +661: stxa %o4, [%o5] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %o4, [%o5] ASI_MMU + .previous + flush %g6 + +1: TRAP_LOAD_TRAP_BLOCK(%g2, %g3) stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 54f98706b03b..5a8cb37f0a3b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08) ENTRY(U3_retl_o2_and_7_plus_GS) and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS) ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) add GLOBAL_SPARE, 8, GLOBAL_SPARE and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3c40ebd50f92..fed73f14aa49 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -325,6 +325,29 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde } #ifdef CONFIG_HUGETLB_PAGE +static void __init add_huge_page_size(unsigned long size) +{ + unsigned int order; + + if (size_to_hstate(size)) + return; + + order = ilog2(size) - PAGE_SHIFT; + hugetlb_add_hstate(order); +} + +static int __init hugetlbpage_init(void) +{ + add_huge_page_size(1UL << HPAGE_64K_SHIFT); + add_huge_page_size(1UL << HPAGE_SHIFT); + add_huge_page_size(1UL << HPAGE_256MB_SHIFT); + add_huge_page_size(1UL << HPAGE_2GB_SHIFT); + + return 0; +} + +arch_initcall(hugetlbpage_init); + static int __init setup_hugepagesz(char *string) { unsigned long long hugepage_size; @@ -364,7 +387,7 @@ static int __init setup_hugepagesz(char *string) goto out; } - hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT); + add_huge_page_size(hugepage_size); rc = 1; out: diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 17bd2e167e07..df707a8ad311 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -35,6 +35,5 @@ void restore_processor_state(void) { struct mm_struct *mm = current->active_mm; - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); } diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 2c860ad4fe06..8a958274b54c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -34,6 +34,7 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 630e3664906b..16f49123d747 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -16,6 +16,15 @@ #include "ctype.h" #include "string.h" +/* + * Undef these macros so that the functions that we provide + * here will have the correct names regardless of how string.h + * may have chosen to #define them. + */ +#undef memcpy +#undef memset +#undef memcmp + int memcmp(const void *s1, const void *s2, size_t len) { bool diff; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a9a8027a6c0e..d271fb79248f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -705,6 +705,7 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi #ifdef CONFIG_HAVE_KVM apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi +apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi #endif #ifdef CONFIG_X86_MCE_THRESHOLD diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index dae2fedc1601..4f9127644b80 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -316,7 +316,7 @@ #define SKX_UPI_PCI_PMON_CTL0 0x350 #define SKX_UPI_PCI_PMON_CTR0 0x318 #define SKX_UPI_PCI_PMON_BOX_CTL 0x378 -#define SKX_PMON_CTL_UMASK_EXT 0xff +#define SKX_UPI_CTL_UMASK_EXT 0xffefff /* SKX M2M */ #define SKX_M2M_PCI_PMON_CTL0 0x228 @@ -328,7 +328,7 @@ DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-39"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -351,7 +351,6 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12"); -DEFINE_UNCORE_FORMAT_ATTR(filter_link4, filter_link, "config1:9-12"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); @@ -3302,7 +3301,6 @@ static struct attribute *skx_uncore_cha_formats_attr[] = { &format_attr_inv.attr, &format_attr_thresh8.attr, &format_attr_filter_tid4.attr, - &format_attr_filter_link4.attr, &format_attr_filter_state5.attr, &format_attr_filter_rem.attr, &format_attr_filter_loc.attr, @@ -3312,7 +3310,6 @@ static struct attribute *skx_uncore_cha_formats_attr[] = { &format_attr_filter_opc_0.attr, &format_attr_filter_opc_1.attr, &format_attr_filter_nc.attr, - &format_attr_filter_c6.attr, &format_attr_filter_isoc.attr, NULL, }; @@ -3333,8 +3330,11 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x8134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x3134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), + EVENT_EXTRA_END }; static u64 skx_cha_filter_mask(int fields) @@ -3347,6 +3347,17 @@ static u64 skx_cha_filter_mask(int fields) mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LINK; if (fields & 0x4) mask |= SKX_CHA_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) { + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_REM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LOC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_OPC0; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_OPC1; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_ISOC; + } return mask; } @@ -3492,6 +3503,26 @@ static struct intel_uncore_type skx_uncore_irp = { .format_group = &skx_uncore_format_group, }; +static struct attribute *skx_uncore_pcu_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge_det.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute_group skx_uncore_pcu_format_group = { + .name = "format", + .attrs = skx_uncore_pcu_formats_attr, +}; + static struct intel_uncore_ops skx_uncore_pcu_ops = { IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), .hw_config = hswep_pcu_hw_config, @@ -3510,7 +3541,7 @@ static struct intel_uncore_type skx_uncore_pcu = { .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, .num_shared_regs = 1, .ops = &skx_uncore_pcu_ops, - .format_group = &snbep_uncore_pcu_format_group, + .format_group = &skx_uncore_pcu_format_group, }; static struct intel_uncore_type *skx_msr_uncores[] = { @@ -3603,8 +3634,8 @@ static struct intel_uncore_type skx_uncore_upi = { .perf_ctr_bits = 48, .perf_ctr = SKX_UPI_PCI_PMON_CTR0, .event_ctl = SKX_UPI_PCI_PMON_CTL0, - .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, - .event_mask_ext = SKX_PMON_CTL_UMASK_EXT, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_UPI_CTL_UMASK_EXT, .box_ctl = SKX_UPI_PCI_PMON_BOX_CTL, .ops = &skx_upi_uncore_pci_ops, .format_group = &skx_upi_uncore_format_group, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ca3c48c0872f..0e25e7a8ab03 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -287,6 +287,7 @@ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index df002992d8fd..07b06955a05d 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -25,6 +25,8 @@ BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR, smp_kvm_posted_intr_ipi) BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR, smp_kvm_posted_intr_wakeup_ipi) +BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR, + smp_kvm_posted_intr_nested_ipi) #endif /* diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 9b76cd331990..ad1ed531febc 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -15,6 +15,7 @@ typedef struct { #ifdef CONFIG_HAVE_KVM unsigned int kvm_posted_intr_ipis; unsigned int kvm_posted_intr_wakeup_ipis; + unsigned int kvm_posted_intr_nested_ipis; #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b90e1053049b..d6dbafbd4207 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -30,6 +30,7 @@ extern asmlinkage void apic_timer_interrupt(void); extern asmlinkage void x86_platform_ipi(void); extern asmlinkage void kvm_posted_intr_ipi(void); extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); +extern asmlinkage void kvm_posted_intr_nested_ipi(void); extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); @@ -62,6 +63,7 @@ extern void trace_call_function_single_interrupt(void); #define trace_reboot_interrupt reboot_interrupt #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi #define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi +#define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi #endif /* CONFIG_TRACING */ #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6ca9fd6234e1..aaf8d28b5d00 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -83,7 +83,6 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 -#define POSTED_INTR_WAKEUP_VECTOR 0xf1 /* * IRQ work vector: */ @@ -98,6 +97,8 @@ /* Vector for KVM to deliver posted interrupt IPI */ #ifdef CONFIG_HAVE_KVM #define POSTED_INTR_VECTOR 0xf2 +#define POSTED_INTR_WAKEUP_VECTOR 0xf1 +#define POSTED_INTR_NESTED_VECTOR 0xf0 #endif /* diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fde36f189836..fa2558e12024 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -219,8 +219,8 @@ struct x86_emulate_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); + bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fba6d8e..afa70749903e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,15 +79,14 @@ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL #define CR3_PCID_INVD BIT_64(63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \ - | X86_CR4_PKE)) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ + | X86_CR4_SMAP | X86_CR4_PKE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -204,7 +203,6 @@ enum { #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ - PFERR_USER_MASK | \ PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) @@ -317,15 +315,17 @@ struct kvm_pio_request { int size; }; +#define PT64_ROOT_MAX_LEVEL 5 + struct rsvd_bits_validate { - u64 rsvd_bits_mask[2][4]; + u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; u64 bad_mt_xwr; }; /* - * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level - * 32-bit). The kvm_mmu structure abstracts the details of the current mmu - * mode. + * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, + * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the + * current mmu mode. */ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); @@ -547,8 +547,8 @@ struct kvm_vcpu_arch { struct kvm_queued_exception { bool pending; + bool injected; bool has_error_code; - bool reinject; u8 nr; u32 error_code; u8 nested_apf; @@ -686,8 +686,12 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - /* GPA available (AMD only) */ + /* GPA available */ bool gpa_available; + gpa_t gpa_val; + + /* be preempted when it's in kernel-mode(cpl=0) */ + bool preempted_in_kernel; }; struct kvm_lpage_info { @@ -978,7 +982,7 @@ struct kvm_x86_ops { void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); - int (*get_tdp_level)(void); + int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); @@ -1296,20 +1300,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -static inline u64 get_canonical(u64 la) -{ - return ((int64_t)la << 16) >> 16; -} - -static inline bool is_noncanonical_address(u64 la) -{ -#ifdef CONFIG_X86_64 - return get_canonical(la) != la; -#else - return false; -#endif -} - #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 58fffe79e417..14835dd205a5 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IRQ_SHIFT 8 #define V_IRQ_MASK (1 << V_IRQ_SHIFT) +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + #define V_INTR_PRIO_SHIFT 16 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) @@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) +#define V_GIF_ENABLE_SHIFT 25 +#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) + #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 35cd06f636ab..caec8417539f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,6 +72,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_RDRAND 0x00000800 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define SECONDARY_EXEC_RDSEED 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 @@ -114,6 +115,10 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +/* VMFUNC functions */ +#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_EPTP_ENTRIES 512 + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); @@ -187,6 +192,8 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, POSTED_INTR_DESC_ADDR = 0x00002016, POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + VM_FUNCTION_CONTROL = 0x00002018, + VM_FUNCTION_CONTROL_HIGH = 0x00002019, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, @@ -197,6 +204,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + EPTP_LIST_ADDRESS = 0x00002024, + EPTP_LIST_ADDRESS_HIGH = 0x00002025, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, XSS_EXIT_BITMAP = 0x0000202C, @@ -444,6 +453,7 @@ enum vmcs_field { #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) +#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7) #define VMX_EPTP_UC_BIT (1ull << 8) #define VMX_EPTP_WB_BIT (1ull << 14) #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) @@ -459,12 +469,14 @@ enum vmcs_field { #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ -#define VMX_EPT_DEFAULT_GAW 3 -#define VMX_EPT_MAX_GAW 0x4 #define VMX_EPT_MT_EPTE_SHIFT 3 -#define VMX_EPT_GAW_EPTP_SHIFT 3 -#define VMX_EPT_AD_ENABLE_BIT (1ull << 6) -#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPTP_PWL_MASK 0x38ull +#define VMX_EPTP_PWL_4 0x18ull +#define VMX_EPTP_PWL_5 0x20ull +#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +#define VMX_EPTP_MT_MASK 0x7ull +#define VMX_EPTP_MT_WB 0x6ull +#define VMX_EPTP_MT_UC 0x0ull #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index d869c8671e36..7cf7c70b6ef2 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -8,20 +8,25 @@ * This file is licensed under GPLv2. */ -#include <linux/jiffies.h> +#include <linux/delay.h> +#include <linux/ktime.h> #include <linux/math64.h> #include <linux/percpu.h> #include <linux/smp.h> struct aperfmperf_sample { unsigned int khz; - unsigned long jiffies; + ktime_t time; u64 aperf; u64 mperf; }; static DEFINE_PER_CPU(struct aperfmperf_sample, samples); +#define APERFMPERF_CACHE_THRESHOLD_MS 10 +#define APERFMPERF_REFRESH_DELAY_MS 20 +#define APERFMPERF_STALE_THRESHOLD_MS 1000 + /* * aperfmperf_snapshot_khz() * On the current CPU, snapshot APERF, MPERF, and jiffies @@ -33,9 +38,11 @@ static void aperfmperf_snapshot_khz(void *dummy) u64 aperf, aperf_delta; u64 mperf, mperf_delta; struct aperfmperf_sample *s = this_cpu_ptr(&samples); + ktime_t now = ktime_get(); + s64 time_delta = ktime_ms_delta(now, s->time); - /* Don't bother re-computing within 10 ms */ - if (time_before(jiffies, s->jiffies + HZ/100)) + /* Don't bother re-computing within the cache threshold time. */ + if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) return; rdmsrl(MSR_IA32_APERF, aperf); @@ -51,22 +58,21 @@ static void aperfmperf_snapshot_khz(void *dummy) if (mperf_delta == 0) return; - /* - * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then - * khz = (cpu_khz * aperf_delta) / mperf_delta - */ - if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta) - s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); - else /* khz = aperf_delta / (mperf_delta / cpu_khz) */ - s->khz = div64_u64(aperf_delta, - div64_u64(mperf_delta, cpu_khz)); - s->jiffies = jiffies; + s->time = now; s->aperf = aperf; s->mperf = mperf; + + /* If the previous iteration was too long ago, discard it. */ + if (time_delta > APERFMPERF_STALE_THRESHOLD_MS) + s->khz = 0; + else + s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); } unsigned int arch_freq_get_on_cpu(int cpu) { + unsigned int khz; + if (!cpu_khz) return 0; @@ -74,6 +80,12 @@ unsigned int arch_freq_get_on_cpu(int cpu) return 0; smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); + khz = per_cpu(samples.khz, cpu); + if (khz) + return khz; + + msleep(APERFMPERF_REFRESH_DELAY_MS); + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); return per_cpu(samples.khz, cpu); } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 16f82a3aaec7..8ce4212e2b8d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -345,21 +345,10 @@ static int hpet_shutdown(struct clock_event_device *evt, int timer) return 0; } -static int hpet_resume(struct clock_event_device *evt, int timer) -{ - if (!timer) { - hpet_enable_legacy_int(); - } else { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); - irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_hardirq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); - enable_irq(hdev->irq); - } +static int hpet_resume(struct clock_event_device *evt) +{ + hpet_enable_legacy_int(); hpet_print_config(); - return 0; } @@ -417,7 +406,7 @@ static int hpet_legacy_set_periodic(struct clock_event_device *evt) static int hpet_legacy_resume(struct clock_event_device *evt) { - return hpet_resume(evt, 0); + return hpet_resume(evt); } static int hpet_legacy_next_event(unsigned long delta, @@ -510,8 +499,14 @@ static int hpet_msi_set_periodic(struct clock_event_device *evt) static int hpet_msi_resume(struct clock_event_device *evt) { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + struct irq_data *data = irq_get_irq_data(hdev->irq); + struct msi_msg msg; - return hpet_resume(evt, hdev->num); + /* Restore the MSI msg and unmask the interrupt */ + irq_chip_compose_msi_msg(data, &msg); + hpet_msi_write(hdev, &msg); + hpet_msi_unmask(data); + return 0; } static int hpet_msi_next_event(unsigned long delta, diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 4aa03c5a14c9..4ed0aba8dbc8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -155,6 +155,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); seq_puts(p, " Posted-interrupt notification event\n"); + seq_printf(p, "%*s: ", prec, "NPI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->kvm_posted_intr_nested_ipis); + seq_puts(p, " Nested posted-interrupt event\n"); + seq_printf(p, "%*s: ", prec, "PIW"); for_each_online_cpu(j) seq_printf(p, "%10u ", @@ -313,6 +319,19 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) exiting_irq(); set_irq_regs(old_regs); } + +/* + * Handler for POSTED_INTERRUPT_NESTED_VECTOR. + */ +__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + entering_ack_irq(); + inc_irq_stat(kvm_posted_intr_nested_ipis); + exiting_irq(); + set_irq_regs(old_regs); +} #endif __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7468c6987547..c7fd18526c3e 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -150,6 +150,8 @@ static void __init apic_intr_init(void) alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); /* IPI for KVM to deliver interrupt to wake up tasks */ alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); + /* IPI for KVM to deliver nested posted interrupt */ + alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi); #endif /* IPI vectors for APIC spurious and error interrupts */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6b877807598b..f0153714ddac 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -457,6 +457,8 @@ static int arch_copy_kprobe(struct kprobe *p) int arch_prepare_kprobe(struct kprobe *p) { + int ret; + if (alternatives_text_reserved(p->addr, p->addr)) return -EINVAL; @@ -467,7 +469,13 @@ int arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.insn) return -ENOMEM; - return arch_copy_kprobe(p); + ret = arch_copy_kprobe(p); + if (ret) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } + + return ret; } void arch_arm_kprobe(struct kprobe *p) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 71c17a5be983..d04e30e3c0ff 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token) if (hlist_unhashed(&n.link)) break; + rcu_irq_exit(); + if (!n.halted) { local_irq_enable(); schedule(); @@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token) /* * We cannot reschedule. So halt. */ - rcu_irq_exit(); native_safe_halt(); local_irq_disable(); - rcu_irq_enter(); } + + rcu_irq_enter(); } if (!n.halted) finish_swait(&n.wq, &wait); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 67393fc88353..a56bf6051f4e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -471,12 +471,12 @@ static int __init reboot_init(void) /* * The DMI quirks table takes precedence. If no quirks entry - * matches and the ACPI Hardware Reduced bit is set, force EFI - * reboot. + * matches and the ACPI Hardware Reduced bit is set and EFI + * runtime services are enabled, force EFI reboot. */ rv = dmi_check_system(reboot_dmi_table); - if (!rv && efi_reboot_required()) + if (!rv && efi_reboot_required() && !efi_runtime_disabled()) reboot_type = BOOT_EFI; return 0; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59ca2eea522c..1cea3ffba82d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); /* - * The existing code assumes virtual address is 48-bit in the canonical - * address checks; exit if it is ever changed. + * The existing code assumes virtual address is 48-bit or 57-bit in the + * canonical address checks; exit if it is ever changed. */ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best && ((best->eax & 0xff00) >> 8) != 48 && - ((best->eax & 0xff00) >> 8) != 0) - return -EINVAL; + if (best) { + int vaddr_bits = (best->eax & 0xff00) >> 8; + + if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0) + return -EINVAL; + } /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + kvm_mmu_reset_context(vcpu); kvm_pmu_refresh(vcpu); return 0; @@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); + F(AVX512VBMI) | F(LA57) | F(PKU) | + 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = @@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); } -void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; struct kvm_cpuid_entry2 *best; + bool entry_found = true; best = kvm_find_cpuid_entry(vcpu, function, index); - if (!best) + if (!best) { + entry_found = false; + if (!check_limit) + goto out; + best = check_cpuid_limit(vcpu, function, index); + } +out: if (best) { *eax = best->eax; *ebx = best->ebx; @@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) *edx = best->edx; } else *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); + return entry_found; } EXPORT_SYMBOL_GPL(kvm_cpuid); @@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) eax = kvm_register_read(vcpu, VCPU_REGS_RAX); ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); - kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); + kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); kvm_register_write(vcpu, VCPU_REGS_RAX, eax); kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index da6728383052..1ea3c0e1e3a9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -3,6 +3,7 @@ #include "x86.h" #include <asm/cpu.h> +#include <asm/processor.h> int kvm_update_cpuid(struct kvm_vcpu *vcpu); bool kvm_mpx_supported(void); @@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); -void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); +bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); @@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) return vcpu->arch.maxphyaddr; } -static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - if (!static_cpu_has(X86_FEATURE_XSAVE)) - return false; - - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_XSAVE)); -} - -static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->edx & bit(X86_FEATURE_MTRR)); -} - -static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST)); -} +struct cpuid_reg { + u32 function; + u32 index; + int reg; +}; -static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_SMEP)); -} +static const struct cpuid_reg reverse_cpuid[] = { + [CPUID_1_EDX] = { 1, 0, CPUID_EDX}, + [CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX}, + [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, + [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, + [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, + [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, + [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, + [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, + [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, + [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, + [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, + [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, + [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, + [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, +}; -static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu) +static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_SMAP)); -} + unsigned x86_leaf = x86_feature / 32; -static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + BUILD_BUG_ON(!__builtin_constant_p(x86_leaf)); + BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); + BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); + return reverse_cpuid[x86_leaf]; } -static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu) +static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ecx & bit(X86_FEATURE_PKU)); -} + struct kvm_cpuid_entry2 *entry; + const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature); -static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index); + if (!entry) + return NULL; - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_LM)); + switch (cpuid.reg) { + case CPUID_EAX: + return &entry->eax; + case CPUID_EBX: + return &entry->ebx; + case CPUID_ECX: + return &entry->ecx; + case CPUID_EDX: + return &entry->edx; + default: + BUILD_BUG(); + return NULL; + } } -static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) +static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; + int *reg; - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->ecx & bit(X86_FEATURE_OSVW)); -} + if (x86_feature == X86_FEATURE_XSAVE && + !static_cpu_has(X86_FEATURE_XSAVE)) + return false; -static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + reg = guest_cpuid_get_register(vcpu, x86_feature); + if (!reg) + return false; - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_PCID)); + return *reg & bit(x86_feature); } -static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) +static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; + int *reg; - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_X2APIC)); + reg = guest_cpuid_get_register(vcpu, x86_feature); + if (reg) + *reg &= ~bit(x86_feature); } static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) @@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; } -static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_GBPAGES)); -} - -static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_RTM)); -} - -static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_MPX)); -} - -static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_RDTSCP)); -} - -/* - * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 - */ -#define BIT_NRIPS 3 - -static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); - - /* - * NRIPS is a scattered cpuid feature, so we can't use - * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit - * position 8, not 3). - */ - return best && (best->edx & bit(BIT_NRIPS)); -} -#undef BIT_NRIPS - static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fb0055953fbc..16bf6655aa85 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -28,6 +28,7 @@ #include "x86.h" #include "tss.h" +#include "mmu.h" /* * Operand types @@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, ulong la; u32 lim; u16 sel; + u8 va_bits; la = seg_base(ctxt, addr.seg) + addr.ea; *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: *linear = la; - if (is_noncanonical_address(la)) + va_bits = ctxt_virt_addr_bits(ctxt); + if (get_canonical(la, va_bits) != la) goto bad; - *max_size = min_t(u64, ~0u, (1ull << 48) - la); + *max_size = min_t(u64, ~0u, (1ull << va_bits) - la); if (size > *max_size) goto bad; break; @@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, sizeof(base3), &ctxt->exception); if (ret != X86EMUL_CONTINUE) return ret; - if (is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32))) + if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | + ((u64)base3 << 32), ctxt)) return emulate_gp(ctxt, 0); } load: @@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) eax = 0x80000001; ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return edx & bit(X86_FEATURE_LM); } @@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) u32 eax, ebx, ecx, edx; eax = ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; @@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) eax = 0x00000000; ecx = 0x00000000; - ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); /* * Intel ("GenuineIntel") * remark: Intel CPUs only support "syscall" in 64bit @@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; - if (is_noncanonical_address(rcx) || - is_noncanonical_address(rdx)) + if (emul_is_noncanonical_address(rcx, ctxt) || + emul_is_noncanonical_address(rdx, ctxt)) return emulate_gp(ctxt, 0); break; } @@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) /* * Check MOVBE is set in the guest-visible CPUID leaf. */ - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); if (!(ecx & FFL(MOVBE))) return emulate_ud(ctxt); @@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->mode == X86EMUL_MODE_PROT64 && - is_noncanonical_address(desc_ptr.address)) + emul_is_noncanonical_address(desc_ptr.address, ctxt)) return emulate_gp(ctxt, 0); if (lgdt) ctxt->ops->set_gdt(ctxt, &desc_ptr); @@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) eax = reg_read(ctxt, VCPU_REGS_RAX); ecx = reg_read(ctxt, VCPU_REGS_RCX); - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); *reg_write(ctxt, VCPU_REGS_RAX) = eax; *reg_write(ctxt, VCPU_REGS_RBX) = ebx; *reg_write(ctxt, VCPU_REGS_RCX) = ecx; @@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) { u32 eax = 1, ebx, ecx = 0, edx; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); if (!(edx & FFL(FXSR))) return emulate_ud(ctxt); @@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) u64 rsvd = 0; ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - if (efer & EFER_LMA) - rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; + if (efer & EFER_LMA) { + u64 maxphyaddr; + u32 eax = 0x80000008; + + if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, + NULL, false)) + maxphyaddr = eax & 0xff; + else + maxphyaddr = 36; + rsvd = rsvd_bits(maxphyaddr, 62); + } if (new_val & rsvd) return emulate_gp(ctxt, 0); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 337b6d2730fa..dc97f2544b6f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), pdata); } + case HV_X64_MSR_TSC_FREQUENCY: + data = (u64)vcpu->arch.virtual_tsc_khz * 1000; + break; + case HV_X64_MSR_APIC_FREQUENCY: + data = APIC_BUS_FREQUENCY; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) switch (code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, true); break; case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 762cdf2595f9..0052317b2733 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -4,7 +4,7 @@ #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, enum kvm_reg reg) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2819d4c123eb..aaf10b6f5380 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -54,8 +54,6 @@ #define PRIu64 "u" #define PRIo64 "o" -#define APIC_BUS_CYCLE_NS 1 - /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ #define apic_debug(fmt, arg...) @@ -1495,11 +1493,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + WARN_ON(preemptible()); WARN_ON(!apic->lapic_timer.hv_timer_in_use); - preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; - preempt_enable(); } static bool start_hv_timer(struct kvm_lapic *apic) @@ -1507,6 +1504,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) struct kvm_timer *ktimer = &apic->lapic_timer; int r; + WARN_ON(preemptible()); if (!kvm_x86_ops->set_hv_timer) return false; @@ -1538,6 +1536,8 @@ static bool start_hv_timer(struct kvm_lapic *apic) static void start_sw_timer(struct kvm_lapic *apic) { struct kvm_timer *ktimer = &apic->lapic_timer; + + WARN_ON(preemptible()); if (apic->lapic_timer.hv_timer_in_use) cancel_hv_timer(apic); if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) @@ -1552,15 +1552,20 @@ static void start_sw_timer(struct kvm_lapic *apic) static void restart_apic_timer(struct kvm_lapic *apic) { + preempt_disable(); if (!start_hv_timer(apic)) start_sw_timer(apic); + preempt_enable(); } void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - WARN_ON(!apic->lapic_timer.hv_timer_in_use); + preempt_disable(); + /* If the preempt notifier has already run, it also called apic_timer_expired */ + if (!apic->lapic_timer.hv_timer_in_use) + goto out; WARN_ON(swait_active(&vcpu->wq)); cancel_hv_timer(apic); apic_timer_expired(apic); @@ -1569,6 +1574,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) advance_periodic_target_expiration(apic); restart_apic_timer(apic); } +out: + preempt_enable(); } EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); @@ -1582,9 +1589,11 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; + preempt_disable(); /* Possibly the TSC deadline timer is not enabled yet */ if (apic->lapic_timer.hv_timer_in_use) start_sw_timer(apic); + preempt_enable(); } EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 29caa2c3dff9..215721e1426a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,6 +12,9 @@ #define KVM_APIC_SHORT_MASK 0xc0000 #define KVM_APIC_DEST_MASK 0x800 +#define APIC_BUS_CYCLE_NS 1 +#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9b1dd114956a..2a8a6e3e2a31 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2167,8 +2167,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, } struct mmu_page_path { - struct kvm_mmu_page *parent[PT64_ROOT_LEVEL]; - unsigned int idx[PT64_ROOT_LEVEL]; + struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; + unsigned int idx[PT64_ROOT_MAX_LEVEL]; }; #define for_each_sp(pvec, sp, parents, i) \ @@ -2383,8 +2383,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, iterator->shadow_addr = vcpu->arch.mmu.root_hpa; iterator->level = vcpu->arch.mmu.shadow_root_level; - if (iterator->level == PT64_ROOT_LEVEL && - vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL && + if (iterator->level == PT64_ROOT_4LEVEL && + vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL && !vcpu->arch.mmu.direct_map) --iterator->level; @@ -2608,9 +2608,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, sp = list_last_entry(&kvm->arch.active_mmu_pages, struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); - - return true; + return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } /* @@ -3259,7 +3257,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); -static void make_mmu_pages_available(struct kvm_vcpu *vcpu); +static int make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn_t gfn, bool prefault) @@ -3299,7 +3297,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); @@ -3323,8 +3322,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && - (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || + if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL && + (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL || vcpu->arch.mmu.direct_map)) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -3376,10 +3375,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) struct kvm_mmu_page *sp; unsigned i; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); + if(make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } + sp = kvm_mmu_get_page(vcpu, 0, 0, + vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = __pa(sp->spt); @@ -3389,7 +3392,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) MMU_WARN_ON(VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); root = __pa(sp->spt); @@ -3420,15 +3426,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * Do we shadow a long mode page table? If so we need to * write-protect the guests page table root. */ - if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; MMU_WARN_ON(VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, - 0, ACC_ALL); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } + sp = kvm_mmu_get_page(vcpu, root_gfn, 0, + vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3442,7 +3451,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * the shadow page table may be a PAE or a long mode page table. */ pm_mask = PT_PRESENT_MASK; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) + if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; for (i = 0; i < 4; ++i) { @@ -3460,7 +3469,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) return 1; } spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL); root = __pa(sp->spt); @@ -3475,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * If we shadow a 32 bit page table with a long mode page * table we enter this path. */ - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) { if (vcpu->arch.mmu.lm_root == NULL) { /* * The additional page necessary for this is only @@ -3520,7 +3532,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); - if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; sp = page_header(root); mmu_sync_children(vcpu, sp); @@ -3585,6 +3597,13 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) { + /* + * A nested guest cannot use the MMIO cache if it is using nested + * page tables, because cr2 is a nGPA while the cache stores GPAs. + */ + if (mmu_is_nested(vcpu)) + return false; + if (direct) return vcpu_match_mmio_gpa(vcpu, addr); @@ -3596,7 +3615,7 @@ static bool walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { struct kvm_shadow_walk_iterator iterator; - u64 sptes[PT64_ROOT_LEVEL], spte = 0ull; + u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull; int root, leaf; bool reserved = false; @@ -3637,7 +3656,23 @@ exit: return reserved; } -int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) +/* + * Return values of handle_mmio_page_fault: + * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction + * directly. + * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page + * fault path update the mmio spte. + * RET_MMIO_PF_RETRY: let CPU fault again on the address. + * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). + */ +enum { + RET_MMIO_PF_EMULATE = 1, + RET_MMIO_PF_INVALID = 2, + RET_MMIO_PF_RETRY = 0, + RET_MMIO_PF_BUG = -1 +}; + +static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; bool reserved; @@ -3869,7 +3904,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); @@ -4022,7 +4058,13 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; break; - case PT64_ROOT_LEVEL: + case PT64_ROOT_5LEVEL: + rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd | + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[1][4] = + rsvd_check->rsvd_bits_mask[0][4]; + case PT64_ROOT_4LEVEL: rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); @@ -4052,7 +4094,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, { __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, cpuid_maxphyaddr(vcpu), context->root_level, - context->nx, guest_cpuid_has_gbpages(vcpu), + context->nx, + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), is_pse(vcpu), guest_cpuid_is_amd(vcpu)); } @@ -4062,6 +4105,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, { u64 bad_mt_xwr; + rsvd_check->rsvd_bits_mask[0][4] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][3] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][2] = @@ -4071,6 +4116,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); /* large page */ + rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; rsvd_check->rsvd_bits_mask[1][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); @@ -4114,8 +4160,8 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, uses_nx, - guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), - true); + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), + is_pse(vcpu), true); } EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); @@ -4158,66 +4204,85 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, boot_cpu_data.x86_phys_bits, execonly); } +#define BYTE_MASK(access) \ + ((1 & (access) ? 2 : 0) | \ + (2 & (access) ? 4 : 0) | \ + (3 & (access) ? 8 : 0) | \ + (4 & (access) ? 16 : 0) | \ + (5 & (access) ? 32 : 0) | \ + (6 & (access) ? 64 : 0) | \ + (7 & (access) ? 128 : 0)) + + static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, bool ept) { - unsigned bit, byte, pfec; - u8 map; - bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0; + unsigned byte; + + const u8 x = BYTE_MASK(ACC_EXEC_MASK); + const u8 w = BYTE_MASK(ACC_WRITE_MASK); + const u8 u = BYTE_MASK(ACC_USER_MASK); + + bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0; + bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0; + bool cr0_wp = is_write_protection(vcpu); - cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); - cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { - pfec = byte << 1; - map = 0; - wf = pfec & PFERR_WRITE_MASK; - uf = pfec & PFERR_USER_MASK; - ff = pfec & PFERR_FETCH_MASK; + unsigned pfec = byte << 1; + /* - * PFERR_RSVD_MASK bit is set in PFEC if the access is not - * subject to SMAP restrictions, and cleared otherwise. The - * bit is only meaningful if the SMAP bit is set in CR4. + * Each "*f" variable has a 1 bit for each UWX value + * that causes a fault with the given PFEC. */ - smapf = !(pfec & PFERR_RSVD_MASK); - for (bit = 0; bit < 8; ++bit) { - x = bit & ACC_EXEC_MASK; - w = bit & ACC_WRITE_MASK; - u = bit & ACC_USER_MASK; - - if (!ept) { - /* Not really needed: !nx will cause pte.nx to fault */ - x |= !mmu->nx; - /* Allow supervisor writes if !cr0.wp */ - w |= !is_write_protection(vcpu) && !uf; - /* Disallow supervisor fetches of user code if cr4.smep */ - x &= !(cr4_smep && u && !uf); - - /* - * SMAP:kernel-mode data accesses from user-mode - * mappings should fault. A fault is considered - * as a SMAP violation if all of the following - * conditions are ture: - * - X86_CR4_SMAP is set in CR4 - * - A user page is accessed - * - Page fault in kernel mode - * - if CPL = 3 or X86_EFLAGS_AC is clear - * - * Here, we cover the first three conditions. - * The fourth is computed dynamically in - * permission_fault() and is in smapf. - * - * Also, SMAP does not affect instruction - * fetches, add the !ff check here to make it - * clearer. - */ - smap = cr4_smap && u && !uf && !ff; - } - fault = (ff && !x) || (uf && !u) || (wf && !w) || - (smapf && smap); - map |= fault << bit; + /* Faults from writes to non-writable pages */ + u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0; + /* Faults from user mode accesses to supervisor pages */ + u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0; + /* Faults from fetches of non-executable pages*/ + u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0; + /* Faults from kernel mode fetches of user pages */ + u8 smepf = 0; + /* Faults from kernel mode accesses of user pages */ + u8 smapf = 0; + + if (!ept) { + /* Faults from kernel mode accesses to user pages */ + u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; + + /* Not really needed: !nx will cause pte.nx to fault */ + if (!mmu->nx) + ff = 0; + + /* Allow supervisor writes if !cr0.wp */ + if (!cr0_wp) + wf = (pfec & PFERR_USER_MASK) ? wf : 0; + + /* Disallow supervisor fetches of user code if cr4.smep */ + if (cr4_smep) + smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; + + /* + * SMAP:kernel-mode data accesses from user-mode + * mappings should fault. A fault is considered + * as a SMAP violation if all of the following + * conditions are ture: + * - X86_CR4_SMAP is set in CR4 + * - A user page is accessed + * - The access is not a fetch + * - Page fault in kernel mode + * - if CPL = 3 or X86_EFLAGS_AC is clear + * + * Here, we cover the first three conditions. + * The fourth is computed dynamically in permission_fault(); + * PFERR_RSVD_MASK bit will be set in PFEC if the access is + * *not* subject to SMAP restrictions. + */ + if (cr4_smap) + smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; } - mmu->permissions[byte] = map; + + mmu->permissions[byte] = ff | uf | wf | smepf | smapf; } } @@ -4331,7 +4396,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, static void paging64_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); + int root_level = is_la57_mode(vcpu) ? + PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; + + paging64_init_context_common(vcpu, context, root_level); } static void paging32_init_context(struct kvm_vcpu *vcpu, @@ -4372,7 +4440,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; context->update_pte = nonpaging_update_pte; - context->shadow_root_level = kvm_x86_ops->get_tdp_level(); + context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); context->root_hpa = INVALID_PAGE; context->direct_map = true; context->set_cr3 = kvm_x86_ops->set_tdp_cr3; @@ -4386,7 +4454,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->root_level = 0; } else if (is_long_mode(vcpu)) { context->nx = is_nx(vcpu); - context->root_level = PT64_ROOT_LEVEL; + context->root_level = is_la57_mode(vcpu) ? + PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; reset_rsvds_bits_mask(vcpu, context); context->gva_to_gpa = paging64_gva_to_gpa; } else if (is_pae(vcpu)) { @@ -4443,7 +4512,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, MMU_WARN_ON(VALID_PAGE(context->root_hpa)); - context->shadow_root_level = kvm_x86_ops->get_tdp_level(); + context->shadow_root_level = PT64_ROOT_4LEVEL; context->nx = true; context->ept_ad = accessed_dirty; @@ -4452,7 +4521,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->sync_page = ept_sync_page; context->invlpg = ept_invlpg; context->update_pte = ept_update_pte; - context->root_level = context->shadow_root_level; + context->root_level = PT64_ROOT_4LEVEL; context->root_hpa = INVALID_PAGE; context->direct_map = false; context->base_role.ad_disabled = !accessed_dirty; @@ -4497,7 +4566,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; } else if (is_long_mode(vcpu)) { g_context->nx = is_nx(vcpu); - g_context->root_level = PT64_ROOT_LEVEL; + g_context->root_level = is_la57_mode(vcpu) ? + PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; reset_rsvds_bits_mask(vcpu, g_context); g_context->gva_to_gpa = paging64_gva_to_gpa_nested; } else if (is_pae(vcpu)) { @@ -4787,12 +4857,12 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); -static void make_mmu_pages_available(struct kvm_vcpu *vcpu) +static int make_mmu_pages_available(struct kvm_vcpu *vcpu) { LIST_HEAD(invalid_list); if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) - return; + return 0; while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) @@ -4801,6 +4871,10 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu) ++vcpu->kvm->stat.mmu_recycled; } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + + if (!kvm_mmu_available_pages(vcpu->kvm)) + return -ENOSPC; + return 0; } int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, @@ -4808,7 +4882,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, { int r, emulation_type = EMULTYPE_RETRY; enum emulation_result er; - bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); + bool direct = vcpu->arch.mmu.direct_map; + + /* With shadow page tables, fault_address contains a GVA or nGPA. */ + if (vcpu->arch.mmu.direct_map) { + vcpu->arch.gpa_available = true; + vcpu->arch.gpa_val = cr2; + } if (unlikely(error_code & PFERR_RSVD_MASK)) { r = handle_mmio_page_fault(vcpu, cr2, direct); @@ -4820,6 +4900,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, return 1; if (r < 0) return r; + /* Must be RET_MMIO_PF_INVALID. */ } r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), @@ -4835,11 +4916,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * This can occur when using nested virtualization with nested * paging in both guests. If true, we simply unprotect the page * and resume the guest. - * - * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used - * in PFERR_NEXT_GUEST_PAGE) */ - if (error_code == PFERR_NESTED_GUEST_PAGE) { + if (vcpu->arch.mmu.direct_map && + (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); return 1; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d7d248a000dd..e2999f57bfc4 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -37,7 +37,8 @@ #define PT32_DIR_PSE36_MASK \ (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) -#define PT64_ROOT_LEVEL 4 +#define PT64_ROOT_5LEVEL 5 +#define PT64_ROOT_4LEVEL 4 #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 @@ -48,6 +49,9 @@ static inline u64 rsvd_bits(int s, int e) { + if (e < s) + return 0; + return ((1ULL << (e - s + 1)) - 1) << s; } @@ -56,23 +60,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); -/* - * Return values of handle_mmio_page_fault: - * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction - * directly. - * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page - * fault path update the mmio spte. - * RET_MMIO_PF_RETRY: let CPU fault again on the address. - * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). - */ -enum { - RET_MMIO_PF_EMULATE = 1, - RET_MMIO_PF_INVALID = 2, - RET_MMIO_PF_RETRY = 0, - RET_MMIO_PF_BUG = -1 -}; - -int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty); diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index dcce533d420c..d22ddbdf5e6e 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; sp = page_header(root); - __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL); + __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level); return; } diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 0149ac59c273..e9ea2d45ae66 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -130,7 +130,7 @@ static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) * enable MTRRs and it is obviously undesirable to run the * guest entirely with UC memory and we use WB. */ - if (guest_cpuid_has_mtrr(vcpu)) + if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR)) return MTRR_TYPE_UNCACHABLE; else return MTRR_TYPE_WRBACK; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b0454c7e4cff..86b68dc5a649 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, &map_writable)) return 0; - if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr, - walker.gfn, pfn, walker.pte_access, &r)) + if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r)) return r; /* @@ -819,7 +818,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; if (!force_pt_level) transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4d8141e533c3..9f6fb0ef206c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO); static int vls = true; module_param(vls, int, 0444); -/* AVIC VM ID bit masks and lock */ -static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR); -static DEFINE_SPINLOCK(avic_vm_id_lock); +/* enable/disable Virtual GIF */ +static int vgif = true; +module_param(vgif, int, 0444); static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit) recalc_intercepts(svm); } +static inline bool vgif_enabled(struct vcpu_svm *svm) +{ + return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); +} + static inline void enable_gif(struct vcpu_svm *svm) { - svm->vcpu.arch.hflags |= HF_GIF_MASK; + if (vgif_enabled(svm)) + svm->vmcb->control.int_ctl |= V_GIF_MASK; + else + svm->vcpu.arch.hflags |= HF_GIF_MASK; } static inline void disable_gif(struct vcpu_svm *svm) { - svm->vcpu.arch.hflags &= ~HF_GIF_MASK; + if (vgif_enabled(svm)) + svm->vmcb->control.int_ctl &= ~V_GIF_MASK; + else + svm->vcpu.arch.hflags &= ~HF_GIF_MASK; } static inline bool gif_set(struct vcpu_svm *svm) { - return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); + if (vgif_enabled(svm)) + return !!(svm->vmcb->control.int_ctl & V_GIF_MASK); + else + return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); } static unsigned long iopm_base; @@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid) asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); } -static int get_npt_level(void) +static int get_npt_level(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - return PT64_ROOT_LEVEL; + return PT64_ROOT_4LEVEL; #else return PT32E_ROOT_LEVEL; #endif @@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.reinject; + bool reinject = vcpu->arch.exception.injected; u32 error_code = vcpu->arch.exception.error_code; /* @@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) static void disable_nmi_singlestep(struct vcpu_svm *svm) { svm->nmi_singlestep = false; + if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { /* Clear our flags if they were not set by the guest */ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) @@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) */ #define SVM_VM_DATA_HASH_BITS 8 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); +static u32 next_vm_id = 0; +static bool next_vm_id_wrapped = 0; static DEFINE_SPINLOCK(svm_vm_data_hash_lock); /* Note: @@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void) } } + if (vgif) { + if (!boot_cpu_has(X86_FEATURE_VGIF)) + vgif = false; + else + pr_info("Virtual GIF supported\n"); + } + return 0; err: @@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm) svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; } + if (vgif) { + clr_intercept(svm, INTERCEPT_STGI); + clr_intercept(svm, INTERCEPT_CLGI); + svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; + } + mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } -static inline int avic_get_next_vm_id(void) -{ - int id; - - spin_lock(&avic_vm_id_lock); - - /* AVIC VM ID is one-based. */ - id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1); - if (id <= AVIC_VM_ID_MASK) - __set_bit(id, avic_vm_id_bitmap); - else - id = -EAGAIN; - - spin_unlock(&avic_vm_id_lock); - return id; -} - -static inline int avic_free_vm_id(int id) -{ - if (id <= 0 || id > AVIC_VM_ID_MASK) - return -EINVAL; - - spin_lock(&avic_vm_id_lock); - __clear_bit(id, avic_vm_id_bitmap); - spin_unlock(&avic_vm_id_lock); - return 0; -} - static void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; @@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm) if (!avic) return; - avic_free_vm_id(vm_data->avic_vm_id); - if (vm_data->avic_logical_id_table_page) __free_page(vm_data->avic_logical_id_table_page); if (vm_data->avic_physical_id_table_page) @@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm) static int avic_vm_init(struct kvm *kvm) { unsigned long flags; - int vm_id, err = -ENOMEM; + int err = -ENOMEM; struct kvm_arch *vm_data = &kvm->arch; struct page *p_page; struct page *l_page; + struct kvm_arch *ka; + u32 vm_id; if (!avic) return 0; - vm_id = avic_get_next_vm_id(); - if (vm_id < 0) - return vm_id; - vm_data->avic_vm_id = (u32)vm_id; - /* Allocating physical APIC ID table (4KB) */ p_page = alloc_page(GFP_KERNEL); if (!p_page) @@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm) clear_page(page_address(l_page)); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); + again: + vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK; + if (vm_id == 0) { /* id is 1-based, zero is not okay */ + next_vm_id_wrapped = 1; + goto again; + } + /* Is it still in use? Only possible if wrapped at least once */ + if (next_vm_id_wrapped) { + hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { + struct kvm *k2 = container_of(ka, struct kvm, arch); + struct kvm_arch *vd2 = &k2->arch; + if (vd2->avic_vm_id == vm_id) + goto again; + } + } + vm_data->avic_vm_id = vm_id; hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); @@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(svm); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) @@ -2389,7 +2402,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; - vcpu->arch.mmu.shadow_root_level = get_npt_level(); + vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -2430,6 +2443,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = error_code; + + /* + * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. + * The fix is to add the ancillary datum (CR2 or DR6) to structs + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be + * written only when inject_pending_event runs (DR6 would written here + * too). This should be conditional on a new capability---if the + * capability is disabled, kvm_multiple_exception would write the + * ancillary information to CR2 or DR6, for backwards ABI-compatibility. + */ if (svm->vcpu.arch.exception.nested_apf) svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; else @@ -3142,6 +3165,13 @@ static int stgi_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; + /* + * If VGIF is enabled, the STGI intercept is only added to + * detect the opening of the NMI window; remove it now. + */ + if (vgif_enabled(svm)) + clr_intercept(svm, INTERCEPT_STGI); + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); @@ -3739,7 +3769,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { - kvm_vcpu_on_spin(&(svm->vcpu)); + struct kvm_vcpu *vcpu = &svm->vcpu; + bool in_kernel = (svm_get_cpl(vcpu) == 0); + + kvm_vcpu_on_spin(vcpu, in_kernel); return 1; } @@ -4223,8 +4256,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF); - if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) @@ -4676,9 +4707,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes * 1, because that's a separate STGI/VMRUN intercept. The next time we * get that intercept, this function will be called again though and - * we'll get the vintr intercept. + * we'll get the vintr intercept. However, if the vGIF feature is + * enabled, the STGI interception will not occur. Enable the irq + * window under the assumption that the hardware will set the GIF. */ - if (gif_set(svm) && nested_svm_intr(svm)) { + if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } @@ -4692,8 +4725,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) == HF_NMI_MASK) return; /* IRET will cause a vm exit */ - if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) + if (!gif_set(svm)) { + if (vgif_enabled(svm)) + set_intercept(svm, INTERCEPT_STGI); return; /* STGI will cause a vm exit */ + } if (svm->nested.exit_required) return; /* we're not going to run the guest yet */ @@ -5065,17 +5101,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) static void svm_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_cpuid_entry2 *entry; /* Update nrips enabled cache */ - svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); + svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); if (!kvm_vcpu_apicv_active(vcpu)) return; - entry = kvm_find_cpuid_entry(vcpu, 1, 0); - if (entry) - entry->ecx &= ~bit(X86_FEATURE_X2APIC); + guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); } static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0a6cc6754ec5..8a202c49e2a0 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -151,8 +151,8 @@ TRACE_EVENT(kvm_fast_mmio, */ TRACE_EVENT(kvm_cpuid, TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx, - unsigned long rcx, unsigned long rdx), - TP_ARGS(function, rax, rbx, rcx, rdx), + unsigned long rcx, unsigned long rdx, bool found), + TP_ARGS(function, rax, rbx, rcx, rdx, found), TP_STRUCT__entry( __field( unsigned int, function ) @@ -160,6 +160,7 @@ TRACE_EVENT(kvm_cpuid, __field( unsigned long, rbx ) __field( unsigned long, rcx ) __field( unsigned long, rdx ) + __field( bool, found ) ), TP_fast_assign( @@ -168,11 +169,13 @@ TRACE_EVENT(kvm_cpuid, __entry->rbx = rbx; __entry->rcx = rcx; __entry->rdx = rdx; + __entry->found = found; ), - TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx", + TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s", __entry->function, __entry->rax, - __entry->rbx, __entry->rcx, __entry->rdx) + __entry->rbx, __entry->rcx, __entry->rdx, + __entry->found ? "found" : "not found") ); #define AREG(x) { APIC_##x, "APIC_" #x } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 29fd8af5c347..ec6b1932dea9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -122,7 +122,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_TSD) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) @@ -243,11 +243,13 @@ struct __packed vmcs12 { u64 virtual_apic_page_addr; u64 apic_access_addr; u64 posted_intr_desc_addr; + u64 vm_function_control; u64 ept_pointer; u64 eoi_exit_bitmap0; u64 eoi_exit_bitmap1; u64 eoi_exit_bitmap2; u64 eoi_exit_bitmap3; + u64 eptp_list_address; u64 xss_exit_bitmap; u64 guest_physical_address; u64 vmcs_link_pointer; @@ -416,13 +418,10 @@ struct nested_vmx { /* The guest-physical address of the current VMCS L1 keeps for L2 */ gpa_t current_vmptr; - /* The host-usable pointer to the above */ - struct page *current_vmcs12_page; - struct vmcs12 *current_vmcs12; /* * Cache of the guest's VMCS, existing outside of guest memory. * Loaded from guest memory during VMPTRLD. Flushed to guest - * memory during VMXOFF, VMCLEAR, VMPTRLD. + * memory during VMCLEAR and VMPTRLD. */ struct vmcs12 *cached_vmcs12; /* @@ -484,6 +483,7 @@ struct nested_vmx { u64 nested_vmx_cr4_fixed0; u64 nested_vmx_cr4_fixed1; u64 nested_vmx_vmcs_enum; + u64 nested_vmx_vmfunc_controls; }; #define POSTED_INTR_ON 0 @@ -563,7 +563,6 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; - bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -577,6 +576,8 @@ struct vcpu_vmx { #endif u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; + u32 secondary_exec_control; + /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. For a nested @@ -767,11 +768,13 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), FIELD64(APIC_ACCESS_ADDR, apic_access_addr), FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), + FIELD64(VM_FUNCTION_CONTROL, vm_function_control), FIELD64(EPT_POINTER, ept_pointer), FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), + FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), @@ -895,25 +898,6 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) return to_vmx(vcpu)->nested.cached_vmcs12; } -static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) -{ - struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT); - if (is_error_page(page)) - return NULL; - - return page; -} - -static void nested_release_page(struct page *page) -{ - kvm_release_page_dirty(page); -} - -static void nested_release_page_clean(struct page *page) -{ - kvm_release_page_clean(page); -} - static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); @@ -928,6 +912,10 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); +static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, + u16 error_code); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1214,6 +1202,16 @@ static inline bool cpu_has_vmx_ept_4levels(void) return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; } +static inline bool cpu_has_vmx_ept_mt_wb(void) +{ + return vmx_capability.ept & VMX_EPTP_WB_BIT; +} + +static inline bool cpu_has_vmx_ept_5levels(void) +{ + return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT; +} + static inline bool cpu_has_vmx_ept_ad_bits(void) { return vmx_capability.ept & VMX_EPT_AD_BIT; @@ -1319,6 +1317,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void) SECONDARY_EXEC_TSC_SCALING; } +static inline bool cpu_has_vmx_vmfunc(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_VMFUNC; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -1359,8 +1363,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) { - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) && - vmx_xsaves_supported(); + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); } static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) @@ -1393,6 +1396,18 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; } +static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC); +} + +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) +{ + return nested_cpu_has_vmfunc(vmcs12) && + (vmcs12->vm_function_control & + VMX_VMFUNC_EPTP_SWITCHING); +} + static inline bool is_nmi(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -2429,32 +2444,69 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); } +static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, + unsigned long exit_qual) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned int nr = vcpu->arch.exception.nr; + u32 intr_info = nr | INTR_INFO_VALID_MASK; + + if (vcpu->arch.exception.has_error_code) { + vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; + intr_info |= INTR_INFO_DELIVER_CODE_MASK; + } + + if (kvm_exception_is_soft(nr)) + intr_info |= INTR_TYPE_SOFT_EXCEPTION; + else + intr_info |= INTR_TYPE_HARD_EXCEPTION; + + if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && + vmx_get_nmi_mask(vcpu)) + intr_info |= INTR_INFO_UNBLOCK_NMI; + + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); +} + /* * KVM wants to inject page-faults which it got to the guest. This function * checks whether in a nested guest, we need to inject them to L1 or L2. */ -static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) +static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); unsigned int nr = vcpu->arch.exception.nr; - if (!((vmcs12->exception_bitmap & (1u << nr)) || - (nr == PF_VECTOR && vcpu->arch.exception.nested_apf))) - return 0; - - if (vcpu->arch.exception.nested_apf) { - vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, - PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | - INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, - vcpu->arch.apf.nested_apf_token); - return 1; + if (nr == PF_VECTOR) { + if (vcpu->arch.exception.nested_apf) { + *exit_qual = vcpu->arch.apf.nested_apf_token; + return 1; + } + /* + * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. + * The fix is to add the ancillary datum (CR2 or DR6) to structs + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 + * can be written only when inject_pending_event runs. This should be + * conditional on a new capability---if the capability is disabled, + * kvm_multiple_exception would write the ancillary information to + * CR2 or DR6, for backwards ABI-compatibility. + */ + if (nested_vmx_is_page_fault_vmexit(vmcs12, + vcpu->arch.exception.error_code)) { + *exit_qual = vcpu->arch.cr2; + return 1; + } + } else { + if (vmcs12->exception_bitmap & (1u << nr)) { + if (nr == DB_VECTOR) + *exit_qual = vcpu->arch.dr6; + else + *exit_qual = 0; + return 1; + } } - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - return 1; + return 0; } static void vmx_queue_exception(struct kvm_vcpu *vcpu) @@ -2462,14 +2514,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.reinject; u32 error_code = vcpu->arch.exception.error_code; u32 intr_info = nr | INTR_INFO_VALID_MASK; - if (!reinject && is_guest_mode(vcpu) && - nested_vmx_check_exception(vcpu)) - return; - if (has_error_code) { vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); intr_info |= INTR_INFO_DELIVER_CODE_MASK; @@ -2568,7 +2615,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu)) + if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) move_msr_up(vmx, index, save_nmsrs++); /* * MSR_STAR is only needed on long mode guests, and only @@ -2628,12 +2675,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31))); -} - /* * nested_vmx_allowed() checks whether a guest should be allowed to use VMX * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for @@ -2642,7 +2683,7 @@ static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) */ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) { - return nested && guest_cpuid_has_vmx(vcpu); + return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); } /* @@ -2668,7 +2709,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * reason is that if one of these bits is necessary, it will appear * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control * fields of vmcs01 and vmcs02, will turn these bits off - and - * nested_vmx_exit_handled() will not pass related exits to L1. + * nested_vmx_exit_reflected() will not pass related exits to L1. * These rules have exceptions below. */ @@ -2765,21 +2806,21 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_procbased_ctls_low &= ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); - /* secondary cpu-based controls */ + /* + * secondary cpu-based controls. Do not include those that + * depend on CPUID bits, they are added later by vmx_cpuid_update. + */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, vmx->nested.nested_vmx_secondary_ctls_low, vmx->nested.nested_vmx_secondary_ctls_high); vmx->nested.nested_vmx_secondary_ctls_low = 0; vmx->nested.nested_vmx_secondary_ctls_high &= - SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_XSAVES; + SECONDARY_EXEC_WBINVD_EXITING; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ @@ -2802,6 +2843,17 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) } else vmx->nested.nested_vmx_ept_caps = 0; + if (cpu_has_vmx_vmfunc()) { + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_VMFUNC; + /* + * Advertise EPTP switching unconditionally + * since we emulate it + */ + vmx->nested.nested_vmx_vmfunc_controls = + VMX_VMFUNC_EPTP_SWITCHING; + } + /* * Old versions of KVM use the single-context version without * checking for support, so declare that it is supported even @@ -3171,6 +3223,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) *pdata = vmx->nested.nested_vmx_ept_caps | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); break; + case MSR_IA32_VMX_VMFUNC: + *pdata = vmx->nested.nested_vmx_vmfunc_controls; + break; default: return 1; } @@ -3224,7 +3279,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || - (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -3248,7 +3304,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.ia32_xss; break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; /* Otherwise falls through */ default: @@ -3307,9 +3364,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || - (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) return 1; - if (is_noncanonical_address(data & PAGE_MASK) || + if (is_noncanonical_address(data & PAGE_MASK, vcpu) || (data & MSR_IA32_BNDCFGS_RSVD)) return 1; vmcs_write64(GUEST_BNDCFGS, data); @@ -3370,7 +3428,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) clear_atomic_switch_msr(vmx, MSR_IA32_XSS); break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) @@ -3607,8 +3666,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_RDSEED | + SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_TSC_SCALING; + SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_VMFUNC; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -4240,16 +4302,22 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmx->emulation_required = emulation_required(vcpu); } +static int get_ept_level(struct kvm_vcpu *vcpu) +{ + if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) + return 5; + return 4; +} + static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) { - u64 eptp; + u64 eptp = VMX_EPTP_MT_WB; + + eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; - /* TODO write the value reading from MSR */ - eptp = VMX_EPT_DEFAULT_MT | - VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; if (enable_ept_ad_bits && (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) - eptp |= VMX_EPT_AD_ENABLE_BIT; + eptp |= VMX_EPTP_AD_ENABLE_BIT; eptp |= (root_hpa & PAGE_MASK); return eptp; @@ -4956,6 +5024,28 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4963,18 +5053,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return; - - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - if (max_irr == 256) - return; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -4986,11 +5073,16 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } + + nested_mark_vmcs12_pages_dirty(vcpu); } -static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, + bool nested) { #ifdef CONFIG_SMP + int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; + if (vcpu->mode == IN_GUEST_MODE) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5008,8 +5100,7 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) */ WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), - POSTED_INTR_VECTOR); + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); return true; } #endif @@ -5024,7 +5115,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { /* the PIR and ON have been set by L1. */ - kvm_vcpu_trigger_posted_interrupt(vcpu); + kvm_vcpu_trigger_posted_interrupt(vcpu, true); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. @@ -5058,7 +5149,7 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return; - if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); } @@ -5188,10 +5279,24 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) +static bool vmx_rdrand_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_RDRAND; +} + +static bool vmx_rdseed_supported(void) { + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_RDSEED; +} + +static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) +{ + struct kvm_vcpu *vcpu = &vmx->vcpu; + u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; - if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu)) + if (!cpu_need_virtualize_apic_accesses(vcpu)) exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; @@ -5205,7 +5310,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!kvm_vcpu_apicv_active(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; @@ -5219,7 +5324,92 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) if (!enable_pml) exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - return exec_control; + if (vmx_xsaves_supported()) { + /* Exposing XSAVES only when XSAVE is exposed */ + bool xsaves_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); + + if (!xsaves_enabled) + exec_control &= ~SECONDARY_EXEC_XSAVES; + + if (nested) { + if (xsaves_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_XSAVES; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_XSAVES; + } + } + + if (vmx_rdtscp_supported()) { + bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); + if (!rdtscp_enabled) + exec_control &= ~SECONDARY_EXEC_RDTSCP; + + if (nested) { + if (rdtscp_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_RDTSCP; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDTSCP; + } + } + + if (vmx_invpcid_supported()) { + /* Exposing INVPCID only when PCID is exposed */ + bool invpcid_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && + guest_cpuid_has(vcpu, X86_FEATURE_PCID); + + if (!invpcid_enabled) { + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; + guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); + } + + if (nested) { + if (invpcid_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_INVPCID; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_INVPCID; + } + } + + if (vmx_rdrand_supported()) { + bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); + if (rdrand_enabled) + exec_control &= ~SECONDARY_EXEC_RDRAND; + + if (nested) { + if (rdrand_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_RDRAND; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDRAND; + } + } + + if (vmx_rdseed_supported()) { + bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); + if (rdseed_enabled) + exec_control &= ~SECONDARY_EXEC_RDSEED; + + if (nested) { + if (rdseed_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_RDSEED; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDSEED; + } + } + + vmx->secondary_exec_control = exec_control; } static void ept_set_mmio_spte_mask(void) @@ -5263,8 +5453,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); if (cpu_has_secondary_exec_ctrls()) { + vmx_compute_secondary_exec_control(vmx); vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - vmx_secondary_exec_control(vmx)); + vmx->secondary_exec_control); } if (kvm_vcpu_apicv_active(&vmx->vcpu)) { @@ -5302,6 +5493,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ #endif + if (cpu_has_vmx_vmfunc()) + vmcs_write64(VM_FUNCTION_CONTROL, 0); + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); @@ -5780,6 +5974,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) static int handle_triple_fault(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->mmio_needed = 0; return 0; } @@ -6275,7 +6470,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; gpa_t gpa; - u32 error_code; + u64 error_code; exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -6307,9 +6502,10 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) EPT_VIOLATION_EXECUTABLE)) ? PFERR_PRESENT_MASK : 0; - vcpu->arch.gpa_available = true; - vcpu->arch.exit_qualification = exit_qualification; + error_code |= (exit_qualification & 0x100) != 0 ? + PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; + vcpu->arch.exit_qualification = exit_qualification; return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); } @@ -6318,23 +6514,20 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) int ret; gpa_t gpa; + /* + * A nested guest cannot optimize MMIO vmexits, because we have an + * nGPA here instead of the required GPA. + */ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + if (!is_guest_mode(vcpu) && + !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); return kvm_skip_emulated_instruction(vcpu); } - ret = handle_mmio_page_fault(vcpu, gpa, true); - vcpu->arch.gpa_available = true; - if (likely(ret == RET_MMIO_PF_EMULATE)) - return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == - EMULATE_DONE; - - if (unlikely(ret == RET_MMIO_PF_INVALID)) - return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0); - - if (unlikely(ret == RET_MMIO_PF_RETRY)) - return 1; + ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); + if (ret >= 0) + return ret; /* It is the real ept misconfig */ WARN_ON(1); @@ -6556,7 +6749,8 @@ static __init int hardware_setup(void) init_vmcs_shadow_fields(); if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels()) { + !cpu_has_vmx_ept_4levels() || + !cpu_has_vmx_ept_mt_wb()) { enable_ept = 0; enable_unrestricted_guest = 0; enable_ept_ad_bits = 0; @@ -6699,7 +6893,13 @@ static int handle_pause(struct kvm_vcpu *vcpu) if (ple_gap) grow_ple_window(vcpu); - kvm_vcpu_on_spin(vcpu); + /* + * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" + * VM-execution control is ignored if CPL > 0. OTOH, KVM + * never set PAUSE_EXITING and just set PLE if supported, + * so the vcpu must be CPL=0 if it gets a PAUSE exit. + */ + kvm_vcpu_on_spin(vcpu, true); return kvm_skip_emulated_instruction(vcpu); } @@ -6714,6 +6914,12 @@ static int handle_mwait(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } +static int handle_invalid_op(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + static int handle_monitor_trap(struct kvm_vcpu *vcpu) { return 1; @@ -6930,7 +7136,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * non-canonical form. This is the only check on the memory * destination for long mode! */ - exn = is_noncanonical_address(*ret); + exn = is_noncanonical_address(*ret, vcpu); } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: @@ -7094,19 +7300,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { + page = kvm_vcpu_gpa_to_page(vcpu, vmptr); + if (is_error_page(page)) { nested_vmx_failInvalid(vcpu); return kvm_skip_emulated_instruction(vcpu); } if (*(u32 *)kmap(page) != VMCS12_REVISION) { kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); nested_vmx_failInvalid(vcpu); return kvm_skip_emulated_instruction(vcpu); } kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); @@ -7133,34 +7339,32 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) return 1; } +static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) +{ + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + vmcs_write64(VMCS_LINK_POINTER, -1ull); +} + static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) { if (vmx->nested.current_vmptr == -1ull) return; - /* current_vmptr and current_vmcs12 are always set/reset together */ - if (WARN_ON(vmx->nested.current_vmcs12 == NULL)) - return; - if (enable_shadow_vmcs) { /* copy to memory all shadowed fields in case they were modified */ copy_shadow_to_vmcs12(vmx); vmx->nested.sync_shadow_vmcs = false; - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_SHADOW_VMCS); - vmcs_write64(VMCS_LINK_POINTER, -1ull); + vmx_disable_shadow_vmcs(vmx); } vmx->nested.posted_intr_nv = -1; /* Flush VMCS12 to guest memory */ - memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12, - VMCS12_SIZE); + kvm_vcpu_write_guest_page(&vmx->vcpu, + vmx->nested.current_vmptr >> PAGE_SHIFT, + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; } /* @@ -7174,12 +7378,14 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); - nested_release_vmcs12(vmx); + vmx->nested.posted_intr_nv = -1; + vmx->nested.current_vmptr = -1ull; if (vmx->nested.msr_bitmap) { free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } if (enable_shadow_vmcs) { + vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); free_vmcs(vmx->vmcs01.shadow_vmcs); vmx->vmcs01.shadow_vmcs = NULL; @@ -7187,16 +7393,16 @@ static void free_nested(struct vcpu_vmx *vmx) kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { - nested_release_page(vmx->nested.apic_access_page); + kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } if (vmx->nested.virtual_apic_page) { - nested_release_page(vmx->nested.virtual_apic_page); + kvm_release_page_dirty(vmx->nested.virtual_apic_page); vmx->nested.virtual_apic_page = NULL; } if (vmx->nested.pi_desc_page) { kunmap(vmx->nested.pi_desc_page); - nested_release_page(vmx->nested.pi_desc_page); + kvm_release_page_dirty(vmx->nested.pi_desc_page); vmx->nested.pi_desc_page = NULL; vmx->nested.pi_desc = NULL; } @@ -7563,29 +7769,29 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (vmx->nested.current_vmptr != vmptr) { struct vmcs12 *new_vmcs12; struct page *page; - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { + page = kvm_vcpu_gpa_to_page(vcpu, vmptr); + if (is_error_page(page)) { nested_vmx_failInvalid(vcpu); return kvm_skip_emulated_instruction(vcpu); } new_vmcs12 = kmap(page); if (new_vmcs12->revision_id != VMCS12_REVISION) { kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); return kvm_skip_emulated_instruction(vcpu); } nested_release_vmcs12(vmx); - vmx->nested.current_vmcs12 = new_vmcs12; - vmx->nested.current_vmcs12_page = page; /* * Load VMCS12 from guest memory since it is not already * cached. */ - memcpy(vmx->nested.cached_vmcs12, - vmx->nested.current_vmcs12, VMCS12_SIZE); + memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); + kunmap(page); + kvm_release_page_clean(page); + set_current_vmptr(vmx, vmptr); } @@ -7735,7 +7941,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: - if (is_noncanonical_address(operand.gla)) { + if (is_noncanonical_address(operand.gla, vcpu)) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); return kvm_skip_emulated_instruction(vcpu); @@ -7792,6 +7998,124 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) return 1; } +static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int maxphyaddr = cpuid_maxphyaddr(vcpu); + + /* Check for memory type validity */ + switch (address & VMX_EPTP_MT_MASK) { + case VMX_EPTP_MT_UC: + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT)) + return false; + break; + case VMX_EPTP_MT_WB: + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT)) + return false; + break; + default: + return false; + } + + /* only 4 levels page-walk length are valid */ + if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) + return false; + + /* Reserved bits should not be set */ + if (address >> maxphyaddr || ((address >> 7) & 0x1f)) + return false; + + /* AD, if set, should be supported */ + if (address & VMX_EPTP_AD_ENABLE_BIT) { + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)) + return false; + } + + return true; +} + +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; + u64 address; + bool accessed_dirty; + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + + if (!nested_cpu_has_eptp_switching(vmcs12) || + !nested_cpu_has_ept(vmcs12)) + return 1; + + if (index >= VMFUNC_EPTP_ENTRIES) + return 1; + + + if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, + &address, index * 8, 8)) + return 1; + + accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT); + + /* + * If the (L2) guest does a vmfunc to the currently + * active ept pointer, we don't have to do anything else + */ + if (vmcs12->ept_pointer != address) { + if (!valid_ept_address(vcpu, address)) + return 1; + + kvm_mmu_unload(vcpu); + mmu->ept_ad = accessed_dirty; + mmu->base_role.ad_disabled = !accessed_dirty; + vmcs12->ept_pointer = address; + /* + * TODO: Check what's the correct approach in case + * mmu reload fails. Currently, we just let the next + * reload potentially fail + */ + kvm_mmu_reload(vcpu); + } + + return 0; +} + +static int handle_vmfunc(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12; + u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; + + /* + * VMFUNC is only supported for nested guests, but we always enable the + * secondary control for simplicity; for non-nested mode, fake that we + * didn't by injecting #UD. + */ + if (!is_guest_mode(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + vmcs12 = get_vmcs12(vcpu); + if ((vmcs12->vm_function_control & (1 << function)) == 0) + goto fail; + + switch (function) { + case 0: + if (nested_vmx_eptp_switching(vcpu, vmcs12)) + goto fail; + break; + default: + goto fail; + } + return kvm_skip_emulated_instruction(vcpu); + +fail: + nested_vmx_vmexit(vcpu, vmx->exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -7839,9 +8163,12 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, [EXIT_REASON_INVEPT] = handle_invept, [EXIT_REASON_INVVPID] = handle_invvpid, + [EXIT_REASON_RDRAND] = handle_invalid_op, + [EXIT_REASON_RDSEED] = handle_invalid_op, [EXIT_REASON_XSAVES] = handle_xsaves, [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, + [EXIT_REASON_VMFUNC] = handle_vmfunc, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, }; @@ -8018,12 +8345,11 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * should handle it ourselves in L0 (and then continue L2). Only call this * when in is_guest_mode (L2). */ -static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) +static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) { u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u32 exit_reason = vmx->exit_reason; trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, vmcs_readl(EXIT_QUALIFICATION), @@ -8032,6 +8358,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU + * may write to these pages via their host physical address while + * L2 is running, bypassing any address-translation-based dirty + * tracking (e.g. EPT write protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. + */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; @@ -8146,6 +8484,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * table is L0's fault. */ return false; + case EXIT_REASON_INVPCID: + return + nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && + nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: @@ -8163,11 +8505,37 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_PML_FULL: /* We emulate PML support to L1. */ return false; + case EXIT_REASON_VMFUNC: + /* VM functions are emulated through L2->L0 vmexits. */ + return false; default: return true; } } +static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* + * At this point, the exit interruption info in exit_intr_info + * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT + * we need to query the in-kernel LAPIC. + */ + WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); + if ((exit_intr_info & + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + } + + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, + vmcs_readl(EXIT_QUALIFICATION)); + return 1; +} + static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) { *info1 = vmcs_readl(EXIT_QUALIFICATION); @@ -8398,7 +8766,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 vectoring_info = vmx->idt_vectoring_info; trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - vcpu->arch.gpa_available = false; /* * Flush logged GPAs PML buffer, this will make dirty_bitmap more @@ -8414,12 +8781,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { - nested_vmx_vmexit(vcpu, exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - return 1; - } + if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) + return nested_vmx_reflect_vmexit(vcpu, exit_reason); if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { dump_vmcs(); @@ -9222,7 +9585,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; @@ -9257,11 +9619,6 @@ static void __init vmx_check_processor_compat(void *rtn) } } -static int get_ept_level(void) -{ - return VMX_EPT_DEFAULT_GAW + 1; -} - static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { u8 cache; @@ -9378,39 +9735,13 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { - struct kvm_cpuid_entry2 *best; struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx); - - if (vmx_rdtscp_supported()) { - bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu); - if (!rdtscp_enabled) - secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP; - - if (nested) { - if (rdtscp_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_RDTSCP; - else - vmx->nested.nested_vmx_secondary_ctls_high &= - ~SECONDARY_EXEC_RDTSCP; - } - } - /* Exposing INVPCID only when PCID is exposed */ - best = kvm_find_cpuid_entry(vcpu, 0x7, 0); - if (vmx_invpcid_supported() && - (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) || - !guest_cpuid_has_pcid(vcpu))) { - secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID; - - if (best) - best->ebx &= ~bit(X86_FEATURE_INVPCID); + if (cpu_has_secondary_exec_ctrls()) { + vmx_compute_secondary_exec_control(vmx); + vmcs_set_secondary_exec_control(vmx->secondary_exec_control); } - if (cpu_has_secondary_exec_ctrls()) - vmcs_set_secondary_exec_control(secondary_exec_ctl); - if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -9451,7 +9782,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) { - return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT; + return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT; } /* Callbacks for nested_ept_init_mmu_context: */ @@ -9464,18 +9795,15 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) { - bool wants_ad; - WARN_ON(mmu_is_nested(vcpu)); - wants_ad = nested_ept_ad_enabled(vcpu); - if (wants_ad && !enable_ept_ad_bits) + if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) return 1; kvm_mmu_unload(vcpu); kvm_init_shadow_ept_mmu(vcpu, to_vmx(vcpu)->nested.nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT, - wants_ad); + nested_ept_ad_enabled(vcpu)); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; @@ -9508,12 +9836,15 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, WARN_ON(!is_guest_mode(vcpu)); - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) - nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - else + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { + vmcs12->vm_exit_intr_error_code = fault->error_code; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, + PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | + INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, + fault->address); + } else { kvm_inject_page_fault(vcpu, fault); + } } static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, @@ -9523,6 +9854,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct page *page; u64 hpa; if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { @@ -9532,17 +9864,19 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, * physical address remains valid. We keep a reference * to it so we can release it later. */ - if (vmx->nested.apic_access_page) /* shouldn't happen */ - nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = - nested_get_page(vcpu, vmcs12->apic_access_addr); + if (vmx->nested.apic_access_page) { /* shouldn't happen */ + kvm_release_page_dirty(vmx->nested.apic_access_page); + vmx->nested.apic_access_page = NULL; + } + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); /* * If translation failed, no matter: This feature asks * to exit when accessing the given address, and if it * can never be accessed, this feature won't do * anything anyway. */ - if (vmx->nested.apic_access_page) { + if (!is_error_page(page)) { + vmx->nested.apic_access_page = page; hpa = page_to_phys(vmx->nested.apic_access_page); vmcs_write64(APIC_ACCESS_ADDR, hpa); } else { @@ -9557,10 +9891,11 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - if (vmx->nested.virtual_apic_page) /* shouldn't happen */ - nested_release_page(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = - nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); + if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ + kvm_release_page_dirty(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = NULL; + } + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); /* * If translation failed, VM entry will fail because @@ -9575,7 +9910,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, * control. But such a configuration is useless, so * let's keep the code simple. */ - if (vmx->nested.virtual_apic_page) { + if (!is_error_page(page)) { + vmx->nested.virtual_apic_page = page; hpa = page_to_phys(vmx->nested.virtual_apic_page); vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); } @@ -9584,16 +9920,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, if (nested_cpu_has_posted_intr(vmcs12)) { if (vmx->nested.pi_desc_page) { /* shouldn't happen */ kunmap(vmx->nested.pi_desc_page); - nested_release_page(vmx->nested.pi_desc_page); + kvm_release_page_dirty(vmx->nested.pi_desc_page); + vmx->nested.pi_desc_page = NULL; } - vmx->nested.pi_desc_page = - nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); - vmx->nested.pi_desc = - (struct pi_desc *)kmap(vmx->nested.pi_desc_page); - if (!vmx->nested.pi_desc) { - nested_release_page_clean(vmx->nested.pi_desc_page); + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); + if (is_error_page(page)) return; - } + vmx->nested.pi_desc_page = page; + vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); vmx->nested.pi_desc = (struct pi_desc *)((void *)vmx->nested.pi_desc + (unsigned long)(vmcs12->posted_intr_desc_addr & @@ -9659,6 +9993,18 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + return 0; + + if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) + return -EINVAL; + + return 0; +} + /* * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. @@ -9675,8 +10021,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) return false; - page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); + if (is_error_page(page)) return false; msr_bitmap_l1 = (unsigned long *)kmap(page); @@ -9706,7 +10052,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, } } kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); return true; } @@ -10041,6 +10387,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->vm_entry_instruction_len); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, vmcs12->guest_interruptibility_info); + vmx->loaded_vmcs->nmi_known_unmasked = + !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); } else { vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); } @@ -10065,13 +10413,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* Posted interrupts setting is only taken from vmcs12. */ if (nested_cpu_has_posted_intr(vmcs12)) { - /* - * Note that we use L0's vector here and in - * vmx_deliver_nested_posted_interrupt. - */ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); } else { exec_control &= ~PIN_BASED_POSTED_INTR; } @@ -10095,12 +10439,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when * !enable_ept, EB.PF is 1, so the "or" will always be 1. - * - * A problem with this approach (when !enable_ept) is that L1 may be - * injected with more page faults than it asked for. This could have - * caused problems, but in practice existing hypervisors don't care. - * To fix this, we will need to emulate the PFEC checking (on the L1 - * page tables), using walk_addr(), when injecting PFs to L1. */ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, enable_ept ? vmcs12->page_fault_error_code_mask : 0); @@ -10108,13 +10446,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, enable_ept ? vmcs12->page_fault_error_code_match : 0); if (cpu_has_secondary_exec_ctrls()) { - exec_control = vmx_secondary_exec_control(vmx); + exec_control = vmx->secondary_exec_control; /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDTSCP | + SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_ENABLE_VMFUNC); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & @@ -10122,6 +10463,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, exec_control |= vmcs12_exec_ctrl; } + /* All VMFUNCs are currently emulated through L0 vmexits. */ + if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC) + vmcs_write64(VM_FUNCTION_CONTROL, 0); + if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); @@ -10347,6 +10692,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; @@ -10374,6 +10722,18 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmx->nested.nested_vmx_entry_ctls_high)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_cpu_has_vmfunc(vmcs12)) { + if (vmcs12->vm_function_control & + ~vmx->nested.nested_vmx_vmfunc_controls) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + if (nested_cpu_has_eptp_switching(vmcs12)) { + if (!nested_cpu_has_ept(vmcs12) || + !page_address_valid(vcpu, vmcs12->eptp_list_address)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + } + } + if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; @@ -10620,7 +10980,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, u32 idt_vectoring; unsigned int nr; - if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { + if (vcpu->arch.exception.injected) { nr = vcpu->arch.exception.nr; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; @@ -10659,12 +11019,20 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long exit_qual; - if (vcpu->arch.exception.pending || - vcpu->arch.nmi_injected || - vcpu->arch.interrupt.pending) + if (kvm_event_needs_reinjection(vcpu)) return -EBUSY; + if (vcpu->arch.exception.pending && + nested_vmx_check_exception(vcpu, &exit_qual)) { + if (vmx->nested.nested_run_pending) + return -EBUSY; + nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + vcpu->arch.exception.pending = false; + return 0; + } + if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { if (vmx->nested.nested_run_pending) @@ -10848,13 +11216,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->vm_exit_reason = exit_reason; vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = exit_intr_info; - if ((vmcs12->vm_exit_intr_info & - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) - vmcs12->vm_exit_intr_error_code = - vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + vmcs12->idt_vectoring_info_field = 0; vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); @@ -10942,7 +11305,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, */ vmx_flush_tlb(vcpu); } - + /* Restore posted intr vector. */ + if (nested_cpu_has_posted_intr(vmcs12)) + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); @@ -11048,8 +11413,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx_switch_vmcs(vcpu, &vmx->vmcs01); - if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - && nested_exit_intr_ack_set(vcpu)) { + /* + * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of + * the VM-exit interrupt information (valid interrupt) is always set to + * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need + * kvm_cpu_has_interrupt(). See the commit message for details. + */ + if (nested_exit_intr_ack_set(vcpu) && + exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + kvm_cpu_has_interrupt(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); vmcs12->vm_exit_intr_info = irq | @@ -11101,16 +11473,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* Unpin physical memory we referred to in vmcs02 */ if (vmx->nested.apic_access_page) { - nested_release_page(vmx->nested.apic_access_page); + kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } if (vmx->nested.virtual_apic_page) { - nested_release_page(vmx->nested.virtual_apic_page); + kvm_release_page_dirty(vmx->nested.virtual_apic_page); vmx->nested.virtual_apic_page = NULL; } if (vmx->nested.pi_desc_page) { kunmap(vmx->nested.pi_desc_page); - nested_release_page(vmx->nested.pi_desc_page); + kvm_release_page_dirty(vmx->nested.pi_desc_page); vmx->nested.pi_desc_page = NULL; vmx->nested.pi_desc = NULL; } @@ -11286,14 +11658,14 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; - page = nested_get_page(vcpu, vmcs12->pml_address); - if (!page) + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address); + if (is_error_page(page)) return 0; pml_address = kmap(page); pml_address[vmcs12->guest_pml_index--] = gpa; kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); } return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 82a63c59f77b..3abaff61829d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -310,13 +310,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); u64 new_state = msr_info->data & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); - u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | - 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE); + u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | + (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); + if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE) + return 1; if (!msr_info->host_initiated && - ((msr_info->data & reserved_bits) != 0 || - new_state == X2APIC_ENABLE || - (new_state == MSR_IA32_APICBASE_ENABLE && + ((new_state == MSR_IA32_APICBASE_ENABLE && old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && old_state == 0))) @@ -389,15 +389,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, kvm_make_request(KVM_REQ_EVENT, vcpu); - if (!vcpu->arch.exception.pending) { + if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { queue: if (has_error && !is_protmode(vcpu)) has_error = false; - vcpu->arch.exception.pending = true; + if (reinject) { + /* + * On vmentry, vcpu->arch.exception.pending is only + * true if an event injection was blocked by + * nested_run_pending. In that case, however, + * vcpu_enter_guest requests an immediate exit, + * and the guest shouldn't proceed far enough to + * need reinjection. + */ + WARN_ON_ONCE(vcpu->arch.exception.pending); + vcpu->arch.exception.injected = true; + } else { + vcpu->arch.exception.pending = true; + vcpu->arch.exception.injected = false; + } vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; vcpu->arch.exception.error_code = error_code; - vcpu->arch.exception.reinject = reinject; return; } @@ -412,8 +425,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, class2 = exception_class(nr); if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { - /* generate double fault per SDM Table 5-5 */ + /* + * Generate double fault per SDM Table 5-5. Set + * exception.pending = true so that the double fault + * can trigger a nested vmexit. + */ vcpu->arch.exception.pending = true; + vcpu->arch.exception.injected = false; vcpu->arch.exception.has_error_code = true; vcpu->arch.exception.nr = DF_VECTOR; vcpu->arch.exception.error_code = 0; @@ -597,8 +615,8 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_avail)) return true; - gfn = (kvm_read_cr3(vcpu) & ~31ul) >> PAGE_SHIFT; - offset = (kvm_read_cr3(vcpu) & ~31ul) & (PAGE_SIZE - 1); + gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; + offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1); r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), PFERR_USER_MASK | PFERR_WRITE_MASK); if (r < 0) @@ -754,19 +772,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & CR4_RESERVED_BITS) return 1; - if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) + return 1; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) return 1; - if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) return 1; - if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) return 1; - if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) return 1; - if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) return 1; if (is_long_mode(vcpu)) { @@ -779,7 +800,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { - if (!guest_cpuid_has_pcid(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID)) return 1; /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ @@ -813,10 +834,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) return 0; } - if (is_long_mode(vcpu)) { - if (cr3 & CR3_L_MODE_RESERVED_BITS) - return 1; - } else if (is_pae(vcpu) && is_paging(vcpu) && + if (is_long_mode(vcpu) && + (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62))) + return 1; + else if (is_pae(vcpu) && is_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; @@ -883,7 +904,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { u64 fixed = DR6_FIXED_1; - if (!guest_cpuid_has_rtm(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; return fixed; } @@ -993,6 +1014,7 @@ static u32 emulated_msrs[] = { MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, + HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, HV_X64_MSR_RESET, @@ -1021,21 +1043,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) if (efer & efer_reserved_bits) return false; - if (efer & EFER_FFXSR) { - struct kvm_cpuid_entry2 *feat; - - feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) + if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) return false; - } - - if (efer & EFER_SVME) { - struct kvm_cpuid_entry2 *feat; - feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) + if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) return false; - } return true; } @@ -1083,7 +1095,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_KERNEL_GS_BASE: case MSR_CSTAR: case MSR_LSTAR: - if (is_noncanonical_address(msr->data)) + if (is_noncanonical_address(msr->data, vcpu)) return 1; break; case MSR_IA32_SYSENTER_EIP: @@ -1100,7 +1112,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * value, and that something deterministic happens if the guest * invokes 64-bit SYSENTER. */ - msr->data = get_canonical(msr->data); + msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu)); } return kvm_x86_ops->set_msr(vcpu, msr); } @@ -1533,8 +1545,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; - if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated) + if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) update_ia32_tsc_adjust_msr(vcpu, offset); + kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); @@ -2184,7 +2197,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_set_lapic_tscdeadline_msr(vcpu, data); break; case MSR_IA32_TSC_ADJUST: - if (guest_cpuid_has_tsc_adjust(vcpu)) { + if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); @@ -2306,12 +2319,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; vcpu->arch.osvw.length = data; break; case MSR_AMD64_OSVW_STATUS: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; vcpu->arch.osvw.status = data; break; @@ -2536,12 +2549,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0xbe702111; break; case MSR_AMD64_OSVW_ID_LENGTH: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; msr_info->data = vcpu->arch.osvw.length; break; case MSR_AMD64_OSVW_STATUS: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; msr_info->data = vcpu->arch.osvw.status; break; @@ -2881,6 +2894,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; + + if (vcpu->preempted) + vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); + /* * Disable page faults because we're in atomic context here. * kvm_write_guest_offset_cached() would call might_fault() @@ -3073,8 +3090,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { process_nmi(vcpu); + /* + * FIXME: pass injected and pending separately. This is only + * needed for nested virtualization, whose state cannot be + * migrated yet. For now we can combine them. + */ events->exception.injected = - vcpu->arch.exception.pending && + (vcpu->arch.exception.pending || + vcpu->arch.exception.injected) && !kvm_exception_is_soft(vcpu->arch.exception.nr); events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; @@ -3129,6 +3152,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return -EINVAL; process_nmi(vcpu); + vcpu->arch.exception.injected = false; vcpu->arch.exception.pending = events->exception.injected; vcpu->arch.exception.nr = events->exception.nr; vcpu->arch.exception.has_error_code = events->exception.has_error_code; @@ -3159,15 +3183,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_set_hflags(vcpu, hflags); vcpu->arch.smi_pending = events->smi.pending; - if (events->smi.smm_inside_nmi) - vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; - else - vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; - if (lapic_in_kernel(vcpu)) { - if (events->smi.latched_init) - set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + + if (events->smi.smm) { + if (events->smi.smm_inside_nmi) + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; else - clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; + if (lapic_in_kernel(vcpu)) { + if (events->smi.latched_init) + set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + else + clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + } } } @@ -4658,25 +4685,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, */ if (vcpu->arch.gpa_available && emulator_can_use_gpa(ctxt) && - vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) && - (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) { - gpa = exception->address; - goto mmio; + (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) { + gpa = vcpu->arch.gpa_val; + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write); + } else { + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); + if (ret < 0) + return X86EMUL_PROPAGATE_FAULT; } - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); - - if (ret < 0) - return X86EMUL_PROPAGATE_FAULT; - - /* For APIC access vmexit */ - if (ret) - goto mmio; - - if (ops->read_write_emulate(vcpu, gpa, val, bytes)) + if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes)) return X86EMUL_CONTINUE; -mmio: /* * Is this MMIO handled locally? */ @@ -5214,10 +5234,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } -static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) { - kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); + return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); } static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) @@ -6215,6 +6235,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) lapic_irq.shorthand = 0; lapic_irq.dest_mode = 0; + lapic_irq.level = 0; lapic_irq.dest_id = apicid; lapic_irq.msi_redir_hint = false; @@ -6348,11 +6369,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) int r; /* try to reinject previous events if any */ + if (vcpu->arch.exception.injected) { + kvm_x86_ops->queue_exception(vcpu); + return 0; + } + + /* + * Exceptions must be injected immediately, or the exception + * frame will have the address of the NMI or interrupt handler. + */ + if (!vcpu->arch.exception.pending) { + if (vcpu->arch.nmi_injected) { + kvm_x86_ops->set_nmi(vcpu); + return 0; + } + + if (vcpu->arch.interrupt.pending) { + kvm_x86_ops->set_irq(vcpu); + return 0; + } + } + + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { + r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + if (r != 0) + return r; + } + + /* try to inject new event if pending */ if (vcpu->arch.exception.pending) { trace_kvm_inj_exception(vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); + vcpu->arch.exception.pending = false; + vcpu->arch.exception.injected = true; + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); @@ -6364,27 +6416,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } kvm_x86_ops->queue_exception(vcpu); - return 0; - } - - if (vcpu->arch.nmi_injected) { - kvm_x86_ops->set_nmi(vcpu); - return 0; - } - - if (vcpu->arch.interrupt.pending) { - kvm_x86_ops->set_irq(vcpu); - return 0; - } - - if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); - if (r != 0) - return r; - } - - /* try to inject new event if pending */ - if (vcpu->arch.smi_pending && !is_smm(vcpu)) { + } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) { vcpu->arch.smi_pending = false; enter_smm(vcpu); } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { @@ -6601,7 +6633,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); vcpu->arch.hflags |= HF_SMM_MASK; memset(buf, 0, 512); - if (guest_cpuid_has_longmode(vcpu)) + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) enter_smm_save_state_64(vcpu, buf); else enter_smm_save_state_32(vcpu, buf); @@ -6653,7 +6685,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); - if (guest_cpuid_has_longmode(vcpu)) + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) kvm_x86_ops->set_efer(vcpu, 0); kvm_update_cpuid(vcpu); @@ -6771,6 +6803,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->mmio_needed = 0; r = 0; goto out; } @@ -6859,6 +6892,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->enable_nmi_window(vcpu); if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); + WARN_ON(vcpu->arch.exception.pending); } if (kvm_lapic_enabled(vcpu)) { @@ -7001,6 +7035,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.apic_attention) kvm_lapic_sync_from_vapic(vcpu); + vcpu->arch.gpa_available = false; r = kvm_x86_ops->handle_exit(vcpu); return r; @@ -7419,7 +7454,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int pending_vec, max_bits, idx; struct desc_ptr dt; - if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + (sregs->cr4 & X86_CR4_OSXSAVE)) + return -EINVAL; + + apic_base_msr.data = sregs->apic_base; + apic_base_msr.host_initiated = true; + if (kvm_set_apic_base(vcpu, &apic_base_msr)) return -EINVAL; dt.size = sregs->idt.limit; @@ -7438,9 +7479,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= vcpu->arch.efer != sregs->efer; kvm_x86_ops->set_efer(vcpu, sregs->efer); - apic_base_msr.data = sregs->apic_base; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(vcpu, &apic_base_msr); mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; kvm_x86_ops->set_cr0(vcpu, sregs->cr0); @@ -7729,6 +7767,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.nmi_injected = false; kvm_clear_interrupt_queue(vcpu); kvm_clear_exception_queue(vcpu); + vcpu->arch.exception.pending = false; memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); @@ -7988,6 +8027,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; + vcpu->arch.preempted_in_kernel = false; kvm_hv_vcpu_init(vcpu); @@ -8435,6 +8475,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.preempted_in_kernel; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 612067074905..51e349cf5f45 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -11,7 +11,7 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) { - vcpu->arch.exception.pending = false; + vcpu->arch.exception.injected = false; } static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, @@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) { - return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending || + return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending || vcpu->arch.nmi_injected; } @@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) return cs_l; } +static inline bool is_la57_mode(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_X86_64 + return (vcpu->arch.efer & EFER_LMA) && + kvm_read_cr4_bits(vcpu, X86_CR4_LA57); +#else + return 0; +#endif +} + static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; @@ -87,10 +97,48 @@ static inline u32 bit(int bitno) return 1 << (bitno & 31); } +static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) +{ + return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48; +} + +static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) +{ + return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48; +} + +static inline u64 get_canonical(u64 la, u8 vaddr_bits) +{ + return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_X86_64 + return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la; +#else + return false; +#endif +} + +static inline bool emul_is_noncanonical_address(u64 la, + struct x86_emulate_ctxt *ctxt) +{ +#ifdef CONFIG_X86_64 + return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; +#else + return false; +#endif +} + static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, unsigned access) { - vcpu->arch.mmio_gva = gva & PAGE_MASK; + /* + * If this is a shadow nested page table, the "GVA" is + * actually a nGPA. + */ + vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; |