diff options
author | Hiago De Franco <hiago.franco@toradex.com> | 2025-06-02 15:00:56 -0300 |
---|---|---|
committer | Hiago De Franco <hiago.franco@toradex.com> | 2025-06-03 10:26:20 -0300 |
commit | defb81960f4fcb580b75bbc207e5440dad4376d6 (patch) | |
tree | da4539935a24104d027d92a8f9afd519f2ee625a /arch | |
parent | 5a6602cc6c3204b24470b2429e658c25638b10a9 (diff) | |
parent | 1b69a86802c7e77a7624fe4970ca7d382a785b8f (diff) |
Merge branch '5.15-2.2.x-imx' into toradex_5.15-2.2.x-imx
Conflicts:
drivers/tty/serial/fsl_lpuart.c
Signed-off-by: Hiago De Franco <hiago.franco@toradex.com>
Diffstat (limited to 'arch')
141 files changed, 1269 insertions, 812 deletions
diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index 8049997fa372..2039a8c8d547 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) +#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT)) /* * These are used to set parameters in the core dumps. @@ -145,10 +145,6 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -#define SET_PERSONALITY(EX) \ - set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ - ? PER_LINUX_32BIT : PER_LINUX) - extern int alpha_l1i_cacheshape; extern int alpha_l1d_cacheshape; extern int alpha_l2_cacheshape; diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 02f0429f1068..8e3cf3c9f913 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -340,7 +340,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern void paging_init(void); -/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ +/* We have our own get_unmapped_area */ #define HAVE_ARCH_UNMAPPED_AREA #endif /* _ALPHA_PGTABLE_H */ diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 6100431da07a..d27db62c3247 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -8,23 +8,19 @@ #ifndef __ASM_ALPHA_PROCESSOR_H #define __ASM_ALPHA_PROCESSOR_H -#include <linux/personality.h> /* for ADDR_LIMIT_32BIT */ - /* * We have a 42-bit user address space: 4TB user VM... */ #define TASK_SIZE (0x40000000000UL) -#define STACK_TOP \ - (current->personality & ADDR_LIMIT_32BIT ? 
0x80000000 : 0x00120000000UL) +#define STACK_TOP (0x00120000000UL) #define STACK_TOP_MAX 0x00120000000UL /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE \ - ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2) +#define TASK_UNMAPPED_BASE (TASK_SIZE / 2) typedef struct { unsigned long seg; diff --git a/arch/alpha/include/uapi/asm/ptrace.h b/arch/alpha/include/uapi/asm/ptrace.h index c29194181025..22170f7b8be8 100644 --- a/arch/alpha/include/uapi/asm/ptrace.h +++ b/arch/alpha/include/uapi/asm/ptrace.h @@ -42,6 +42,8 @@ struct pt_regs { unsigned long trap_a0; unsigned long trap_a1; unsigned long trap_a2; +/* This makes the stack 16-byte aligned as GCC expects */ + unsigned long __pad0; /* These are saved by PAL-code: */ unsigned long ps; unsigned long pc; diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 2e125e5c1508..05d9296af5ea 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -32,7 +32,9 @@ void foo(void) DEFINE(CRED_EGID, offsetof(struct cred, egid)); BLANK(); + DEFINE(SP_OFF, offsetof(struct pt_regs, ps)); DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); + DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack)); DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index c41a5a9c3b9f..ba99cc9d27c7 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -15,10 +15,6 @@ .set noat .cfi_sections .debug_frame -/* Stack offsets. */ -#define SP_OFF 184 -#define SWITCH_STACK_SIZE 320 - .macro CFI_START_OSF_FRAME func .align 4 .globl \func @@ -199,8 +195,8 @@ CFI_END_OSF_FRAME entArith CFI_START_OSF_FRAME entMM SAVE_ALL /* save $9 - $15 so the inline exception code can manipulate them. 
*/ - subq $sp, 56, $sp - .cfi_adjust_cfa_offset 56 + subq $sp, 64, $sp + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -215,7 +211,7 @@ CFI_START_OSF_FRAME entMM .cfi_rel_offset $13, 32 .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 - addq $sp, 56, $19 + addq $sp, 64, $19 /* handle the fault */ lda $8, 0x3fff bic $sp, $8, $8 @@ -228,7 +224,7 @@ CFI_START_OSF_FRAME entMM ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - addq $sp, 56, $sp + addq $sp, 64, $sp .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -236,7 +232,7 @@ CFI_START_OSF_FRAME entMM .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 /* finish up the syscall as normal. */ br ret_from_sys_call CFI_END_OSF_FRAME entMM @@ -383,8 +379,8 @@ entUnaUser: .cfi_restore $0 .cfi_adjust_cfa_offset -256 SAVE_ALL /* setup normal kernel stack */ - lda $sp, -56($sp) - .cfi_adjust_cfa_offset 56 + lda $sp, -64($sp) + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -400,7 +396,7 @@ entUnaUser: .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 lda $8, 0x3fff - addq $sp, 56, $19 + addq $sp, 64, $19 bic $sp, $8, $8 jsr $26, do_entUnaUser ldq $9, 0($sp) @@ -410,7 +406,7 @@ entUnaUser: ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - lda $sp, 56($sp) + lda $sp, 64($sp) .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -418,7 +414,7 @@ entUnaUser: .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 br ret_from_sys_call CFI_END_OSF_FRAME entUna diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 8bbeebb73cf0..2dfb69a2ae43 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1212,8 +1212,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) return ret; } -/* Get an address range which is currently unmapped. 
Similar to the - generic version except that we know how to honor ADDR_LIMIT_32BIT. */ +/* Get an address range which is currently unmapped. */ static unsigned long arch_get_unmapped_area_1(unsigned long addr, unsigned long len, @@ -1235,13 +1234,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - unsigned long limit; - - /* "32 bit" actually means 31 bit, since pointers sign extend. */ - if (current->personality & ADDR_LIMIT_32BIT) - limit = 0x80000000; - else - limit = TASK_SIZE; + unsigned long limit = TASK_SIZE; if (len > limit) return -ENOMEM; diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index afaf4f6ad0f4..a78e93256ecb 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -709,7 +709,7 @@ s_reg_to_mem (unsigned long s_reg) static int unauser_reg_offsets[32] = { R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), /* r9 ... r15 are stored in front of regs. */ - -56, -48, -40, -32, -24, -16, -8, + -64, -56, -48, -40, -32, -24, -16, /* padding at -8 */ R(r16), R(r17), R(r18), R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), R(r27), R(r28), R(gp), diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index e9193d52222e..56ea2856e488 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -78,8 +78,8 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ - (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+10 : (r)-10]) + (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-17 : \ + (r) <= 18 ? 
(r)+11 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 89af57482bc8..da0eb8702602 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -133,7 +133,7 @@ clocks = <&clocks BCM2835_CLOCK_UART>, <&clocks BCM2835_CLOCK_VPU>; clock-names = "uartclk", "apb_pclk"; - arm,primecell-periphid = <0x00241011>; + arm,primecell-periphid = <0x00341011>; status = "disabled"; }; @@ -144,7 +144,7 @@ clocks = <&clocks BCM2835_CLOCK_UART>, <&clocks BCM2835_CLOCK_VPU>; clock-names = "uartclk", "apb_pclk"; - arm,primecell-periphid = <0x00241011>; + arm,primecell-periphid = <0x00341011>; status = "disabled"; }; @@ -155,7 +155,7 @@ clocks = <&clocks BCM2835_CLOCK_UART>, <&clocks BCM2835_CLOCK_VPU>; clock-names = "uartclk", "apb_pclk"; - arm,primecell-periphid = <0x00241011>; + arm,primecell-periphid = <0x00341011>; status = "disabled"; }; @@ -166,7 +166,7 @@ clocks = <&clocks BCM2835_CLOCK_UART>, <&clocks BCM2835_CLOCK_VPU>; clock-names = "uartclk", "apb_pclk"; - arm,primecell-periphid = <0x00241011>; + arm,primecell-periphid = <0x00341011>; status = "disabled"; }; @@ -450,8 +450,6 @@ IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>; - /* This only applies to the ARMv7 stub */ - arm,cpu-registers-not-fw-configured; }; cpus: cpus { @@ -1142,6 +1140,7 @@ }; &uart0 { + arm,primecell-periphid = <0x00341011>; interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 5733e3a4ea8e..3fdb79b0e8bf 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -12,6 +12,7 @@ ranges = <0x00000000 0x4a000000 0x100000>, /* segment 0 */ <0x00100000 0x4a100000 0x100000>, /* segment 1 */ <0x00200000 0x4a200000 0x100000>; /* segment 2 */ + dma-ranges; segment@0 { /* 0x4a000000 */ compatible = "simple-pm-bus"; @@ -557,6 +558,7 @@ 
<0x0007e000 0x0017e000 0x001000>, /* ap 124 */ <0x00059000 0x00159000 0x001000>, /* ap 125 */ <0x0005a000 0x0015a000 0x001000>; /* ap 126 */ + dma-ranges; target-module@2000 { /* 0x4a102000, ap 27 3c.0 */ compatible = "ti,sysc"; diff --git a/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi index f2386dcb9ff2..dda4fa91b2f2 100644 --- a/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi @@ -40,6 +40,9 @@ reg = <1>; interrupt-parent = <&gpio4>; interrupts = <16 IRQ_TYPE_LEVEL_LOW>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi index a7d62dbad602..64756888fd0d 100644 --- a/arch/arm/boot/dts/mt7623.dtsi +++ b/arch/arm/boot/dts/mt7623.dtsi @@ -309,7 +309,7 @@ clock-names = "spi", "wrap"; }; - cir: cir@10013000 { + cir: ir-receiver@10013000 { compatible = "mediatek,mt7623-cir"; reg = <0 0x10013000 0 0x1000>; interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index c8cc993ca8ca..91efc3d4de61 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -403,7 +403,21 @@ static int at91_suspend_finish(unsigned long val) return 0; } -static void at91_pm_switch_ba_to_vbat(void) +/** + * at91_pm_switch_ba_to_auto() - Configure Backup Unit Power Switch + * to automatic/hardware mode. + * + * The Backup Unit Power Switch can be managed either by software or hardware. + * Enabling hardware mode allows the automatic transition of power between + * VDDANA (or VDDIN33) and VDDBU (or VBAT, respectively), based on the + * availability of these power sources. + * + * If the Backup Unit Power Switch is already in automatic mode, no action is + * required. 
If it is in software-controlled mode, it is switched to automatic + * mode to enhance safety and eliminate the need for toggling between power + * sources. + */ +static void at91_pm_switch_ba_to_auto(void) { unsigned int offset = offsetof(struct at91_pm_sfrbu_regs, pswbu); unsigned int val; @@ -414,24 +428,19 @@ static void at91_pm_switch_ba_to_vbat(void) val = readl(soc_pm.data.sfrbu + offset); - /* Already on VBAT. */ - if (!(val & soc_pm.sfrbu_regs.pswbu.state)) + /* Already on auto/hardware. */ + if (!(val & soc_pm.sfrbu_regs.pswbu.ctrl)) return; - val &= ~soc_pm.sfrbu_regs.pswbu.softsw; - val |= soc_pm.sfrbu_regs.pswbu.key | soc_pm.sfrbu_regs.pswbu.ctrl; + val &= ~soc_pm.sfrbu_regs.pswbu.ctrl; + val |= soc_pm.sfrbu_regs.pswbu.key; writel(val, soc_pm.data.sfrbu + offset); - - /* Wait for update. */ - val = readl(soc_pm.data.sfrbu + offset); - while (val & soc_pm.sfrbu_regs.pswbu.state) - val = readl(soc_pm.data.sfrbu + offset); } static void at91_pm_suspend(suspend_state_t state) { if (soc_pm.data.mode == AT91_PM_BACKUP) { - at91_pm_switch_ba_to_vbat(); + at91_pm_switch_ba_to_auto(); cpu_suspend(0, at91_suspend_finish); diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index 9466ae61f56a..b45c68d88275 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -136,6 +136,7 @@ ENDPROC(shmobile_smp_sleep) .long shmobile_smp_arg - 1b .bss + .align 2 .globl shmobile_smp_mpidr shmobile_smp_mpidr: .space NR_CPUS * 4 diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index af5177801fb1..bf1577216ffa 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -26,6 +26,13 @@ #ifdef CONFIG_MMU +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + unsigned long addr = (unsigned long)unsafe_src; + + return addr >= TASK_SIZE && ULONG_MAX - addr >= size; +} + /* * This is useful to dump out the page tables associated with * 'addr' in mm 'mm'. 
@@ -552,6 +559,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) return; + pr_alert("8<--- cut here ---\n"); pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); diff --git a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi index e666ebb28980..7d15be690894 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi @@ -914,7 +914,7 @@ interrupt-controller; #interrupt-cells = <2>; - clock: mt6397clock { + clock: clocks { compatible = "mediatek,mt6397-clk"; #clock-cells = <1>; }; @@ -926,11 +926,10 @@ #gpio-cells = <2>; }; - regulator: mt6397regulator { + regulators { compatible = "mediatek,mt6397-regulator"; mt6397_vpca15_reg: buck_vpca15 { - regulator-compatible = "buck_vpca15"; regulator-name = "vpca15"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -940,7 +939,6 @@ }; mt6397_vpca7_reg: buck_vpca7 { - regulator-compatible = "buck_vpca7"; regulator-name = "vpca7"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -950,7 +948,6 @@ }; mt6397_vsramca15_reg: buck_vsramca15 { - regulator-compatible = "buck_vsramca15"; regulator-name = "vsramca15"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -959,7 +956,6 @@ }; mt6397_vsramca7_reg: buck_vsramca7 { - regulator-compatible = "buck_vsramca7"; regulator-name = "vsramca7"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -968,7 +964,6 @@ }; mt6397_vcore_reg: buck_vcore { - regulator-compatible = "buck_vcore"; regulator-name = "vcore"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -977,7 +972,6 @@ }; mt6397_vgpu_reg: buck_vgpu { - regulator-compatible = "buck_vgpu"; regulator-name = "vgpu"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -986,7 +980,6 
@@ }; mt6397_vdrm_reg: buck_vdrm { - regulator-compatible = "buck_vdrm"; regulator-name = "vdrm"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1400000>; @@ -995,7 +988,6 @@ }; mt6397_vio18_reg: buck_vio18 { - regulator-compatible = "buck_vio18"; regulator-name = "vio18"; regulator-min-microvolt = <1620000>; regulator-max-microvolt = <1980000>; @@ -1004,18 +996,15 @@ }; mt6397_vtcxo_reg: ldo_vtcxo { - regulator-compatible = "ldo_vtcxo"; regulator-name = "vtcxo"; regulator-always-on; }; mt6397_va28_reg: ldo_va28 { - regulator-compatible = "ldo_va28"; regulator-name = "va28"; }; mt6397_vcama_reg: ldo_vcama { - regulator-compatible = "ldo_vcama"; regulator-name = "vcama"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -1023,18 +1012,15 @@ }; mt6397_vio28_reg: ldo_vio28 { - regulator-compatible = "ldo_vio28"; regulator-name = "vio28"; regulator-always-on; }; mt6397_vusb_reg: ldo_vusb { - regulator-compatible = "ldo_vusb"; regulator-name = "vusb"; }; mt6397_vmc_reg: ldo_vmc { - regulator-compatible = "ldo_vmc"; regulator-name = "vmc"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; @@ -1042,7 +1028,6 @@ }; mt6397_vmch_reg: ldo_vmch { - regulator-compatible = "ldo_vmch"; regulator-name = "vmch"; regulator-min-microvolt = <3000000>; regulator-max-microvolt = <3300000>; @@ -1050,7 +1035,6 @@ }; mt6397_vemc_3v3_reg: ldo_vemc3v3 { - regulator-compatible = "ldo_vemc3v3"; regulator-name = "vemc_3v3"; regulator-min-microvolt = <3000000>; regulator-max-microvolt = <3300000>; @@ -1058,7 +1042,6 @@ }; mt6397_vgp1_reg: ldo_vgp1 { - regulator-compatible = "ldo_vgp1"; regulator-name = "vcamd"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -1066,7 +1049,6 @@ }; mt6397_vgp2_reg: ldo_vgp2 { - regulator-compatible = "ldo_vgp2"; regulator-name = "vcamio"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -1074,7 +1056,6 @@ }; mt6397_vgp3_reg: ldo_vgp3 { - 
regulator-compatible = "ldo_vgp3"; regulator-name = "vcamaf"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -1082,7 +1063,6 @@ }; mt6397_vgp4_reg: ldo_vgp4 { - regulator-compatible = "ldo_vgp4"; regulator-name = "vgp4"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <3300000>; @@ -1090,7 +1070,6 @@ }; mt6397_vgp5_reg: ldo_vgp5 { - regulator-compatible = "ldo_vgp5"; regulator-name = "vgp5"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <3000000>; @@ -1098,7 +1077,6 @@ }; mt6397_vgp6_reg: ldo_vgp6 { - regulator-compatible = "ldo_vgp6"; regulator-name = "vgp6"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -1107,7 +1085,6 @@ }; mt6397_vibr_reg: ldo_vibr { - regulator-compatible = "ldo_vibr"; regulator-name = "vibr"; regulator-min-microvolt = <1300000>; regulator-max-microvolt = <3300000>; @@ -1115,7 +1092,7 @@ }; }; - rtc: mt6397rtc { + rtc: rtc { compatible = "mediatek,mt6397-rtc"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 4e0c3aa264a5..52b56069c51d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -307,11 +307,10 @@ interrupt-controller; #interrupt-cells = <2>; - mt6397regulator: mt6397regulator { + regulators { compatible = "mediatek,mt6397-regulator"; mt6397_vpca15_reg: buck_vpca15 { - regulator-compatible = "buck_vpca15"; regulator-name = "vpca15"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -320,7 +319,6 @@ }; mt6397_vpca7_reg: buck_vpca7 { - regulator-compatible = "buck_vpca7"; regulator-name = "vpca7"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -329,7 +327,6 @@ }; mt6397_vsramca15_reg: buck_vsramca15 { - regulator-compatible = "buck_vsramca15"; regulator-name = "vsramca15"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -338,7 +335,6 @@ }; 
mt6397_vsramca7_reg: buck_vsramca7 { - regulator-compatible = "buck_vsramca7"; regulator-name = "vsramca7"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -347,7 +343,6 @@ }; mt6397_vcore_reg: buck_vcore { - regulator-compatible = "buck_vcore"; regulator-name = "vcore"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -356,7 +351,6 @@ }; mt6397_vgpu_reg: buck_vgpu { - regulator-compatible = "buck_vgpu"; regulator-name = "vgpu"; regulator-min-microvolt = < 700000>; regulator-max-microvolt = <1350000>; @@ -365,7 +359,6 @@ }; mt6397_vdrm_reg: buck_vdrm { - regulator-compatible = "buck_vdrm"; regulator-name = "vdrm"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1400000>; @@ -374,7 +367,6 @@ }; mt6397_vio18_reg: buck_vio18 { - regulator-compatible = "buck_vio18"; regulator-name = "vio18"; regulator-min-microvolt = <1620000>; regulator-max-microvolt = <1980000>; @@ -383,19 +375,16 @@ }; mt6397_vtcxo_reg: ldo_vtcxo { - regulator-compatible = "ldo_vtcxo"; regulator-name = "vtcxo"; regulator-always-on; }; mt6397_va28_reg: ldo_va28 { - regulator-compatible = "ldo_va28"; regulator-name = "va28"; regulator-always-on; }; mt6397_vcama_reg: ldo_vcama { - regulator-compatible = "ldo_vcama"; regulator-name = "vcama"; regulator-min-microvolt = <1500000>; regulator-max-microvolt = <2800000>; @@ -403,18 +392,15 @@ }; mt6397_vio28_reg: ldo_vio28 { - regulator-compatible = "ldo_vio28"; regulator-name = "vio28"; regulator-always-on; }; mt6397_vusb_reg: ldo_vusb { - regulator-compatible = "ldo_vusb"; regulator-name = "vusb"; }; mt6397_vmc_reg: ldo_vmc { - regulator-compatible = "ldo_vmc"; regulator-name = "vmc"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; @@ -422,7 +408,6 @@ }; mt6397_vmch_reg: ldo_vmch { - regulator-compatible = "ldo_vmch"; regulator-name = "vmch"; regulator-min-microvolt = <3000000>; regulator-max-microvolt = <3300000>; @@ -430,7 +415,6 @@ }; mt6397_vemc_3v3_reg: 
ldo_vemc3v3 { - regulator-compatible = "ldo_vemc3v3"; regulator-name = "vemc_3v3"; regulator-min-microvolt = <3000000>; regulator-max-microvolt = <3300000>; @@ -438,7 +422,6 @@ }; mt6397_vgp1_reg: ldo_vgp1 { - regulator-compatible = "ldo_vgp1"; regulator-name = "vcamd"; regulator-min-microvolt = <1220000>; regulator-max-microvolt = <3300000>; @@ -446,7 +429,6 @@ }; mt6397_vgp2_reg: ldo_vgp2 { - regulator-compatible = "ldo_vgp2"; regulator-name = "vcamio"; regulator-min-microvolt = <1000000>; regulator-max-microvolt = <3300000>; @@ -454,7 +436,6 @@ }; mt6397_vgp3_reg: ldo_vgp3 { - regulator-compatible = "ldo_vgp3"; regulator-name = "vcamaf"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <3300000>; @@ -462,7 +443,6 @@ }; mt6397_vgp4_reg: ldo_vgp4 { - regulator-compatible = "ldo_vgp4"; regulator-name = "vgp4"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <3300000>; @@ -470,7 +450,6 @@ }; mt6397_vgp5_reg: ldo_vgp5 { - regulator-compatible = "ldo_vgp5"; regulator-name = "vgp5"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <3000000>; @@ -478,7 +457,6 @@ }; mt6397_vgp6_reg: ldo_vgp6 { - regulator-compatible = "ldo_vgp6"; regulator-name = "vgp6"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <3300000>; @@ -486,7 +464,6 @@ }; mt6397_vibr_reg: ldo_vibr { - regulator-compatible = "ldo_vibr"; regulator-name = "vibr"; regulator-min-microvolt = <1300000>; regulator-max-microvolt = <3300000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index c71a5155702d..9d05f908ddef 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -1260,8 +1260,7 @@ }; pwm0: pwm@1401e000 { - compatible = "mediatek,mt8173-disp-pwm", - "mediatek,mt6595-disp-pwm"; + compatible = "mediatek,mt8173-disp-pwm"; reg = <0 0x1401e000 0 0x1000>; #pwm-cells = <2>; clocks = <&mmsys CLK_MM_DISP_PWM026M>, @@ -1271,8 +1270,7 @@ }; pwm1: pwm@1401f000 { - 
compatible = "mediatek,mt8173-disp-pwm", - "mediatek,mt6595-disp-pwm"; + compatible = "mediatek,mt8173-disp-pwm"; reg = <0 0x1401f000 0 0x1000>; #pwm-cells = <2>; clocks = <&mmsys CLK_MM_DISP_PWM126M>, diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-damu.dts b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-damu.dts index 8e0cba4d2372..9a35c6577996 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-damu.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-damu.dts @@ -25,6 +25,10 @@ hid-descr-addr = <0x0001>; }; +&mt6358codec { + mediatek,dmic-mode = <1>; /* one-wire */ +}; + &qca_wifi { qcom,ath10k-calibration-variant = "GO_DAMU"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-kenzo.dts b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-kenzo.dts index 6f1aa692753a..a477e2cce204 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-kenzo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-kenzo.dts @@ -10,3 +10,18 @@ model = "Google kenzo sku17 board"; compatible = "google,juniper-sku17", "google,juniper", "mediatek,mt8183"; }; + +&i2c0 { + touchscreen@40 { + compatible = "hid-over-i2c"; + reg = <0x40>; + + pinctrl-names = "default"; + pinctrl-0 = <&touchscreen_pins>; + + interrupts-extended = <&pio 155 IRQ_TYPE_LEVEL_LOW>; + + post-power-on-delay-ms = <70>; + hid-descr-addr = <0x0001>; + }; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-willow.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-willow.dtsi index 76d33540166f..c942e461a177 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-willow.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-willow.dtsi @@ -6,6 +6,21 @@ /dts-v1/; #include "mt8183-kukui-jacuzzi.dtsi" +&i2c0 { + touchscreen@40 { + compatible = "hid-over-i2c"; + reg = <0x40>; + + pinctrl-names = "default"; + pinctrl-0 = <&touchscreen_pins>; + + interrupts-extended = <&pio 155 IRQ_TYPE_LEVEL_LOW>; + + post-power-on-delay-ms = 
<70>; + hid-descr-addr = <0x0001>; + }; +}; + &i2c2 { trackpad@2c { compatible = "hid-over-i2c"; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index f19bf2834b39..3fa491dc5202 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -39,8 +39,6 @@ pp3300_panel: pp3300-panel { compatible = "regulator-fixed"; regulator-name = "pp3300_panel"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; pinctrl-names = "default"; pinctrl-0 = <&pp3300_panel_pins>; diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 712ac1826d68..68395d4c8930 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1236,6 +1236,7 @@ clock-names = "engine", "digital", "hs"; phys = <&mipi_tx0>; phy-names = "dphy"; + status = "disabled"; }; mutex: mutex@14016000 { diff --git a/arch/arm64/boot/dts/mediatek/mt8516.dtsi b/arch/arm64/boot/dts/mediatek/mt8516.dtsi index bbe5a1419eff..5655f12723f1 100644 --- a/arch/arm64/boot/dts/mediatek/mt8516.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8516.dtsi @@ -144,10 +144,10 @@ #size-cells = <2>; ranges; - /* 128 KiB reserved for ARM Trusted Firmware (BL31) */ + /* 192 KiB reserved for ARM Trusted Firmware (BL31) */ bl31_secmon_reserved: secmon@43000000 { no-map; - reg = <0 0x43000000 0 0x20000>; + reg = <0 0x43000000 0 0x30000>; }; }; @@ -206,7 +206,7 @@ compatible = "mediatek,mt8516-wdt", "mediatek,mt6589-wdt"; reg = <0 0x10007000 0 0x1000>; - interrupts = <GIC_SPI 198 IRQ_TYPE_EDGE_FALLING>; + interrupts = <GIC_SPI 198 IRQ_TYPE_LEVEL_LOW>; #reset-cells = <1>; }; @@ -269,7 +269,7 @@ interrupt-parent = <&gic>; interrupt-controller; reg = <0 0x10310000 0 0x1000>, - <0 0x10320000 0 0x1000>, + <0 0x1032f000 0 0x2000>, <0 0x10340000 0 0x2000>, <0 0x10360000 0 0x2000>; interrupts = <GIC_PPI 9 @@ 
-345,14 +345,10 @@ reg = <0 0x11009000 0 0x90>, <0 0x11000180 0 0x80>; interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_LOW>; - clocks = <&topckgen CLK_TOP_AHB_INFRA_D2>, - <&infracfg CLK_IFR_I2C0_SEL>, - <&topckgen CLK_TOP_I2C0>, + clock-div = <2>; + clocks = <&topckgen CLK_TOP_I2C0>, <&topckgen CLK_TOP_APDMA>; - clock-names = "main-source", - "main-sel", - "main", - "dma"; + clock-names = "main", "dma"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -364,14 +360,10 @@ reg = <0 0x1100a000 0 0x90>, <0 0x11000200 0 0x80>; interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_LOW>; - clocks = <&topckgen CLK_TOP_AHB_INFRA_D2>, - <&infracfg CLK_IFR_I2C1_SEL>, - <&topckgen CLK_TOP_I2C1>, + clock-div = <2>; + clocks = <&topckgen CLK_TOP_I2C1>, <&topckgen CLK_TOP_APDMA>; - clock-names = "main-source", - "main-sel", - "main", - "dma"; + clock-names = "main", "dma"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -383,14 +375,10 @@ reg = <0 0x1100b000 0 0x90>, <0 0x11000280 0 0x80>; interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>; - clocks = <&topckgen CLK_TOP_AHB_INFRA_D2>, - <&infracfg CLK_IFR_I2C2_SEL>, - <&topckgen CLK_TOP_I2C2>, + clock-div = <2>; + clocks = <&topckgen CLK_TOP_I2C2>, <&topckgen CLK_TOP_APDMA>; - clock-names = "main-source", - "main-sel", - "main", - "dma"; + clock-names = "main", "dma"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi index 54514d62398f..8696da3de4cb 100644 --- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi +++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi @@ -48,7 +48,6 @@ }; &i2c0 { - clock-div = <2>; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; status = "okay"; @@ -157,7 +156,6 @@ }; &i2c2 { - clock-div = <2>; pinctrl-names = "default"; pinctrl-0 = <&i2c2_pins_a>; status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 
668674059d48..79d5f1433800 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -105,7 +105,7 @@ sleep_clk: sleep-clk { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <32768>; + clock-frequency = <32764>; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 4447ed146b3a..1ae2fbef9058 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -25,7 +25,7 @@ sleep_clk: sleep-clk { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <32768>; + clock-frequency = <32764>; clock-output-names = "sleep_clk"; }; }; @@ -419,6 +419,15 @@ #size-cells = <1>; ranges; + interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 310 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "pwr_event", + "qusb2_phy", + "hs_phy_irq", + "ss_phy_irq"; + clocks = <&gcc GCC_USB30_MASTER_CLK>, <&gcc GCC_SYS_NOC_USB3_AXI_CLK>, <&gcc GCC_USB30_SLEEP_CLK>, diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 9ee8eebfcdb5..ec0f067a6a5d 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -2608,9 +2608,14 @@ #size-cells = <1>; ranges; - interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>, + interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "hs_phy_irq", "ss_phy_irq"; + interrupt-names = "pwr_event", + "qusb2_phy", + "hs_phy_irq", + "ss_phy_irq"; clocks = <&gcc GCC_SYS_NOC_USB3_AXI_CLK>, <&gcc GCC_USB30_MASTER_CLK>, diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 929fc0667e98..c65f3c9a6673 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -39,7 +39,7 @@ sleep_clk: 
sleep-clk { compatible = "fixed-clock"; - clock-frequency = <32000>; + clock-frequency = <32764>; #clock-cells = <0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index cff5423e9c88..69212445d22c 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3948,16 +3948,16 @@ "vfe1", "vfe_lite"; - interrupts = <GIC_SPI 464 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 466 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 468 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 477 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 478 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 479 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 448 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 465 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 467 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 469 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 464 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 466 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 468 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 477 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 478 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 479 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 448 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 465 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 467 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 469 IRQ_TYPE_EDGE_RISING>; interrupt-names = "csid0", "csid1", "csid2", diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi index 2e4fe2bc1e0a..0f6a9a5cbe17 100644 --- a/arch/arm64/boot/dts/qcom/sm6125.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi @@ -27,7 +27,7 @@ sleep_clk: sleep-clk { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <32000>; + clock-frequency = <32764>; clock-output-names = "sleep_clk"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8150-microsoft-surface-duo.dts b/arch/arm64/boot/dts/qcom/sm8150-microsoft-surface-duo.dts index 736da9af44e0..b233b56d4bbd 100644 --- a/arch/arm64/boot/dts/qcom/sm8150-microsoft-surface-duo.dts +++ b/arch/arm64/boot/dts/qcom/sm8150-microsoft-surface-duo.dts @@ -375,8 +375,8 @@ pinctrl-0 = <&da7280_intr_default>; dlg,actuator-type = "LRA"; - 
dlg,dlg,const-op-mode = <1>; - dlg,dlg,periodic-op-mode = <1>; + dlg,const-op-mode = <1>; + dlg,periodic-op-mode = <1>; dlg,nom-microvolt = <2000000>; dlg,abs-max-microvolt = <2000000>; dlg,imax-microamp = <129000>; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 99afdd1ad7c6..bf91e0acd435 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -82,7 +82,7 @@ sleep_clk: sleep-clk { compatible = "fixed-clock"; - clock-frequency = <32768>; + clock-frequency = <32764>; #clock-cells = <0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 8506dc841c86..df02fe5ceee9 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -32,7 +32,7 @@ sleep_clk: sleep-clk { compatible = "fixed-clock"; - clock-frequency = <32000>; + clock-frequency = <32764>; #clock-cells = <0>; }; @@ -754,7 +754,7 @@ mpss: remoteproc@4080000 { compatible = "qcom,sm8350-mpss-pas"; - reg = <0x0 0x04080000 0x0 0x4040>; + reg = <0x0 0x04080000 0x0 0x10000>; interrupts-extended = <&intc GIC_SPI 264 IRQ_TYPE_LEVEL_HIGH>, <&smp2p_modem_in 0 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts index cef4d18b599d..a992a6ac5e9f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts @@ -117,7 +117,7 @@ }; &u2phy1_host { - status = "disabled"; + phy-supply = <&vdd_5v>; }; &uart0 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index d10074b56156..09a8861deba2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -136,7 +136,7 @@ snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; tx_delay = <0x10>; - rx_delay = <0x10>; + rx_delay = <0x23>; status = "okay"; }; 
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7dfaad0fa17b..8fe0c8d0057a 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A76AE 0xD0E #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 #define ARM_CPU_PART_CORTEX_A78AE 0xD42 @@ -111,6 +112,7 @@ #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 #define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 @@ -139,6 +141,7 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) @@ -170,6 +173,7 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) #define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define 
MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9a62884183e5..7a407c3767b6 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -44,9 +44,11 @@ extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); +extern void fpsimd_kvm_prepare(void); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl); + void *sve_state, unsigned int sve_vl, + enum fp_type *type, enum fp_type to_save); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 91038fa2e5e0..6d7b6b5d076d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -64,6 +64,7 @@ enum kvm_mode kvm_get_mode(void); DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; +extern unsigned int kvm_host_sve_max_vl; int kvm_arm_init_sve(void); u32 __attribute_const__ kvm_target_cpu(void); @@ -280,7 +281,19 @@ struct vcpu_reset_state { struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; + + /* + * Guest floating point state + * + * The architecture has two main floating point extensions, + * the original FPSIMD and SVE. These have overlapping + * register views, with the FPSIMD V registers occupying the + * low 128 bits of the SVE Z registers. When the core + * floating point code saves the register state of a task it + * records which view it saved in fp_type. 
+ */ void *sve_state; + enum fp_type fp_type; unsigned int sve_max_vl; /* Stage 2 paging state used by the hardware on next switch */ @@ -289,7 +302,6 @@ struct kvm_vcpu_arch { /* Values of trap registers for the guest. */ u64 hcr_el2; u64 mdcr_el2; - u64 cptr_el2; /* Values of trap registers for the host before guest entry. */ u64 mdcr_el2_host; @@ -321,7 +333,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch external_debug_state; struct thread_info *host_thread_info; /* hyp VA */ - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ struct { /* {Break,watch}point registers */ @@ -410,8 +421,6 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_DIRTY (1 << 0) #define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */ #define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */ -#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */ -#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */ #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 657d0c94cf82..308df86f9a4b 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -117,5 +117,12 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); +extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl); + +static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.flags & KVM_ARM64_FP_ENABLED; +} + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 56bc2e4e81a6..0070ee4ba895 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -31,9 +31,12 @@ static inline unsigned long 
arch_calc_vm_flag_bits(struct file *file, * backed by tags-capable memory. The vm_flags may be overridden by a * filesystem supporting MTE (RAM-based). */ - if (system_supports_mte() && - ((flags & MAP_ANONYMOUS) || shmem_file(file))) - return VM_MTE_ALLOWED; + if (system_supports_mte()) { + if ((flags & MAP_ANONYMOUS) && !(flags & MAP_HUGETLB)) + return VM_MTE_ALLOWED; + if (shmem_file(file)) + return VM_MTE_ALLOWED; + } return 0; } diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7364530de0a7..1da032444dac 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -115,6 +115,12 @@ struct debug_info { #endif }; +enum fp_type { + FP_STATE_CURRENT, /* Save based on current task state. */ + FP_STATE_FPSIMD, + FP_STATE_SVE, +}; + struct cpu_context { unsigned long x19; unsigned long x20; @@ -145,6 +151,7 @@ struct thread_struct { struct user_fpsimd_state fpsimd_state; } uw; + enum fp_type fp_type; /* registers FPSIMD or SVE? 
*/ unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ unsigned int sve_vl; /* SVE vector length */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index db7b371b367c..6d7f03adece8 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,7 +97,6 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); -u8 spectre_bhb_loop_affected(int scope); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 97c42be71338..1510f457b615 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -87,16 +87,18 @@ int populate_cache_leaves(unsigned int cpu) unsigned int level, idx; enum cache_type type; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *infos = this_cpu_ci->info_list; for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && - idx < this_cpu_ci->num_leaves; idx++, level++) { + idx < this_cpu_ci->num_leaves; level++) { type = get_cache_type(level); if (type == CACHE_TYPE_SEPARATE) { - ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); - ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (idx + 1 >= this_cpu_ci->num_leaves) + break; + ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level); + ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level); } else { - ci_leaf_init(this_leaf++, type, level); + ci_leaf_init(&infos[idx++], type, level); } } return 0; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e22571e57ae1..4be9d9fd4fb7 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -117,6 +117,8 @@ 
struct fpsimd_last_state_struct { struct user_fpsimd_state *st; void *sve_state; unsigned int sve_vl; + enum fp_type *fp_type; + enum fp_type to_save; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -243,14 +245,6 @@ static void sve_free(struct task_struct *task) * The task can execute SVE instructions while in userspace without * trapping to the kernel. * - * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the - * corresponding Zn), P0-P15 and FFR are encoded in in - * task->thread.sve_state, formatted appropriately for vector - * length task->thread.sve_vl. - * - * task->thread.sve_state must point to a valid buffer at least - * sve_state_size(task) bytes in size. - * * During any syscall, the kernel may optionally clear TIF_SVE and * discard the vector state except for the FPSIMD subset. * @@ -260,7 +254,15 @@ static void sve_free(struct task_struct *task) * do_sve_acc() to be called, which does some preparation and then * sets TIF_SVE. * - * When stored, FPSIMD registers V0-V31 are encoded in + * During any syscall, the kernel may optionally clear TIF_SVE and + * discard the vector state except for the FPSIMD subset. + * + * The data will be stored in one of two formats: + * + * * FPSIMD only - FP_STATE_FPSIMD: + * + * When the FPSIMD only state stored task->thread.fp_type is set to + * FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are * logically zero but not stored anywhere; P0-P15 and FFR are not * stored and have unspecified values from userspace's point of @@ -268,7 +270,23 @@ static void sve_free(struct task_struct *task) * but userspace is discouraged from relying on this. * * task->thread.sve_state does not need to be non-NULL, valid or any - * particular size: it must not be dereferenced. + * particular size: it must not be dereferenced and any data stored + * there should be considered stale and not referenced. 
+ * + * * SVE state - FP_STATE_SVE: + * + * When the full SVE state is stored task->thread.fp_type is set to + * FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the + * corresponding Zn), P0-P15 and FFR are encoded in in + * task->thread.sve_state, formatted appropriately for vector + * length task->thread.sve_vl or, if SVCR.SM is set, + * task->thread.sme_vl. The storage for the vector registers in + * task->thread.uw.fpsimd_state should be ignored. + * + * task->thread.sve_state must point to a valid buffer at least + * sve_state_size(task) bytes in size. The data stored in + * task->thread.uw.fpsimd_state.vregs should be considered stale + * and not referenced. * * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state * irrespective of whether TIF_SVE is clear or set, since these are @@ -287,45 +305,68 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE); sve_load_state(sve_pffr(¤t->thread), ¤t->thread.uw.fpsimd_state.fpsr, sve_vq_from_vl(current->thread.sve_vl) - 1); - else + } else { + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD); fpsimd_load_state(¤t->thread.uw.fpsimd_state); + } } /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to - * date with respect to the CPU registers. + * date with respect to the CPU registers. Note carefully that the + * current context is the context last bound to the CPU stored in + * last, if KVM is involved this may be the guest VM context rather + * than the host thread for the VM pointed to by current. This means + * that we must always reference the state storage via last rather + * than via current, if we are saving KVM state then it will have + * ensured that the type of registers to save is set in last->to_save. 
*/ static void fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + bool save_sve_regs = false; + unsigned long vl; WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (IS_ENABLED(CONFIG_ARM64_SVE) && - test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { - /* - * Can't save the user regs, so current would - * re-enter user with corrupt state. - * There's no way to recover, so kill it: - */ - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); - return; - } - - sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr); - } else - fpsimd_save_state(last->st); + if (test_thread_flag(TIF_FOREIGN_FPSTATE)) + return; + + if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) || + last->to_save == FP_STATE_SVE) { + save_sve_regs = true; + vl = last->sve_vl; + } + + if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { + /* Get the configured VL from RDVL, will account for SM */ + if (WARN_ON(sve_get_vl() != vl)) { + /* + * Can't save the user regs, so current would + * re-enter user with corrupt state. 
+ * There's no way to recover, so kill it: + */ + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); + return; + } + } + + if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { + sve_save_state((char *)last->sve_state + + sve_ffr_offset(last->sve_vl), + &last->st->fpsr); + *last->fp_type = FP_STATE_SVE; + } else { + fpsimd_save_state(last->st); + *last->fp_type = FP_STATE_FPSIMD; } } @@ -624,8 +665,10 @@ int sve_set_vector_length(struct task_struct *task, } fpsimd_flush_task_state(task); - if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) + if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) { sve_to_fpsimd(task); + task->thread.fp_type = FP_STATE_FPSIMD; + } if (task == current) put_cpu_fpsimd_context(); @@ -965,6 +1008,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) } else { fpsimd_to_sve(current); fpsimd_flush_task_state(current); + current->thread.fp_type = FP_STATE_SVE; } put_cpu_fpsimd_context(); @@ -1079,6 +1123,8 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } + current->thread.fp_type = FP_STATE_FPSIMD; + put_cpu_fpsimd_context(); } @@ -1122,6 +1168,8 @@ static void fpsimd_bind_task_to_cpu(void) last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; + last->fp_type = ¤t->thread.fp_type; + last->to_save = FP_STATE_CURRENT; current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { @@ -1136,7 +1184,8 @@ static void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl) + unsigned int sve_vl, enum fp_type *type, + enum fp_type to_save) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1147,6 +1196,8 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, last->st = st; last->sve_state = sve_state; last->sve_vl = sve_vl; + last->fp_type = type; + last->to_save = to_save; } /* diff --git 
a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c95956789405..5efe20e2520d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -306,6 +306,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + + dst->thread.fp_type = FP_STATE_FPSIMD; + /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index ebce46c4e942..df8188193c17 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -845,52 +845,88 @@ static unsigned long system_bhb_mitigations; * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any * SCOPE_SYSTEM call will give the right answer. */ -u8 spectre_bhb_loop_affected(int scope) +static bool is_spectre_bhb_safe(int scope) +{ + static const struct midr_range spectre_bhb_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A510), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A520), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), + {}, + }; + static bool all_safe = true; + + if (scope != SCOPE_LOCAL_CPU) + return all_safe; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_safe_list)) + return true; + + all_safe = false; + + return false; +} + +static u8 spectre_bhb_loop_affected(void) { u8 k = 0; - static u8 max_bhb_k; - - if (scope == SCOPE_LOCAL_CPU) { - static const struct midr_range spectre_bhb_k32_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), - MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), - 
MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), - {}, - }; - static const struct midr_range spectre_bhb_k24_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), - {}, - }; - static const struct midr_range spectre_bhb_k11_list[] = { - MIDR_ALL_VERSIONS(MIDR_AMPERE1), - {}, - }; - static const struct midr_range spectre_bhb_k8_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - {}, - }; - - if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) - k = 32; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) - k = 24; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) - k = 11; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) - k = 8; - - max_bhb_k = max(max_bhb_k, k); - } else { - k = max_bhb_k; - } + + static const struct midr_range spectre_bhb_k132_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, + }; + static const struct midr_range spectre_bhb_k38_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, + }; + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD), + {}, + }; + static const struct midr_range spectre_bhb_k11_list[] = { + 
MIDR_ALL_VERSIONS(MIDR_AMPERE1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k132_list)) + k = 132; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k38_list)) + k = 38; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) + k = 11; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; return k; } @@ -916,29 +952,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) } } -static bool is_spectre_bhb_fw_affected(int scope) +static bool has_spectre_bhb_fw_mitigation(void) { - static bool system_affected; enum mitigation_state fw_state; bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE; - static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - {}, - }; - bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), - spectre_bhb_firmware_mitigated_list); - - if (scope != SCOPE_LOCAL_CPU) - return system_affected; fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); - if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { - system_affected = true; - return true; - } - - return false; + return has_smccc && fw_state == SPECTRE_MITIGATED; } static bool supports_ecbhb(int scope) @@ -954,6 +974,8 @@ static bool supports_ecbhb(int scope) ID_AA64MMFR1_ECBHB_SHIFT); } +static u8 max_bhb_k; + bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope) { @@ -962,16 +984,18 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, if (supports_csv2p3(scope)) return false; - if (supports_clearbhb(scope)) - 
return true; - - if (spectre_bhb_loop_affected(scope)) - return true; + if (is_spectre_bhb_safe(scope)) + return false; - if (is_spectre_bhb_fw_affected(scope)) - return true; + /* + * At this point the core isn't known to be "safe" so we're going to + * assume it's vulnerable. We still need to update `max_bhb_k` though, + * but only if we aren't mitigating with clearbhb though. + */ + if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU)) + max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected()); - return false; + return true; } static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) @@ -997,7 +1021,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cpu_cb; - enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + enum mitigation_state state = SPECTRE_VULNERABLE; struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) @@ -1023,7 +1047,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); state = SPECTRE_MITIGATED; set_bit(BHB_INSN, &system_bhb_mitigations); - } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + } else if (spectre_bhb_loop_affected()) { /* * Ensure KVM uses the indirect vector which will have the * branchy-loop added. A57/A72-r0 will already have selected @@ -1036,32 +1060,29 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); state = SPECTRE_MITIGATED; set_bit(BHB_LOOP, &system_bhb_mitigations); - } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { - fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); - if (fw_state == SPECTRE_MITIGATED) { - /* - * Ensure KVM uses one of the spectre bp_hardening - * vectors. 
The indirect vector doesn't include the EL3 - * call, so needs upgrading to - * HYP_VECTOR_SPECTRE_INDIRECT. - */ - if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) - data->slot += 1; - - this_cpu_set_vectors(EL1_VECTOR_BHB_FW); - - /* - * The WA3 call in the vectors supersedes the WA1 call - * made during context-switch. Uninstall any firmware - * bp_hardening callback. - */ - cpu_cb = spectre_v2_get_sw_mitigation_cb(); - if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) - __this_cpu_write(bp_hardening_data.fn, NULL); - - state = SPECTRE_MITIGATED; - set_bit(BHB_FW, &system_bhb_mitigations); - } + } else if (has_spectre_bhb_fw_mitigation()) { + /* + * Ensure KVM uses one of the spectre bp_hardening + * vectors. The indirect vector doesn't include the EL3 + * call, so needs upgrading to + * HYP_VECTOR_SPECTRE_INDIRECT. + */ + if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) + data->slot += 1; + + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * The WA3 call in the vectors supersedes the WA1 call + * made during context-switch. Uninstall any firmware + * bp_hardening callback. 
+ */ + cpu_cb = spectre_v2_get_sw_mitigation_cb(); + if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + set_bit(BHB_FW, &system_bhb_mitigations); } update_mitigation_state(&spectre_bhb_state, state); @@ -1095,7 +1116,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, { u8 rd; u32 insn; - u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); BUG_ON(nr_inst != 1); /* MOV -> MOV */ @@ -1104,7 +1124,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, insn = le32_to_cpu(*origptr); rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); - insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0, AARCH64_INSN_VARIANT_64BIT, AARCH64_INSN_MOVEWIDE_ZERO); *updptr++ = cpu_to_le32(insn); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 92de9212b7b5..4ef9e688508c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -829,6 +829,7 @@ static int sve_set(struct task_struct *target, ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_FPSIMD; goto out; } @@ -848,6 +849,7 @@ static int sve_set(struct task_struct *target, if (!target->thread.sve_state) { ret = -ENOMEM; clear_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_FPSIMD; goto out; } @@ -858,6 +860,7 @@ static int sve_set(struct task_struct *target, */ fpsimd_sync_to_sve(target); set_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_SVE; BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); start = SVE_PT_SVE_OFFSET; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index b3e1beccf458..768de05b616b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context 
__user *ctx) __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); clear_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_FPSIMD; /* load the hardware registers from the fpsimd_state structure */ if (!err) @@ -271,6 +272,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (sve.head.size <= sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_FPSIMD; goto fpsimd_only; } @@ -303,6 +305,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) return -EFAULT; set_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_SVE; fpsimd_only: /* copy the FP and status/control registers */ diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index a5e61e09ea92..7dd11c4c945e 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -37,6 +37,7 @@ SECTIONS */ /DISCARD/ : { *(.note.GNU-stack .note.gnu.property) + *(.ARM.attributes) } .note : { *(.note.*) } :text :note diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7bedadfa3642..90aa16ceddc4 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -139,6 +139,7 @@ SECTIONS /DISCARD/ : { *(.interp .dynamic) *(.dynsym .dynstr .hash .gnu.hash) + *(.ARM.attributes) } . = KIMAGE_VADDR; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9ded5443de48..5ca8782edb96 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1138,7 +1138,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, } vcpu_reset_hcr(vcpu); - vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT; /* * Handle the "start in power-off" case. 
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 5621020b28de..cfda503c8b3f 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -55,7 +55,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) } vcpu->arch.host_thread_info = kern_hyp_va(ti); - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); error: return ret; } @@ -66,24 +65,24 @@ error: * * Here, we just set the correct metadata to indicate that the FPSIMD * state in the cpu regs (if any) belongs to current on the host. - * - * TIF_SVE is backed up here, since it may get clobbered with guest state. - * This flag is restored by kvm_arch_vcpu_put_fp(vcpu). */ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) { BUG_ON(!current->mm); - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | - KVM_ARM64_HOST_SVE_IN_USE | - KVM_ARM64_HOST_SVE_ENABLED); + vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; vcpu->arch.flags |= KVM_ARM64_FP_HOST; - if (test_thread_flag(TIF_SVE)) - vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE; - - if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) - vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; + /* + * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such + * that the host kernel is responsible for restoring this state upon + * return to userspace, and the hyp code doesn't need to save anything. + * + * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures + * that PSTATE.{SM,ZA} == {0,0}. + */ + fpsimd_save_and_flush_cpu_state(); + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; } /* @@ -94,15 +93,26 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) */ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) { + enum fp_type fp_type; + WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + if (vcpu_has_sve(vcpu)) + fp_type = FP_STATE_SVE; + else + fp_type = FP_STATE_FPSIMD; + + /* + * Currently we do not support SME guests so SVCR is + * always 0 and we just need a variable to point to. 
+ */ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, - vcpu->arch.sve_max_vl); + vcpu->arch.sve_max_vl, + &vcpu->arch.fp_type, fp_type); clear_thread_flag(TIF_FOREIGN_FPSTATE); - update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); } } @@ -115,38 +125,22 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) { unsigned long flags; - bool host_has_sve = system_supports_sve(); - bool guest_has_sve = vcpu_has_sve(vcpu); local_irq_save(flags); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - if (guest_has_sve) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); - - /* Restore the VL that was saved when bound to the CPU */ - if (!has_vhe()) - sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, - SYS_ZCR_EL1); - } - - fpsimd_save_and_flush_cpu_state(); - } else if (has_vhe() && host_has_sve) { /* - * The FPSIMD/SVE state in the CPU has not been touched, and we - * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been - * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE - * for EL0. To avoid spurious traps, restore the trap state - * seen by kvm_arch_vcpu_load_fp(): + * Flush (save and invalidate) the fpsimd/sve state so that if + * the host tries to use fpsimd/sve, it's not using stale data + * from the guest. + * + * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the + * context unconditionally, in both nVHE and VHE. This allows + * the kernel to restore the fpsimd/sve state, including ZCR_EL1 + * when needed. 
*/ - if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED) - sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN); - else - sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); + fpsimd_save_and_flush_cpu_state(); } - update_thread_flag(TIF_SVE, - vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE); - local_irq_restore(flags); } diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 435346ea1504..d8c94c45cb2f 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN alternative_else_nop_endif mrs x1, isr_el1 cbz x1, 1f + + // Ensure that __guest_enter() always provides a context + // synchronization event so that callers don't need ISBs for anything + // that would usually be synchonized by the ERET. + isb mov x0, #ARM_EXCEPTION_IRQ ret diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index ecd41844eda0..797544662a95 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -207,16 +207,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) return __get_fault_info(esr, &vcpu->arch.fault); } -static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu) -{ - struct thread_struct *thread; - - thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct, - uw.fpsimd_state); - - __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr); -} - static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) { sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); @@ -225,24 +215,72 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } +static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs(vcpu)) + return; + + if (vcpu_has_sve(vcpu)) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + + write_sysreg_el2(zcr_el2, SYS_ZCR); + 
+ zcr_el1 = __vcpu_sys_reg(vcpu, ZCR_EL1); + write_sysreg_el1(zcr_el1, SYS_ZCR); + } +} + +static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs(vcpu)) + return; + + /* + * When the guest owns the FP regs, we know that guest+hyp traps for + * any FPSIMD/SVE/SME features exposed to the guest have been disabled + * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd() + * prior to __guest_entry(). As __guest_entry() guarantees a context + * synchronization event, we don't need an ISB here to avoid taking + * traps for anything that was exposed to the guest. + */ + if (vcpu_has_sve(vcpu)) { + zcr_el1 = read_sysreg_el1(SYS_ZCR); + __vcpu_sys_reg(vcpu, ZCR_EL1) = zcr_el1; + + /* + * The guest's state is always saved using the guest's max VL. + * Ensure that the host has the guest's max VL active such that + * the host can save the guest's state lazily, but don't + * artificially restrict the host to the guest's max VL. 
+ */ + if (has_vhe()) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + } else { + zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el1(zcr_el1, SYS_ZCR); + } + } +} + /* Check for an FPSIMD/SVE trap and handle as appropriate */ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) { - bool sve_guest, sve_host; + bool sve_guest; u8 esr_ec; u64 reg; if (!system_supports_fpsimd()) return false; - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - } else { - sve_guest = false; - sve_host = false; - } - + sve_guest = vcpu_has_sve(vcpu); esr_ec = kvm_vcpu_trap_get_class(vcpu); if (esr_ec != ESR_ELx_EC_FP_ASIMD && esr_ec != ESR_ELx_EC_SVE) @@ -268,15 +306,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) } isb(); - if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - if (sve_host) - __hyp_sve_save_host(vcpu); - else - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - - vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; - } - + /* Restore the guest state */ if (sve_guest) __hyp_sve_restore_guest(vcpu); else diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2da6aa8da868..a446883d5b9a 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -19,13 +19,17 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); +unsigned int kvm_host_sve_max_vl; + void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + fpsimd_lazy_switch_to_guest(kern_hyp_va(vcpu)); cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); + fpsimd_lazy_switch_to_host(kern_hyp_va(vcpu)); } static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt) @@ -237,11 +241,6 @@ void 
handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; - case ESR_ELx_EC_SVE: - sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); - isb(); - sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); - break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: handle_host_mem_abort(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 4db5409f40c4..fff7491d8351 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -34,21 +34,46 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); -static void __activate_traps(struct kvm_vcpu *vcpu) +static void __activate_cptr_traps(struct kvm_vcpu *vcpu) { - u64 val; - - ___activate_traps(vcpu); - __activate_traps_common(vcpu); + u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - val = vcpu->arch.cptr_el2; - val |= CPTR_EL2_TTA | CPTR_EL2_TAM; - if (!update_fp_enabled(vcpu)) { - val |= CPTR_EL2_TFP | CPTR_EL2_TZ; + if (!guest_owns_fp_regs(vcpu)) __activate_traps_fpsimd32(vcpu); + + /* !hVHE case upstream */ + if (1) { + val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; + + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs(vcpu)) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs(vcpu)) + val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); } +} + +static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + /* !hVHE case upstream */ + if (1) { + u64 val = CPTR_NVHE_EL2_RES1; + + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + + write_sysreg(val, cptr_el2); + } +} + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + ___activate_traps(vcpu); + __activate_traps_common(vcpu); + __activate_cptr_traps(vcpu); - write_sysreg(val, cptr_el2); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { @@ -69,7 +94,6 @@ static void 
__activate_traps(struct kvm_vcpu *vcpu) static void __deactivate_traps(struct kvm_vcpu *vcpu) { extern char __kvm_hyp_host_vector[]; - u64 cptr; ___deactivate_traps(vcpu); @@ -94,11 +118,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); - cptr = CPTR_EL2_DEFAULT; - if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)) - cptr |= CPTR_EL2_TZ; - - write_sysreg(cptr, cptr_el2); + __deactivate_cptr_traps(vcpu); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 813e6e2178c1..d8a8628a9d70 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -114,6 +114,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_save_host_state_vhe(host_ctxt); + fpsimd_lazy_switch_to_guest(vcpu); + /* * ARM erratum 1165522 requires us to configure both stage 1 and * stage 2 translation for the guest context before we clear @@ -144,6 +146,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); + fpsimd_lazy_switch_to_host(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ce36b0a3343..deb205638279 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -42,11 +42,14 @@ static u32 kvm_ipa_limit; PSR_AA32_I_BIT | PSR_AA32_F_BIT) unsigned int kvm_sve_max_vl; +unsigned int kvm_host_sve_max_vl; int kvm_arm_init_sve(void) { if (system_supports_sve()) { kvm_sve_max_vl = sve_max_virtualisable_vl; + kvm_host_sve_max_vl = sve_max_vl; + kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl; /* * The get_sve_reg()/set_sve_reg() ioctl interface will need diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index cdb705e1496a..72c6e16c3f23 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ 
b/arch/hexagon/include/asm/cmpxchg.h @@ -56,7 +56,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __oldval = 0; \ + __typeof__(*(ptr)) __oldval = (__typeof__(*(ptr))) 0; \ \ asm volatile( \ "1: %0 = memw_locked(%1);\n" \ diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 1240f038cce0..7aca1c329f94 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -195,8 +195,10 @@ int die(const char *str, struct pt_regs *regs, long err) printk(KERN_EMERG "Oops: %s[#%d]:\n", str, ++die.counter); if (notify_die(DIE_OOPS, str, regs, err, pt_cause(regs), SIGSEGV) == - NOTIFY_STOP) + NOTIFY_STOP) { + spin_unlock_irq(&die.lock); return 1; + } print_modules(); show_regs(regs); diff --git a/arch/m68k/include/asm/vga.h b/arch/m68k/include/asm/vga.h index 4742e6bc3ab8..cdd414fa8710 100644 --- a/arch/m68k/include/asm/vga.h +++ b/arch/m68k/include/asm/vga.h @@ -9,7 +9,7 @@ */ #ifndef CONFIG_PCI -#include <asm/raw_io.h> +#include <asm/io.h> #include <asm/kmap.h> /* @@ -29,9 +29,9 @@ #define inw_p(port) 0 #define outb_p(port, val) do { } while (0) #define outw(port, val) do { } while (0) -#define readb raw_inb -#define writeb raw_outb -#define writew raw_outw +#define readb __raw_readb +#define writeb __raw_writeb +#define writew __raw_writew #endif /* CONFIG_PCI */ #endif /* _ASM_M68K_VGA_H */ diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c index cc988bbd27fc..0880a5551d97 100644 --- a/arch/mips/dec/prom/init.c +++ b/arch/mips/dec/prom/init.c @@ -42,7 +42,7 @@ int (*__pmax_close)(int); * Detect which PROM the DECSTATION has, and set the callback vectors * appropriately. 
*/ -void __init which_prom(s32 magic, s32 *prom_vec) +static void __init which_prom(s32 magic, s32 *prom_vec) { /* * No sign of the REX PROM's magic number means we assume a non-REX diff --git a/arch/mips/include/asm/ds1287.h b/arch/mips/include/asm/ds1287.h index 46cfb01f9a14..51cb61fd4c03 100644 --- a/arch/mips/include/asm/ds1287.h +++ b/arch/mips/include/asm/ds1287.h @@ -8,7 +8,7 @@ #define __ASM_DS1287_H extern int ds1287_timer_state(void); -extern void ds1287_set_base_clock(unsigned int clock); +extern int ds1287_set_base_clock(unsigned int hz); extern int ds1287_clockevent_init(int irq); #endif diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 696b40beb774..8494466740cc 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -47,6 +47,16 @@ extern phys_addr_t __mips_cm_phys_base(void); */ extern int mips_cm_is64; +/* + * mips_cm_is_l2_hci_broken - determine if HCI is broken + * + * Some CM reports show that Hardware Cache Initialization is + * complete, but in reality it's not the case. They also incorrectly + * indicate that Hardware Cache Initialization is supported. This + * flags allows warning about this broken feature. + */ +extern bool mips_cm_is_l2_hci_broken; + /** * mips_cm_error_report - Report CM cache errors */ @@ -86,6 +96,18 @@ static inline bool mips_cm_present(void) } /** + * mips_cm_update_property - update property from the device tree + * + * Retrieve the properties from the device tree if a CM node exist and + * update the internal variable based on this. + */ +#ifdef CONFIG_MIPS_CM +extern void mips_cm_update_property(void); +#else +static inline void mips_cm_update_property(void) {} +#endif + +/** * mips_cm_has_l2sync - determine whether an L2-only sync region is present * * Returns true if the system implements an L2-only sync region, else false. 
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 428b9f1cf1de..b1da249dcd71 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -65,7 +65,8 @@ static inline void instruction_pointer_set(struct pt_regs *regs, /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET \ + (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c index 9a47fbcd4638..de64d6bb7ba3 100644 --- a/arch/mips/kernel/cevt-ds1287.c +++ b/arch/mips/kernel/cevt-ds1287.c @@ -10,6 +10,7 @@ #include <linux/mc146818rtc.h> #include <linux/irq.h> +#include <asm/ds1287.h> #include <asm/time.h> int ds1287_timer_state(void) diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 8c401e42301c..f39e85fd58fa 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -248,7 +248,7 @@ int ftrace_disable_ftrace_graph_caller(void) #define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */ #define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */ -unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long +static unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long old_parent_ra, unsigned long parent_ra_addr, unsigned long fp) { unsigned long sp, ip, tmp; diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index b4f7d950c846..e21c2fd76167 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -5,6 +5,7 @@ */ #include <linux/errno.h> +#include <linux/of.h> #include <linux/percpu.h> #include <linux/spinlock.h> @@ -14,6 +15,7 @@ void __iomem *mips_gcr_base; void __iomem *mips_cm_l2sync_base; int mips_cm_is64; +bool mips_cm_is_l2_hci_broken; static char *cm2_tr[8] 
= { "mem", "gcr", "gic", "mmio", @@ -238,6 +240,18 @@ static void mips_cm_probe_l2sync(void) mips_cm_l2sync_base = ioremap(addr, MIPS_CM_L2SYNC_SIZE); } +void mips_cm_update_property(void) +{ + struct device_node *cm_node; + + cm_node = of_find_compatible_node(of_root, NULL, "mobileye,eyeq6-cm"); + if (!cm_node) + return; + pr_info("HCI (Hardware Cache Init for the L2 cache) in GCR_L2_RAM_CONFIG from the CM3 is broken"); + mips_cm_is_l2_hci_broken = true; + of_node_put(cm_node); +} + int mips_cm_probe(void) { phys_addr_t addr; diff --git a/arch/mips/loongson64/boardinfo.c b/arch/mips/loongson64/boardinfo.c index 280989c5a137..8bb275c93ac0 100644 --- a/arch/mips/loongson64/boardinfo.c +++ b/arch/mips/loongson64/boardinfo.c @@ -21,13 +21,11 @@ static ssize_t boardinfo_show(struct kobject *kobj, "BIOS Info\n" "Vendor\t\t\t: %s\n" "Version\t\t\t: %s\n" - "ROM Size\t\t: %d KB\n" "Release Date\t\t: %s\n", strsep(&tmp_board_manufacturer, "-"), eboard->name, strsep(&tmp_bios_vendor, "-"), einter->description, - einter->size, especial->special_name); } static struct kobj_attribute boardinfo_attr = __ATTR(boardinfo, 0444, diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 587cf1d115e8..d6fbb69baa2d 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1660,7 +1660,7 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; } - case 0x3: + case 0x7: if (MIPSInst_FUNC(ir) != pfetch_op) return SIGILL; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 833fcfc20b10..81fda8c583ae 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -527,7 +527,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free_early(__pa(ptr), size); + memblock_free(__pa(ptr), size); } void __init setup_per_cpu_areas(void) diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index fcc761b0e11b..d20e8283c5b8 100644 --- 
a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -62,6 +62,7 @@ static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss; #define PDT_ADDR_PERM_ERR (pdt_type != PDT_PDC ? 2UL : 0UL) #define PDT_ADDR_SINGLE_ERR 1UL +#ifdef CONFIG_PROC_FS /* report PDT entries via /proc/meminfo */ void arch_report_meminfo(struct seq_file *m) { @@ -73,6 +74,7 @@ void arch_report_meminfo(struct seq_file *m) seq_printf(m, "PDT_cur_entries: %7lu\n", pdt_status.pdt_entries); } +#endif static int get_info_pat_new(void) { diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 6ce427b58836..ecd27b48d61f 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -103,9 +103,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? -1 : 0; diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index b6ac4f86c87b..433d164374cb 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -90,6 +90,34 @@ static inline int hash__hugepd_ok(hugepd_t hpd) #endif /* + * With 4K page size the real_pte machinery is all nops. 
+ */ +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) +{ + return (real_pte_t){pte}; +} + +#define __rpte_to_pte(r) ((r).pte) + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + return pte_val(__rpte_to_pte(rpte)) >> H_PAGE_F_GIX_SHIFT; +} + +#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ + index = 0; \ + shift = mmu_psize_defs[psize].shift; \ + +#define pte_iterate_hashed_end() } while(0) + +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. + */ +#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K + +/* * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just * a matter of returning the PTE bits that need to be modified. On 64K PTE, * things are a little more involved and hence needs many more parameters to diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6866d860d4f3..cd9088673cea 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -336,32 +336,6 @@ extern unsigned long pci_io_base; #ifndef __ASSEMBLY__ -/* - * This is the default implementation of various PTE accessors, it's - * used in all cases except Book3S with 64K pages where we have a - * concept of sub-pages - */ -#ifndef __real_pte - -#define __real_pte(e, p, o) ((real_pte_t){(e)}) -#define __rpte_to_pte(r) ((r).pte) -#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) - -#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ - do { \ - index = 0; \ - shift = mmu_psize_defs[psize].shift; \ - -#define pte_iterate_hashed_end() } while(0) - -/* - * We expect this to be called only for user addresses or kernel virtual - * addresses other than the linear mapping. 
- */ -#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K - -#endif /* __real_pte */ - static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long clr, unsigned long set, int huge) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d01a0ad57e38..f2378f51cbed 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -16,6 +16,7 @@ #include <linux/capability.h> #include <linux/delay.h> #include <linux/cpu.h> +#include <linux/nospec.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/completion.h> @@ -1076,6 +1077,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; + nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args)); + nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs); + /* Copy in args. */ if (copy_from_user(args.args, uargs->args, nargs * sizeof(rtas_arg_t)) != 0) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index c5ed98823835..e303e3f039a6 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -51,7 +51,7 @@ static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; - area = get_vm_area(PAGE_SIZE, VM_ALLOC); + area = get_vm_area(PAGE_SIZE, 0); if (!area) { WARN_ONCE(1, "Failed to create text area for cpu %d\n", cpu); diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index ed37a93bf858..ea3082f2f9d1 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -190,8 +190,10 @@ static int spufs_fill_dir(struct dentry *dir, return -ENOMEM; ret = spufs_new_file(dir->d_sb, dentry, files->ops, files->mode & mode, files->size, ctx); - if (ret) + if (ret) { + dput(dentry); return ret; + } files++; } return 0; @@ -434,8 +436,11 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, } ret = 
spufs_mkdir(inode, dentry, flags, mode & 0777); - if (ret) + if (ret) { + if (neighbor) + put_spu_context(neighbor); goto out_aff_unlock; + } if (affinity) { spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx, diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index f51fd4ac3f0b..38889a013a86 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -578,8 +578,10 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) switch(rets[0]) { case 0: - result = EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE; + result = EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED; break; case 1: result = EEH_STATE_RESET_ACTIVE | diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 87f001b4c4e4..f12229ce7301 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -56,8 +56,7 @@ void __init svm_swiotlb_init(void) return; - memblock_free_early(__pa(vstart), - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free(__pa(vstart), PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); panic("SVM: Cannot allocate SWIOTLB buffer"); } diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d47d87c2d7e3..195f4ebd71f2 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -77,7 +77,7 @@ struct dyn_arch_ftrace { #define make_call_t0(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = to_auipc_t0(offset); \ call[1] = to_jalr_t0(offset); \ } while (0) @@ -93,7 +93,7 @@ do { \ #define make_call_ra(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = 
to_auipc_ra(offset); \ call[1] = to_jalr_ra(offset); \ } while (0) diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 46677daf708b..cc11c4544cff 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -19,16 +19,9 @@ #ifndef __ASSEMBLY__ +void arch_kgdb_breakpoint(void); extern unsigned long kgdb_compiled_break; -static inline void arch_kgdb_breakpoint(void) -{ - asm(".global kgdb_compiled_break\n" - ".option norvc\n" - "kgdb_compiled_break: ebreak\n" - ".option rvc\n"); -} - #endif /* !__ASSEMBLY__ */ #define DBG_REG_ZERO "zero" diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 34fbb3ea21d5..932ec2500b8a 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -60,8 +60,11 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { args[0] = regs->orig_a0; - args++; - memcpy(args, ®s->a1, 5 * sizeof(args[0])); + args[1] = regs->a1; + args[2] = regs->a2; + args[3] = regs->a3; + args[4] = regs->a4; + args[5] = regs->a5; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 963ed7edcff2..1d83b3696721 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -273,6 +273,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) regs->epc = pc; } +noinline void arch_kgdb_breakpoint(void) +{ + asm(".global kgdb_compiled_break\n" + "kgdb_compiled_break: ebreak\n"); +} + void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, char *remcom_out_buffer) { diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 8cc147491c67..0c85b9e59ec0 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -84,6 +84,9 @@ static struct resource bss_res = { .name = "Kernel bss", }; static struct resource elfcorehdr_res = { .name = "ELF Core hdr", }; #endif +static int num_standard_resources; 
+static struct resource *standard_resources; + static int __init add_resource(struct resource *parent, struct resource *res) { @@ -157,7 +160,7 @@ static void __init init_resources(void) struct resource *res = NULL; struct resource *mem_res = NULL; size_t mem_res_sz = 0; - int num_resources = 0, res_idx = 0; + int num_resources = 0, res_idx = 0, non_resv_res = 0; int ret = 0; /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ @@ -221,6 +224,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { res = &mem_res[res_idx--]; + non_resv_res++; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -238,6 +242,9 @@ static void __init init_resources(void) goto error; } + num_standard_resources = non_resv_res; + standard_resources = &mem_res[res_idx + 1]; + /* Clean-up any unused pre-allocated resources */ if (res_idx >= 0) memblock_free(__pa(mem_res), (res_idx + 1) * sizeof(*mem_res)); @@ -249,6 +256,33 @@ static void __init init_resources(void) memblock_free(__pa(mem_res), mem_res_sz); } +static int __init reserve_memblock_reserved_regions(void) +{ + u64 i, j; + + for (i = 0; i < num_standard_resources; i++) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) + continue; + + for_each_reserved_mem_range(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "Reserved"); + } + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); static void __init parse_dtb(void) { diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index bf15767b729f..002e0e6a9b2b 100644 --- a/arch/s390/include/asm/futex.h +++ 
b/arch/s390/include/asm/futex.h @@ -43,7 +43,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, break; case FUTEX_OP_ANDN: __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + ret, oldval, newval, uaddr, ~oparg); break; case FUTEX_OP_XOR: __futex_atomic_op("lr %2,%1\nxr %2,%5\n", diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 48f67a69d119..5c1fd147591c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -875,7 +875,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ __smp_rescan_cpus(info, true); - memblock_free_early((unsigned long)info, sizeof(*info)); + memblock_free((unsigned long)info, sizeof(*info)); } /* diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 4044826d72ae..f9bed5ff5d48 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -281,10 +281,10 @@ static void __init test_monitor_call(void) return; asm volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 6f0209d45164..9c5f546a2e1a 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + TP_printk("create cpu %d at 0x%p, sie block at 0x%p", __entry->id, __entry->vcpu, __entry->sie_block) ); @@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %pK)\n", + TP_printk("enabling channel I/O support (kvm @ %p)\n", __entry->kvm) ); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index e07bc0d3df6f..b95677c4855a 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1321,8 +1321,14 @@ static struct 
vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); if (page) { - if (page_ref_inc_return(page) == 2) - return page_to_virt(page); + if (page_ref_inc_return(page) == 2) { + if (page->index == addr) + return page_to_virt(page); + /* + * We raced with someone reusing + putting this vsie + * page before we grabbed it. + */ + } page_ref_dec(page); } @@ -1352,15 +1358,20 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (page->index != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + page->index >> 9); } - page->index = addr; - /* double use of the same address */ + /* Mark it as invalid until it resides in the tree. */ + page->index = ULONG_MAX; + + /* Double use of the same address or allocation failure. */ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { page_ref_dec(page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } + page->index = addr; mutex_unlock(&kvm->arch.vsie.mutex); vsie_page = page_to_virt(page); @@ -1453,7 +1464,9 @@ void kvm_s390_vsie_destroy(struct kvm *kvm) vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (page->index != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + page->index >> 9); __free_page(page); } kvm->arch.vsie.page_count = 0; diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 9a725547578e..78ee9a023529 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -52,8 +52,10 @@ out: void arch_enter_lazy_mmu_mode(void) { - struct tlb_batch *tb = this_cpu_ptr(&tlb_batch); + struct tlb_batch *tb; + preempt_disable(); + tb = this_cpu_ptr(&tlb_batch); tb->active = 1; } @@ -64,6 +66,7 @@ void 
arch_leave_lazy_mmu_mode(void) if (tb->tlb_nr) flush_tlb_pending(); tb->active = 0; + preempt_enable(); } static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2f6312e7ce81..de6a66ad3fa6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -194,7 +194,7 @@ config X86 select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS - select HAVE_EISA + select HAVE_EISA if X86_32 select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE @@ -2449,7 +2449,8 @@ config CPU_IBPB_ENTRY depends on CPU_SUP_AMD && X86_64 default y help - Compile the kernel with support for the retbleed=ibpb mitigation. + Compile the kernel with support for the retbleed=ibpb and + spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations. config CPU_IBRS_ENTRY bool "Enable IBRS on kernel entry" diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 15c5ae62a0e9..3ec9fb6b0378 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -33,6 +33,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. 
KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) +KBUILD_CFLAGS += -std=gnu11 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index b00a3a95fbfa..16e12b45b151 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -70,6 +70,8 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + /* We just clobbered the return address - use the IRET frame for unwinding: */ + UNWIND_HINT_IRET_REGS offset=3*8 .else pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index f4419afc7147..bda217961172 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -16,7 +16,7 @@ SYM_FUNC_START(entry_ibpb) movl $MSR_IA32_PRED_CMD, %ecx - movl $PRED_CMD_IBPB, %eax + movl _ASM_RIP(x86_pred_cmd), %eax xorl %edx, %edx wrmsr diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index b70e1522a27a..767c60af13be 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4364,8 +4364,11 @@ static void intel_pmu_cpu_starting(int cpu) init_debug_store_on_cpu(cpu); /* - * Deal with CPUs that don't clear their LBRs on power-up. + * Deal with CPUs that don't clear their LBRs on power-up, and that may + * even boot with LBRs enabled. 
*/ + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr) + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT); intel_pmu_lbr_reset(); cpuc->lbr_sel = NULL; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 21a9cb48daf5..99517e85325e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1101,8 +1101,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) * + precise_ip < 2 for the non event IP * + For RTM TSX weight we need GPRs for the abort code. */ - gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_GP_REGS); + gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS)) || + ((sample_type & PERF_SAMPLE_REGS_USER) && + (attr->sample_regs_user & PEBS_GP_REGS)); tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == @@ -1701,7 +1703,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, regs->flags &= ~PERF_EFLAGS_EXACT; } - if (sample_type & PERF_SAMPLE_REGS_INTR) + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) adaptive_pebs_save_regs(regs, gprs); } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index d081eb89ba12..831c4e3f371a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4656,28 +4656,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), /* Free-Running IIO BANDWIDTH IN Counters */ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + 
INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), { /* end: all zeroes */ }, }; @@ -5250,37 +5250,6 @@ static struct freerunning_counters icx_iio_freerunning[] = { [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, }; -static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - 
INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type icx_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 9, @@ -5288,7 +5257,7 @@ static struct intel_uncore_type icx_uncore_iio_free_running = { .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, .freerunning = icx_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = icx_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; @@ -5857,69 +5826,13 @@ 
static struct freerunning_counters spr_iio_freerunning[] = { [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, }; -static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - /* Free-Running IIO BANDWIDTH OUT Counters */ - INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), - 
INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type spr_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 17, .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, .freerunning = spr_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = spr_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 4bdcb91478a5..9e00275576b1 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -44,6 
+44,7 @@ KVM_X86_OP(set_idt) KVM_X86_OP(get_gdt) KVM_X86_OP(set_gdt) KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr6) KVM_X86_OP(set_dr7) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f779facd8246..710c9c87cdf2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1344,6 +1344,7 @@ struct kvm_x86_ops { void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); + void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5d7494631ea9..c07c018a1c13 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -33,6 +33,8 @@ typedef struct { */ atomic64_t tlb_gen; + unsigned long next_trim_cpumask; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct rw_semaphore ldt_usr_sem; struct ldt_struct *ldt; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 27516046117a..1d11aeb59754 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -106,6 +106,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); + mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 15939a71dca7..03b12c194588 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -337,7 +337,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR 
bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index b587a9ee9cb2..d4857798f232 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -207,6 +207,7 @@ struct flush_tlb_info { unsigned int initiating_cpu; u8 stride_shift; u8 freed_tables; + u8 trim_cpumask; }; void flush_tlb_local(void); @@ -226,7 +227,7 @@ void flush_tlb_multi(const struct cpumask *cpumask, flush_tlb_mm_range((vma)->vm_mm, start, end, \ ((vma)->vm_flags & VM_HUGETLB) \ ? huge_page_shift(hstate_vma(vma)) \ - : PAGE_SHIFT, false) + : PAGE_SHIFT, true) extern void flush_tlb_all(void); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 62cd2af806b4..eda11832b6e6 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -544,6 +544,10 @@ static __init void fix_erratum_688(void) static __init int init_amd_nbs(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return 0; + amd_cache_northbridges(); amd_cache_gart(); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ce5b27db65e1..a8dc7fe5f100 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -811,7 +811,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c) * (model = 0x14) and later actually support it. * (AMD Erratum #110, docId: 25759). 
*/ - if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { + if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM) && !cpu_has(c, X86_FEATURE_HYPERVISOR)) { clear_cpu_cap(c, X86_FEATURE_LAHF_LM); if (!rdmsrl_amd_safe(0xc001100d, &value)) { value &= ~BIT_64(32); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f84d59cd180b..c10d93d2773b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1092,6 +1092,8 @@ do_cmd_auto: case RETBLEED_MITIGATION_IBPB: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + mitigate_smt = true; /* * IBPB on entry already obviates the need for @@ -1101,8 +1103,6 @@ do_cmd_auto: setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); - mitigate_smt = true; - /* * There is no need for RSB filling: entry_ibpb() ensures * all predictions, including the RSB, are invalidated, @@ -1553,7 +1553,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) rrsba_disabled = true; } -static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode) { /* * Similar to context switches, there are two types of RSB attacks @@ -1577,27 +1577,30 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ */ switch (mode) { case SPECTRE_V2_NONE: - return; + break; - case SPECTRE_V2_EIBRS_LFENCE: case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS_RETPOLINE: if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); } - return; + break; - case SPECTRE_V2_EIBRS_RETPOLINE: case SPECTRE_V2_RETPOLINE: case SPECTRE_V2_LFENCE: case SPECTRE_V2_IBRS: + pr_info("Spectre v2 / SpectreRSB: Filling RSB on context 
switch and VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); - pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); - return; - } + break; - pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); - dump_stack(); + default: + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n"); + dump_stack(); + break; + } } /* @@ -1653,10 +1656,11 @@ static void __init bhi_select_mitigation(void) return; } - if (spec_ctrl_bhi_dis()) + if (!IS_ENABLED(CONFIG_X86_64)) return; - if (!IS_ENABLED(CONFIG_X86_64)) + /* Mitigate in hardware if supported */ + if (spec_ctrl_bhi_dis()) return; /* Mitigate KVM by default */ @@ -1822,10 +1826,7 @@ static void __init spectre_v2_select_mitigation(void) * * FIXME: Is this pointless for retbleed-affected AMD? */ - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - - spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + spectre_v2_select_rsb_mitigation(mode); /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS @@ -2607,6 +2608,7 @@ static void __init srso_select_mitigation(void) if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB; /* @@ -2616,6 +2618,13 @@ static void __init srso_select_mitigation(void) */ setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); + + /* + * There is no need for RSB filling: entry_ibpb() ensures + * all predictions, including the RSB, are invalidated, + * regardless of IBPB implementation. 
+ */ + setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); @@ -2624,8 +2633,8 @@ static void __init srso_select_mitigation(void) break; case SRSO_CMD_IBPB_ON_VMEXIT: - if (IS_ENABLED(CONFIG_CPU_SRSO)) { - if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; @@ -2637,9 +2646,9 @@ static void __init srso_select_mitigation(void) setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { - pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); goto pred_cmd; - } + } break; default: diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index b5e36bd0425b..a33c972cecf5 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -795,7 +795,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dbaea8a6175b..1ebd67c95d86 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1345,9 +1345,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); - /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && - !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + /* + * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with + * BHI_NO still need to use the BHI mitigation to prevent Intra-mode + * attacks. When virtualized, eIBRS could be hidden, assume vulnerable. 
+ */ + if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) setup_force_cpu_bug(X86_BUG_BHI); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 7227c15299d0..7de799ab2a04 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -152,8 +152,8 @@ static void geode_configure(void) u8 ccr3; local_irq_save(flags); - /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + /* Suspend on halt power saving */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x08); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 2b1cd4202e75..e7145ee656df 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -749,26 +749,37 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -#define TLB_INST_4K 0x01 -#define TLB_INST_4M 0x02 -#define TLB_INST_2M_4M 0x03 +#define TLB_INST_4K 0x01 +#define TLB_INST_4M 0x02 +#define TLB_INST_2M_4M 0x03 -#define TLB_INST_ALL 0x05 -#define TLB_INST_1G 0x06 +#define TLB_INST_ALL 0x05 +#define TLB_INST_1G 0x06 -#define TLB_DATA_4K 0x11 -#define TLB_DATA_4M 0x12 -#define TLB_DATA_2M_4M 0x13 -#define TLB_DATA_4K_4M 0x14 +#define TLB_DATA_4K 0x11 +#define TLB_DATA_4M 0x12 +#define TLB_DATA_2M_4M 0x13 +#define TLB_DATA_4K_4M 0x14 -#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G_2M_4M 0x17 -#define TLB_DATA0_4K 0x21 -#define TLB_DATA0_4M 0x22 -#define TLB_DATA0_2M_4M 0x23 +#define TLB_DATA0_4K 0x21 +#define TLB_DATA0_4M 0x22 +#define TLB_DATA0_2M_4M 0x23 -#define STLB_4K 0x41 -#define STLB_4K_2M 0x42 +#define STLB_4K 0x41 +#define STLB_4K_2M 0x42 + +/* + * All of leaf 0x2's one-byte TLB descriptors implies the same number of + * entries for their respective TLB types. 
The 0x63 descriptor is an exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries + * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for + * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the + * intel_tlb_table[] mapping. + */ +#define TLB_0x63_2M_4M_ENTRIES 32 static const struct _tlb_table intel_tlb_table[] = { { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, @@ -790,7 +801,8 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, - { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, + { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative" + " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" }, { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, @@ -890,6 +902,12 @@ static void intel_tlb_lookup(const unsigned char desc) if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; + case TLB_DATA_1G_2M_4M: + if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + fallthrough; case TLB_DATA_1G: if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; @@ -913,7 +931,7 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c) cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 
0; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 6a95a52d08da..8143089c9b98 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -861,7 +861,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz return ret; } - for_each_node(nid) { + for_each_node_with_cpus(nid) { cpu = cpumask_first(cpumask_of_node(nid)); c = &cpu_data(cpu); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 19762b47fbec..b6424322f5d3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -16,7 +16,6 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kexec.h> -#include <linux/i8253.h> #include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> @@ -445,16 +444,6 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; - /* - * Hyper-V VMs have a PIT emulation quirk such that zeroing the - * counter register during PIT shutdown restarts the PIT. So it - * continues to interrupt @18.2 HZ. Setting i8253_clear_counter - * to false tells pit_shutdown() not to zero the counter so that - * the PIT really is shutdown. Generation 2 VMs don't have a PIT, - * and setting this value has no effect. - */ - i8253_clear_counter_on_shutdown = false; - #if IS_ENABLED(CONFIG_HYPERV) /* * Setup the hook to get control post apic initialization. 
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index aa9b8b868867..afccb69cd9a2 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -150,13 +150,15 @@ int __init sgx_drv_init(void) u64 xfrm_mask; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) + if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) { + pr_info("SGX disabled: SGX launch control CPU feature is not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; + } cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx); if (!(eax & 1)) { - pr_err("SGX disabled: SGX1 instruction support not available.\n"); + pr_info("SGX disabled: SGX1 instruction support not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; } @@ -173,8 +175,10 @@ int __init sgx_drv_init(void) } ret = misc_register(&sgx_dev_enclave); - if (ret) + if (ret) { + pr_info("SGX disabled: Unable to register the /dev/sgx_enclave driver (%d).\n", ret); return ret; + } return 0; } diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index fa5777af8da1..2e93bafa7c47 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -232,25 +232,10 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page, return epc_page; } -static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl, - unsigned long addr, - unsigned long vm_flags) +static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl, + struct sgx_encl_page *entry) { - unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC); struct sgx_epc_page *epc_page; - struct sgx_encl_page *entry; - - entry = xa_load(&encl->page_array, PFN_DOWN(addr)); - if (!entry) - return ERR_PTR(-EFAULT); - - /* - * Verify that the faulted page has equal or higher build time - * permissions than the VMA permissions (i.e. the subset of {VM_READ, - * VM_WRITE, VM_EXECUTE} in vma->vm_flags). 
- */ - if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits) - return ERR_PTR(-EFAULT); /* Entry successfully located. */ if (entry->epc_page) { @@ -276,6 +261,40 @@ static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl, return entry; } +static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl, + unsigned long addr, + unsigned long vm_flags) +{ + unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + struct sgx_encl_page *entry; + + entry = xa_load(&encl->page_array, PFN_DOWN(addr)); + if (!entry) + return ERR_PTR(-EFAULT); + + /* + * Verify that the page has equal or higher build time + * permissions than the VMA permissions (i.e. the subset of {VM_READ, + * VM_WRITE, VM_EXECUTE} in vma->vm_flags). + */ + if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits) + return ERR_PTR(-EFAULT); + + return __sgx_encl_load_page(encl, entry); +} + +struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl, + unsigned long addr) +{ + struct sgx_encl_page *entry; + + entry = xa_load(&encl->page_array, PFN_DOWN(addr)); + if (!entry) + return ERR_PTR(-EFAULT); + + return __sgx_encl_load_page(encl, entry); +} + static vm_fault_t sgx_vma_fault(struct vm_fault *vmf) { unsigned long addr = (unsigned long)vmf->address; @@ -297,7 +316,7 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf) mutex_lock(&encl->lock); - entry = sgx_encl_load_page(encl, addr, vma->vm_flags); + entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags); if (IS_ERR(entry)) { mutex_unlock(&encl->lock); @@ -445,7 +464,7 @@ static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl, for ( ; ; ) { mutex_lock(&encl->lock); - entry = sgx_encl_load_page(encl, addr, vm_flags); + entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags); if (PTR_ERR(entry) != -EBUSY) break; @@ -702,7 +721,7 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm) spin_lock(&encl->mm_lock); list_add_rcu(&encl_mm->list, 
&encl->mm_list); - /* Pairs with smp_rmb() in sgx_reclaimer_block(). */ + /* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */ smp_wmb(); encl->mm_list_version++; spin_unlock(&encl->mm_lock); @@ -918,7 +937,52 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, } /** + * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave + * @encl: the enclave + * @addr: page aligned pointer to single page for which PTEs will be removed + * + * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping + * @addr from each VMA. Ensure that page fault handler is ready to handle + * new mappings of @addr before calling this function. + */ +void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr) +{ + unsigned long mm_list_version; + struct sgx_encl_mm *encl_mm; + struct vm_area_struct *vma; + int idx, ret; + + do { + mm_list_version = encl->mm_list_version; + + /* Pairs with smp_wmb() in sgx_encl_mm_add(). */ + smp_rmb(); + + idx = srcu_read_lock(&encl->srcu); + + list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { + if (!mmget_not_zero(encl_mm->mm)) + continue; + + mmap_read_lock(encl_mm->mm); + + ret = sgx_encl_find(encl_mm->mm, addr, &vma); + if (!ret && encl == vma->vm_private_data) + zap_vma_ptes(vma, addr, PAGE_SIZE); + + mmap_read_unlock(encl_mm->mm); + + mmput_async(encl_mm->mm); + } + + srcu_read_unlock(&encl->srcu, idx); + } while (unlikely(encl->mm_list_version != mm_list_version)); +} + +/** * sgx_alloc_va_page() - Allocate a Version Array (VA) page + * @reclaim: Reclaim EPC pages directly if none available. Enclave + * mutex should not be held if this is set. * * Allocate a free EPC page and convert it to a Version Array (VA) page. 
* @@ -926,12 +990,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, * a VA page, * -errno otherwise */ -struct sgx_epc_page *sgx_alloc_va_page(void) +struct sgx_epc_page *sgx_alloc_va_page(bool reclaim) { struct sgx_epc_page *epc_page; int ret; - epc_page = sgx_alloc_epc_page(NULL, true); + epc_page = sgx_alloc_epc_page(NULL, reclaim); if (IS_ERR(epc_page)) return ERR_CAST(epc_page); diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index 332ef3568267..b5e9602be127 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -113,11 +113,15 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, void sgx_encl_put_backing(struct sgx_backing *backing); int sgx_encl_test_and_clear_young(struct mm_struct *mm, struct sgx_encl_page *page); - -struct sgx_epc_page *sgx_alloc_va_page(void); +void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr); +struct sgx_epc_page *sgx_alloc_va_page(bool reclaim); unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page); void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset); bool sgx_va_page_full(struct sgx_va_page *va_page); void sgx_encl_free_epc_page(struct sgx_epc_page *page); +struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl, + unsigned long addr); +struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim); +void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page); #endif /* _X86_ENCL_H */ diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 217777c029ee..14ee6d218003 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -17,7 +17,7 @@ #include "encl.h" #include "encls.h" -static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl) +struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim) { struct sgx_va_page *va_page = NULL; void *err; @@ -30,7 +30,7 @@ static struct sgx_va_page 
*sgx_encl_grow(struct sgx_encl *encl) if (!va_page) return ERR_PTR(-ENOMEM); - va_page->epc_page = sgx_alloc_va_page(); + va_page->epc_page = sgx_alloc_va_page(reclaim); if (IS_ERR(va_page->epc_page)) { err = ERR_CAST(va_page->epc_page); kfree(va_page); @@ -43,7 +43,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl) return va_page; } -static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page) +void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page) { encl->page_cnt--; @@ -64,7 +64,14 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) struct file *backing; long ret; - va_page = sgx_encl_grow(encl); + /* + * ECREATE would detect this too, but checking here also ensures + * that the 'encl_size' calculations below can never overflow. + */ + if (!is_power_of_2(secs->size)) + return -EINVAL; + + va_page = sgx_encl_grow(encl, true); if (IS_ERR(va_page)) return PTR_ERR(va_page); else if (va_page) @@ -306,7 +313,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src, return PTR_ERR(epc_page); } - va_page = sgx_encl_grow(encl); + va_page = sgx_encl_grow(encl, true); if (IS_ERR(va_page)) { ret = PTR_ERR(va_page); goto err_out_free; diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index ad453b4387a4..fd786f1bc4e8 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -122,36 +122,9 @@ static void sgx_reclaimer_block(struct sgx_epc_page *epc_page) struct sgx_encl_page *page = epc_page->owner; unsigned long addr = page->desc & PAGE_MASK; struct sgx_encl *encl = page->encl; - unsigned long mm_list_version; - struct sgx_encl_mm *encl_mm; - struct vm_area_struct *vma; - int idx, ret; - - do { - mm_list_version = encl->mm_list_version; - - /* Pairs with smp_rmb() in sgx_encl_mm_add(). 
*/ - smp_rmb(); - - idx = srcu_read_lock(&encl->srcu); - - list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { - if (!mmget_not_zero(encl_mm->mm)) - continue; - - mmap_read_lock(encl_mm->mm); - - ret = sgx_encl_find(encl_mm->mm, addr, &vma); - if (!ret && encl == vma->vm_private_data) - zap_vma_ptes(vma, addr, PAGE_SIZE); - - mmap_read_unlock(encl_mm->mm); - - mmput_async(encl_mm->mm); - } + int ret; - srcu_read_unlock(&encl->srcu, idx); - } while (unlikely(encl->mm_list_version != mm_list_version)); + sgx_zap_enclave_ptes(encl, addr); mutex_lock(&encl->lock); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 92b33c7eaf3f..8a8660074284 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -195,6 +195,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); + stack = stack ?: get_stack_pointer(task, regs); regs = unwind_get_entry_regs(&state, &partial); /* @@ -213,9 +214,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack * - entry stack */ - for (stack = stack ?: get_stack_pointer(task, regs); - stack; - stack = stack_info.next_sp) { + for (; stack; stack = stack_info.next_sp) { const char *stack_name; stack = PTR_ALIGN(stack, sizeof(long)); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index f267205f2d5a..678289bc60b5 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) void __init e820__register_nosave_regions(unsigned long limit_pfn) { int i; - unsigned long pfn = 0; + u64 last_addr = 0; for (i = 0; i < e820_table->nr_entries; i++) { struct e820_entry *entry = &e820_table->entries[i]; - if (pfn < PFN_UP(entry->addr)) - register_nosave_region(pfn, PFN_UP(entry->addr)); - - pfn = PFN_DOWN(entry->addr + entry->size); - if (entry->type != 
E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) - register_nosave_region(PFN_UP(entry->addr), pfn); + continue; - if (pfn >= limit_pfn) - break; + if (last_addr < entry->addr) + register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr)); + + last_addr = entry->addr + entry->size; } + + register_nosave_region(PFN_DOWN(last_addr), limit_pfn); } #ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 2b7999a1a50a..80e262bb627f 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -8,6 +8,7 @@ #include <linux/timex.h> #include <linux/i8253.h> +#include <asm/hypervisor.h> #include <asm/apic.h> #include <asm/hpet.h> #include <asm/time.h> @@ -39,9 +40,15 @@ static bool __init use_pit(void) bool __init pit_timer_init(void) { - if (!use_pit()) + if (!use_pit()) { + /* + * Don't just ignore the PIT. Ensure it's stopped, because + * VMMs otherwise steal CPU time just to pointlessly waggle + * the (masked) IRQ. + */ + clockevent_i8253_disable(); return false; - + } clockevent_i8253_init(true); global_clock_event = &i8253_clockevent; return true; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 766ffe3ba313..439fdb3f5fdf 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -23,8 +23,10 @@ #include <asm/traps.h> #include <asm/thermal.h> +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> +#endif DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 720d99520316..72eb0df1a1a5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -83,7 +83,12 @@ EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - memcpy(dst, src, arch_task_struct_size); + /* init_task is not dynamically sized (incomplete FPU state) */ + 
if (unlikely(src == &init_task)) + memcpy_and_pad(dst, arch_task_struct_size, src, sizeof(init_task), 0); + else + memcpy(dst, src, arch_task_struct_size); + #ifdef CONFIG_VM86 dst->thread.vm86 = NULL; #endif diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 54d87bc2af7b..a5dd11c92d05 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -169,7 +169,6 @@ EXPORT_SYMBOL_GPL(arch_static_call_transform); noinstr void __static_call_update_early(void *tramp, void *func) { BUG_ON(system_state != SYSTEM_BOOTING); - BUG_ON(!early_boot_irqs_disabled); BUG_ON(static_call_initialized); __text_gen_insn(tramp, JMP32_INSN_OPCODE, tramp, func, JMP32_INSN_SIZE); sync_core(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a698196377be..693cb785357b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -920,7 +920,7 @@ static unsigned long long cyc2ns_suspend; void tsc_save_sched_clock_state(void) { - if (!sched_clock_stable()) + if (!static_branch_likely(&__use_tsc) && !sched_clock_stable()) return; cyc2ns_suspend = sched_clock(); @@ -940,7 +940,7 @@ void tsc_restore_sched_clock_state(void) unsigned long flags; int cpu; - if (!sched_clock_stable()) + if (!static_branch_likely(&__use_tsc) && !sched_clock_stable()) return; local_irq_save(flags); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 29a96d1c7e2b..32d1c9064b5e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1902,6 +1902,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) bool all_cpus; int i; + if (!lapic_in_kernel(vcpu)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (hc->code == HVCALL_SEND_IPI) { if (!hc->fast) { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi, @@ -2507,7 +2510,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; ent->eax |= 
HV_X64_RELAXED_TIMING_RECOMMENDED; - ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + if (!vcpu || lapic_in_kernel(vcpu)) + ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; if (evmcs_ver) ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index b595a33860d7..4bb2fbe6676a 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -737,7 +737,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. */ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -801,6 +801,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || @@ -838,6 +839,8 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -856,33 +859,6 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. 
If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -898,6 +874,34 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bc0958eb83b4..0d0aea145f2d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1887,11 +1887,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid = sd->next_asid++; } -static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) { - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = to_svm(vcpu)->vmcb; - if (svm->vcpu.arch.guest_state_protected) + if (vcpu->arch.guest_state_protected) return; if (unlikely(value != vmcb->save.dr6)) { @@ -3851,10 +3851,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * Run with all-zero DR6 unless needed, so that we can get the exact cause * of a #DB. 
*/ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - svm_set_dr6(svm, vcpu->arch.dr6); - else - svm_set_dr6(svm, DR6_ACTIVE_LOW); + if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) + svm_set_dr6(vcpu, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); @@ -4631,6 +4629,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 46fb83d6a286..4ca480ced35d 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -270,6 +270,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; @@ -308,21 +309,8 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. 
- */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - + !kvm_irq_is_postable(&irq)) continue; - } vcpu_info.pi_desc_addr = __pa(&to_vmx(vcpu)->pi_desc); vcpu_info.vector = irq.vector; @@ -330,11 +318,12 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); + if (!set) + continue; + + enable_remapped_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", __func__); @@ -342,6 +331,9 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, } } + if (enable_remapped_mode) + ret = irq_set_vcpu_affinity(host_irq, NULL); + ret = 0; out: srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6965cf92bd36..5e3e60bdaa5e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5249,6 +5249,12 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } +static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ + lockdep_assert_irqs_disabled(); + set_debugreg(vcpu->arch.dr6, 6); +} + static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -6839,10 +6845,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } - /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - set_debugreg(vcpu->arch.dr6, 6); - /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. 
Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug @@ -7777,6 +7779,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b9e7457bf2aa..bf03f3ff896e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9963,6 +9963,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); + /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ + if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + static_call(kvm_x86_set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 5953c7482016..1110f6dda352 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -264,28 +264,33 @@ static void __init probe_page_size_mask(void) } /* - * INVLPG may not properly flush Global entries - * on these CPUs when PCIDs are enabled. + * INVLPG may not properly flush Global entries on + * these CPUs. New microcode fixes the issue. 
*/ static const struct x86_cpu_id invlpg_miss_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 0), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 0), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, 0), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 0), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, 0), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, 0), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 0x2e), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 0x42c), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, 0x11), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 0x118), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, 0x4117), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, 0x2e), {} }; static void setup_pcid(void) { + const struct x86_cpu_id *invlpg_miss_match; + if (!IS_ENABLED(CONFIG_X86_64)) return; if (!boot_cpu_has(X86_FEATURE_PCID)) return; - if (x86_match_cpu(invlpg_miss_ids)) { + invlpg_miss_match = x86_match_cpu(invlpg_miss_ids); + + if (invlpg_miss_match && + boot_cpu_data.microcode < invlpg_miss_match->driver_data) { pr_info("Incomplete global flushes, disabling PCID"); setup_clear_cpu_cap(X86_FEATURE_PCID); return; diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c index 0612a73638a8..7641cff719bd 100644 --- a/arch/x86/mm/pat/cpa-test.c +++ b/arch/x86/mm/pat/cpa-test.c @@ -183,7 +183,7 @@ static int pageattr_test(void) break; case 1: - err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); + err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1); break; case 2: diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 511172d70825..11a43d373bae 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -384,9 +384,9 @@ static void cond_mitigation(struct task_struct *next) prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec); /* - * Avoid user/user BTB poisoning by flushing the branch predictor - * when switching between processes. This stops one process from - * doing Spectre-v2 attacks on another. 
+ * Avoid user->user BTB/RSB poisoning by flushing them when switching + * between processes. This stops one process from doing Spectre-v2 + * attacks on another. * * Both, the conditional and the always IBPB mode use the mm * pointer to avoid the IBPB when switching between tasks of the @@ -616,7 +616,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); - /* Let nmi_uaccess_okay() know that we're changing CR3. */ + /* + * Indicate that CR3 is about to change. nmi_uaccess_okay() + * and others are sensitive to the window where mm_cpumask(), + * CR3 and cpu_tlbstate.loaded_mm are not all in sync. + */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); barrier(); } @@ -854,9 +858,51 @@ done: nr_invalidate); } -static bool tlb_is_not_lazy(int cpu, void *data) +static bool should_flush_tlb(int cpu, void *data) { - return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); + struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu); + struct flush_tlb_info *info = data; + + /* + * Order the 'loaded_mm' and 'is_lazy' against their + * write ordering in switch_mm_irqs_off(). Ensure + * 'is_lazy' is at least as new as 'loaded_mm'. + */ + smp_rmb(); + + /* Lazy TLB will get flushed at the next context switch. */ + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) + return false; + + /* No mm means kernel memory flush. */ + if (!info->mm) + return true; + + /* + * While switching, the remote CPU could have state from + * either the prev or next mm. Assume the worst and flush. + */ + if (loaded_mm == LOADED_MM_SWITCHING) + return true; + + /* The target mm is loaded, and the CPU is not lazy. */ + if (loaded_mm == info->mm) + return true; + + /* In cpumask, but not the loaded mm? Periodically remove by flushing. 
*/ + if (info->trim_cpumask) + return true; + + return false; +} + +static bool should_trim_cpumask(struct mm_struct *mm) +{ + if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { + WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); + return true; + } + return false; } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); @@ -890,7 +936,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else - on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, (void *)info, 1, cpumask); } @@ -941,6 +987,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, info->freed_tables = freed_tables; info->new_tlb_gen = new_tlb_gen; info->initiating_cpu = smp_processor_id(); + info->trim_cpumask = 0; return info; } @@ -983,6 +1030,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * flush_tlb_func_local() directly in this case. 
*/ if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + info->trim_cpumask = should_trim_cpumask(mm); flush_tlb_multi(mm_cpumask(mm), info); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ac06f53391ec..f62ebeee8b14 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -36,6 +36,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT5(b1, b2, b3, b4, b5) \ + do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0) #define EMIT1_off32(b1, off) \ do { EMIT1(b1); EMIT(off, 4); } while (0) @@ -932,6 +934,47 @@ static void emit_nops(u8 **pprog, int len) #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) +static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, + struct bpf_prog *bpf_prog) +{ + u8 *prog = *pprog; + u8 *func; + + if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) { + /* The clearing sequence clobbers eax and ecx. */ + EMIT1(0x50); /* push rax */ + EMIT1(0x51); /* push rcx */ + ip += 2; + + func = (u8 *)clear_bhb_loop; + + if (emit_call(&prog, func, ip)) + return -EINVAL; + EMIT1(0x59); /* pop rcx */ + EMIT1(0x58); /* pop rax */ + } + /* Insert IBHF instruction */ + if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) && + cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) || + cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) { + /* + * Add an Indirect Branch History Fence (IBHF). IBHF acts as a + * fence preventing branch history from before the fence from + * affecting indirect branches after the fence. This is + * specifically used in cBPF jitted code to prevent Intra-mode + * BHI attacks. 
The IBHF instruction is designed to be a NOP on + * hardware that doesn't need or support it. The REP and REX.W + * prefixes are required by the microcode, and they also ensure + * that the NOP is unlikely to be used in existing code. + * + * IBHF is not a valid instruction in 32-bit mode. + */ + EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */ + } + *pprog = prog; + return 0; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { @@ -1737,6 +1780,15 @@ emit_jmp: seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; + + if (bpf_prog_was_classic(bpf_prog) && + !capable(CAP_SYS_ADMIN)) { + u8 *ip = image + addrs[i - 1]; + + if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) + return -EINVAL; + } + pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 72c1e42d121d..b3e3f64d436d 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -99,7 +99,12 @@ SYM_CODE_START_LOCAL(pvh_start_xen) xor %edx, %edx wrmsr - call xen_prepare_pvh + /* Call xen_prepare_pvh() via the kernel virtual mapping */ + leaq xen_prepare_pvh(%rip), %rax + subq phys_base(%rip), %rax + addq $__START_KERNEL_map, %rax + ANNOTATE_RETPOLINE_SAFE + call *%rax /* startup_64 expects boot_params in %rsi. */ mov $_pa(pvh_bootparams), %rsi diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 3359c23573c5..4eb6a6bb609f 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -95,6 +95,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; */ static DEFINE_SPINLOCK(xen_reservation_lock); +/* Protected by xen_reservation_lock. 
*/ +#define MIN_CONTIG_ORDER 9 /* 2MB */ +static unsigned int discontig_frames_order = MIN_CONTIG_ORDER; +static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata; +static unsigned long *discontig_frames __refdata = discontig_frames_early; +static bool discontig_frames_dyn; + +static int alloc_discontig_frames(unsigned int order) +{ + unsigned long *new_array, *old_array; + unsigned int old_order; + unsigned long flags; + + BUG_ON(order < MIN_CONTIG_ORDER); + BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE); + + new_array = (unsigned long *)__get_free_pages(GFP_KERNEL, + order - MIN_CONTIG_ORDER); + if (!new_array) + return -ENOMEM; + + spin_lock_irqsave(&xen_reservation_lock, flags); + + old_order = discontig_frames_order; + + if (order > discontig_frames_order || !discontig_frames_dyn) { + if (!discontig_frames_dyn) + old_array = NULL; + else + old_array = discontig_frames; + + discontig_frames = new_array; + discontig_frames_order = order; + discontig_frames_dyn = true; + } else { + old_array = new_array; + } + + spin_unlock_irqrestore(&xen_reservation_lock, flags); + + free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER); + + return 0; +} + /* * Note about cr3 (pagetable base) values: * @@ -762,6 +807,7 @@ void xen_mm_pin_all(void) { struct page *page; + spin_lock(&init_mm.page_table_lock); spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { @@ -772,6 +818,7 @@ void xen_mm_pin_all(void) } spin_unlock(&pgd_lock); + spin_unlock(&init_mm.page_table_lock); } static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, @@ -791,6 +838,9 @@ static void __init xen_after_bootmem(void) static_branch_enable(&xen_struct_pages_ready); SetPagePinned(virt_to_page(level3_user_vsyscall)); xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); + + if (alloc_discontig_frames(MIN_CONTIG_ORDER)) + BUG(); } static void xen_unpin_page(struct mm_struct *mm, struct page *page, @@ -866,6 +916,7 @@ void 
xen_mm_unpin_all(void) { struct page *page; + spin_lock(&init_mm.page_table_lock); spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { @@ -877,6 +928,7 @@ void xen_mm_unpin_all(void) } spin_unlock(&pgd_lock); + spin_unlock(&init_mm.page_table_lock); } static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) @@ -2151,10 +2203,6 @@ void __init xen_init_mmu_ops(void) memset(dummy_mapping, 0xff, PAGE_SIZE); } -/* Protected by xen_reservation_lock. */ -#define MAX_CONTIG_ORDER 9 /* 2MB */ -static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; - #define VOID_PTE (mfn_pte(0, __pgprot(0))) static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, unsigned long *in_frames, @@ -2271,24 +2319,25 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle) { - unsigned long *in_frames = discontig_frames, out_frame; + unsigned long *in_frames, out_frame; unsigned long flags; int success; unsigned long vstart = (unsigned long)phys_to_virt(pstart); - /* - * Currently an auto-translated guest will not perform I/O, nor will - * it require PAE page directories below 4GB. Therefore any calls to - * this function are redundant and can be ignored. - */ + if (unlikely(order > discontig_frames_order)) { + if (!discontig_frames_dyn) + return -ENOMEM; - if (unlikely(order > MAX_CONTIG_ORDER)) - return -ENOMEM; + if (alloc_discontig_frames(order)) + return -ENOMEM; + } memset((void *) vstart, 0, PAGE_SIZE << order); spin_lock_irqsave(&xen_reservation_lock, flags); + in_frames = discontig_frames; + /* 1. Zap current PTEs, remembering MFNs. 
*/ xen_zap_pfn_range(vstart, order, in_frames, NULL); @@ -2312,12 +2361,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { - unsigned long *out_frames = discontig_frames, in_frame; + unsigned long *out_frames, in_frame; unsigned long flags; int success; unsigned long vstart; - if (unlikely(order > MAX_CONTIG_ORDER)) + if (unlikely(order > discontig_frames_order)) return; vstart = (unsigned long)phys_to_virt(pstart); @@ -2325,6 +2374,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_lock_irqsave(&xen_reservation_lock, flags); + out_frames = discontig_frames; + /* 1. Find start MFN of contiguous extent. */ in_frame = virt_to_mfn(vstart); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9b3a9fa4a0ad..899590f1f74a 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -197,7 +197,7 @@ static void * __ref alloc_p2m_page(void) static void __ref free_p2m_page(void *p) { if (unlikely(!slab_is_available())) { - memblock_free((unsigned long)p, PAGE_SIZE); + memblock_free_ptr(p, PAGE_SIZE); return; } diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 152bbe900a17..6105404ba570 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -115,8 +115,8 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %ebx pop %eax #else - lea xen_hypercall_amd(%rip), %rbx - cmp %rax, %rbx + lea xen_hypercall_amd(%rip), %rcx + cmp %rax, %rcx #ifdef CONFIG_FRAME_POINTER pop %rax /* Dummy pop. */ #endif @@ -130,6 +130,7 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %rcx pop %rax #endif + FRAME_END /* Use correct hypercall function. */ jz xen_hypercall_amd jmp xen_hypercall_intel |