diff options
author | Stefan Agner <stefan.agner@toradex.com> | 2019-06-03 10:59:09 +0200 |
---|---|---|
committer | Stefan Agner <stefan.agner@toradex.com> | 2019-06-03 10:59:09 +0200 |
commit | 2115c1bc6e396d5ffe9ecbe394d1c50a6e25c404 (patch) | |
tree | 20fff445991414f23c0a29237940b75cd756de86 /arch | |
parent | b794ea49ba3816c0d5cf05506964a8e69ce4efa3 (diff) | |
parent | 3f7c1cab1a61108821cf47dda8a32ed25cc3588b (diff) |
Merge tag 'v5.0.19' into toradex_5.0.ytoradex_5.0.y
This is the 5.0.19 stable release
Diffstat (limited to 'arch')
64 files changed, 467 insertions, 534 deletions
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 4135abec3fb0..63e6e6504699 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -113,10 +113,24 @@ static void read_decode_cache_bcr_arcv2(int cpu) } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); - if (cbcr.c) + if (cbcr.c) { ioc_exists = 1; - else + + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM. + * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. + */ + if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled()) + ioc_enable = 0; + } else { ioc_enable = 0; + } /* HS 2.0 didn't have AUX_VOL */ if (cpuinfo_arc700[cpu].core.family > 0x51) { @@ -1158,19 +1172,6 @@ noinline void __init arc_ioc_setup(void) if (!ioc_enable) return; - /* - * As for today we don't support both IOC and ZONE_HIGHMEM enabled - * simultaneously. This happens because as of today IOC aperture covers - * only ZONE_NORMAL (low mem) and any dma transactions outside this - * region won't be HW coherent. - * If we want to use both IOC and ZONE_HIGHMEM we can use - * bounce_buffer to handle dma transactions to HIGHMEM. - * Also it is possible to modify dma_direct cache ops or increase IOC - * aperture size if we are planning to use HIGHMEM without PAE. - */ - if (IS_ENABLED(CONFIG_HIGHMEM)) - panic("IOC and HIGHMEM can't be used simultaneously"); - /* Flush + invalidate + disable L1 dcache */ __dc_disable(); diff --git a/arch/arm/boot/dts/exynos5260.dtsi b/arch/arm/boot/dts/exynos5260.dtsi index 55167850619c..33a085ffc447 100644 --- a/arch/arm/boot/dts/exynos5260.dtsi +++ b/arch/arm/boot/dts/exynos5260.dtsi @@ -223,7 +223,7 @@ wakeup-interrupt-controller { compatible = "samsung,exynos4210-wakeup-eint"; interrupt-parent = <&gic>; - interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; }; }; diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi index e84544b220b9..b90cea8b7368 100644 --- a/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi @@ -22,7 +22,7 @@ "Headphone Jack", "HPL", "Headphone Jack", "HPR", "Headphone Jack", "MICBIAS", - "IN1", "Headphone Jack", + "IN12", "Headphone Jack", "Speakers", "SPKL", "Speakers", "SPKR"; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 2d56008d8d6b..049fa8e3f2fd 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -393,8 +393,8 @@ #address-cells = <3>; #size-cells = <2>; - ranges = <0x81000000 0 0x40200000 0x40200000 0 0x00100000 - 0x82000000 0 0x40300000 0x40300000 0 0x400000>; + ranges = <0x81000000 0 0x40200000 0x40200000 0 0x00100000>, + <0x82000000 0 0x40300000 0x40300000 0 0x00d00000>; interrupts = <GIC_SPI 141 IRQ_TYPE_EDGE_RISING>; interrupt-names = "msi"; diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 07e31941dc67..617c2c99ebfb 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -278,6 +278,8 @@ static int __xts_crypt(struct skcipher_request *req, int err; err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv); diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c index d602e3bf3f96..2eaf2dbb8e81 100644 --- a/arch/arm/mach-exynos/firmware.c +++ b/arch/arm/mach-exynos/firmware.c @@ -196,6 +196,7 @@ bool __init exynos_secure_firmware_available(void) return false; addr = of_get_address(nd, 0, NULL, NULL); + of_node_put(nd); if (!addr) { pr_err("%s: No address specified.\n", __func__); return false; diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 0850505ac78b..9afb0c69db34 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -639,8 +639,10 @@ void __init exynos_pm_init(void) if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + of_node_put(np); return; } + of_node_put(np); pm_data = (const struct exynos_pm_data *) match->data; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index be78172abc09..8ed3104ade94 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -489,7 +489,7 @@ status = "okay"; bt656-supply = <&vcc1v8_dvp>; - audio-supply = <&vcca1v8_codec>; + audio-supply = <&vcc_3v0>; sdmmc-supply = <&vcc_sdio>; gpio1830-supply = <&vcc_3v0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 6cc1c9fa4ea6..1bbf0da4e01d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -333,6 +333,7 @@ phys = <&emmc_phy>; phy-names = "phy_arasan"; power-domains = <&power RK3399_PD_EMMC>; + disable-cqe-dcmd; status = "disabled"; }; diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index e7a95a566462..5cc248967387 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -304,6 +304,8 @@ static int __xts_crypt(struct skcipher_request *req, int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; kernel_neon_begin(); neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 067d8937d5af..1ed227bf6106 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -418,9 +418,11 @@ static int gcm_encrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + const int blocks = + walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; + int remaining = blocks; do { __aes_arm64_encrypt(ctx->aes_key.key_enc, @@ -430,9 +432,9 @@ static int gcm_encrypt(struct aead_request *req) dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; - } while (--blocks > 0); + } while (--remaining > 0); - ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg, + ghash_do_update(blocks, dg, walk.dst.virt.addr, &ctx->ghash_key, NULL); @@ -553,7 +555,7 @@ static int gcm_decrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f2a234d6516c..93e07512b4b6 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -148,18 +148,47 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + static inline u64 arch_counter_get_cntpct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntpct_el0); + cnt = arch_timer_reg_read_stable(cntpct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } static inline u64 arch_counter_get_cntvct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntvct_el0); + cnt = arch_timer_reg_read_stable(cntvct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } +#undef arch_counter_enforce_ordering + static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index f1a7ab18faf3..928f59340598 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -57,7 +57,15 @@ #define TASK_SIZE_64 (UL(1) << vabits_user) #ifdef CONFIG_COMPAT +#ifdef CONFIG_ARM64_64K_PAGES +/* + * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied + * by the compat vectors page. + */ #define TASK_SIZE_32 UL(0x100000000) +#else +#define TASK_SIZE_32 (UL(0x100000000) - PAGE_SIZE) +#endif /* CONFIG_ARM64_64K_PAGES */ #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index d7bb6aefae0a..0fa6db521e44 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,6 +135,7 @@ NOKPROBE_SYMBOL(disable_debug_monitors); */ static int clear_os_lock(unsigned int cpu) { + write_sysreg(0, osdlr_el1); write_sysreg(0, oslar_el1); isb(); return 0; diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index b44065fb1616..6f91e8116514 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -31,7 +31,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, off) + unsigned long, fd, unsigned long, off) { if (offset_in_page(off) != 0) return -EINVAL; diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index c39872a7b03c..e8f60112818f 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -73,6 +73,13 @@ x_tmp .req x8 movn x_tmp, #0xff00, lsl #48 and \res, x_tmp, \res mul \res, \res, \mult + /* + * Fake address dependency from the value computed from the counter + * register to subsequent data page accesses so that the sequence + * locking also orders the read of the counter. + */ + and x_tmp, \res, xzr + add vdso_data, vdso_data, x_tmp .endm /* @@ -147,12 +154,12 @@ ENTRY(__kernel_gettimeofday) /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=1b get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=1b get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -211,13 +218,13 @@ realtime: /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=realtime /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=realtime get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 clock_gettime_return, shift=1 @@ -231,7 +238,6 @@ monotonic: ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic /* All computations are done with left-shifted nsecs. */ lsl x4, x4, x12 @@ -239,6 +245,7 @@ monotonic: lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -253,13 +260,13 @@ monotonic_raw: /* w11 = cs_raw_mult, w12 = cs_shift */ ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic_raw get_ts_clock_raw res_sec=x10, res_nsec=x11, \ clock_nsec=x15, nsec_to_sec=x9 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 73886a5f1f30..19bc318b2fe9 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -76,24 +76,25 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, cpacr_el1 - mrs x6, tcr_el1 - mrs x7, vbar_el1 - mrs x8, mdscr_el1 - mrs x9, oslsr_el1 - mrs x10, sctlr_el1 + mrs x5, osdlr_el1 + mrs x6, cpacr_el1 + mrs x7, tcr_el1 + mrs x8, vbar_el1 + mrs x9, mdscr_el1 + mrs x10, oslsr_el1 + mrs x11, sctlr_el1 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x11, tpidr_el1 + mrs x12, tpidr_el1 alternative_else - mrs x11, tpidr_el2 + mrs x12, tpidr_el2 alternative_endif - mrs x12, sp_el0 + mrs x13, sp_el0 stp x2, x3, [x0] - stp x4, xzr, [x0, #16] - stp x5, x6, [x0, #32] - stp x7, x8, [x0, #48] - stp x9, x10, [x0, #64] - stp x11, x12, [x0, #80] + stp x4, x5, [x0, #16] + stp x6, x7, [x0, #32] + stp x8, x9, [x0, #48] + stp x10, x11, [x0, #64] + stp x12, x13, [x0, #80] ret ENDPROC(cpu_do_suspend) @@ -116,8 +117,8 @@ ENTRY(cpu_do_resume) msr cpacr_el1, x6 /* Don't change t0sz here, mask those bits when restoring */ - mrs x5, tcr_el1 - bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + mrs x7, tcr_el1 + bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH msr tcr_el1, x8 msr vbar_el1, x9 @@ -141,6 +142,7 @@ alternative_endif /* * Restore oslsr_el1 by writing oslar_el1 */ + msr osdlr_el1, x5 ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 783de51a6c4e..6c881659ee8a 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -100,12 +100,6 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* Prefetch */ -#define A64_PRFM(Rn, type, target, policy) \ - aarch64_insn_gen_prefetch(Rn, AARCH64_INSN_PRFM_TYPE_##type, \ - AARCH64_INSN_PRFM_TARGET_##target, \ - AARCH64_INSN_PRFM_POLICY_##policy) - /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1542df00b23c..8d7ceec7f079 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -739,7 +739,6 @@ emit_cond_jmp: case BPF_STX | BPF_XADD | BPF_DW: emit_a64_mov_i(1, tmp, off, ctx); emit(A64_ADD(1, tmp, tmp, dst), ctx); - emit(A64_PRFM(tmp, PST, L1, STRM), ctx); emit(A64_LDXR(isdw, tmp2, tmp), ctx); emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 413863508f6f..d67fb64e908c 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -64,17 +64,11 @@ struct mips_perf_event { #define CNTR_EVEN 0x55555555 #define CNTR_ODD 0xaaaaaaaa #define CNTR_ALL 0xffffffff -#ifdef CONFIG_MIPS_MT_SMP enum { T = 0, V = 1, P = 2, } range; -#else - #define T - #define V - #define P -#endif }; static struct mips_perf_event raw_event; @@ -325,9 +319,7 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) { struct perf_event *event = container_of(evt, struct perf_event, hw); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); -#ifdef CONFIG_MIPS_MT_SMP unsigned int range = evt->event_base >> 24; -#endif /* CONFIG_MIPS_MT_SMP */ WARN_ON(idx < 0 || idx >= mipspmu.num_counters); @@ -336,21 +328,15 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) /* Make sure interrupt enabled. */ MIPS_PERFCTRL_IE; -#ifdef CONFIG_CPU_BMIPS5000 - { + if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) { /* enable the counter for the calling thread */ cpuc->saved_ctrl[idx] |= (1 << (12 + vpe_id())) | BRCM_PERFCTRL_TC; - } -#else -#ifdef CONFIG_MIPS_MT_SMP - if (range > V) { + } else if (IS_ENABLED(CONFIG_MIPS_MT_SMP) && range > V) { /* The counter is processor wide. Set it up to count all TCs. */ pr_debug("Enabling perf counter for all TCs\n"); cpuc->saved_ctrl[idx] |= M_TC_EN_ALL; - } else -#endif /* CONFIG_MIPS_MT_SMP */ - { + } else { unsigned int cpu, ctrl; /* @@ -365,7 +351,6 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) cpuc->saved_ctrl[idx] |= ctrl; pr_debug("Enabling perf counter for CPU%d\n", cpu); } -#endif /* CONFIG_CPU_BMIPS5000 */ /* * We do not actually let the counter run. Leave it until start(). */ diff --git a/arch/parisc/boot/compressed/head.S b/arch/parisc/boot/compressed/head.S index 5aba20fa48aa..e8b798fd0cf0 100644 --- a/arch/parisc/boot/compressed/head.S +++ b/arch/parisc/boot/compressed/head.S @@ -22,7 +22,7 @@ __HEAD ENTRY(startup) - .level LEVEL + .level PA_ASM_LEVEL #define PSW_W_SM 0x200 #define PSW_W_BIT 36 @@ -63,7 +63,7 @@ $bss_loop: load32 BOOTADDR(decompress_kernel),%r3 #ifdef CONFIG_64BIT - .level LEVEL + .level PA_ASM_LEVEL ssm PSW_W_SM, %r0 /* set W-bit */ depdi 0, 31, 32, %r3 #endif @@ -72,7 +72,7 @@ $bss_loop: startup_continue: #ifdef CONFIG_64BIT - .level LEVEL + .level PA_ASM_LEVEL rsm PSW_W_SM, %r0 /* clear W-bit */ #endif diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index c17ec0ee6e7c..d85738a7bbe6 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -61,14 +61,14 @@ #define LDCW ldcw,co #define BL b,l # ifdef CONFIG_64BIT -# define LEVEL 2.0w +# define PA_ASM_LEVEL 2.0w # else -# define LEVEL 2.0 +# define PA_ASM_LEVEL 2.0 # endif #else #define LDCW ldcw #define BL bl -#define LEVEL 1.1 +#define PA_ASM_LEVEL 1.1 #endif #ifdef __ASSEMBLY__ diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 006fb939cac8..4016fe1c65a9 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -44,22 +44,22 @@ void parisc_setup_cache_timing(void); #define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define asm_io_sync() asm volatile("sync" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ - ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :: ) + ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :::"memory") #endif /* ! __ASSEMBLY__ */ diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index fbb4e43fda05..f56cbab64ac1 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -22,7 +22,7 @@ #include <linux/linkage.h> #include <linux/init.h> - .level LEVEL + .level PA_ASM_LEVEL __INITDATA ENTRY(boot_args) @@ -258,7 +258,7 @@ stext_pdc_ret: ldo R%PA(fault_vector_11)(%r10),%r10 $is_pa20: - .level LEVEL /* restore 1.1 || 2.0w */ + .level PA_ASM_LEVEL /* restore 1.1 || 2.0w */ #endif /*!CONFIG_64BIT*/ load32 PA(fault_vector_20),%r10 diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 841db71958cd..97c206734e24 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -193,6 +193,7 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) */ int running_on_qemu __read_mostly; +EXPORT_SYMBOL(running_on_qemu); void __cpuidle arch_cpu_idle_dead(void) { diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 4f77bd9be66b..93cc36d98875 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -48,7 +48,7 @@ registers). */ #define KILL_INSN break 0,0 - .level LEVEL + .level PA_ASM_LEVEL .text diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 059187a3ded7..3d1305aa64b6 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -512,7 +512,7 @@ static void __init map_pages(unsigned long start_vaddr, void __init set_kernel_text_rw(int enable_read_write) { - unsigned long start = (unsigned long) _text; + unsigned long start = (unsigned long) __init_begin; unsigned long end = (unsigned long) &data_start; map_pages(start, __pa(start), end-start, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6ee8195a2ffb..4a6dd3ba0b0b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -237,7 +237,6 @@ extern void arch_exit_mmap(struct mm_struct *mm); #endif static inline void arch_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (start <= mm->context.vdso_base && mm->context.vdso_base < end) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 532ab79734c7..d43e8fe6d424 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -543,14 +543,14 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (ret != H_SUCCESS) return ret; + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvmppc_tce_validate(stt, tce); if (ret != H_SUCCESS) - return ret; + goto unlock_exit; dir = iommu_tce_direction(tce); - idx = srcu_read_lock(&vcpu->kvm->srcu); - if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { ret = H_PARAMETER; goto unlock_exit; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5a066fc299e1..f17065f2c962 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3407,7 +3407,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); - mtspr(SPRN_PSSCR, host_psscr); + /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ + mtspr(SPRN_PSSCR, host_psscr | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); mtspr(SPRN_DAWR, host_dawr); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ed554b09eb3f..6023021c9b23 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -148,6 +148,7 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS + select HAVE_GENERIC_GUP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 063732414dfb..861f2b63f290 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1203,42 +1203,79 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pgd_offset_k(address) pgd_offset(&init_mm, address) -#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) - #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +/* + * The pgd_offset function *always* adds the index for the top-level + * region/segment table. This is done to get a sequence like the + * following to work: + * pgdp = pgd_offset(current->mm, addr); + * pgd = READ_ONCE(*pgdp); + * p4dp = p4d_offset(&pgd, addr); + * ... + * The subsequent p4d_offset, pud_offset and pmd_offset functions + * only add an index if they dereferenced the pointer. + */ +static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address) { - p4d_t *p4d = (p4d_t *) pgd; + unsigned long rste; + unsigned int shift; - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) - p4d = (p4d_t *) pgd_deref(*pgd); - return p4d + p4d_index(address); + /* Get the first entry of the top level table */ + rste = pgd_val(*pgd); + /* Pick up the shift from the table type of the first entry */ + shift = ((rste & _REGION_ENTRY_TYPE_MASK) >> 2) * 11 + 20; + return pgd + ((address >> shift) & (PTRS_PER_PGD - 1)); } -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { - pud_t *pud = (pud_t *) p4d; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) + return (p4d_t *) pgd_deref(*pgd) + p4d_index(address); + return (p4d_t *) pgd; +} - if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pud = (pud_t *) p4d_deref(*p4d); - return pud + pud_index(address); +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) + return (pud_t *) p4d_deref(*p4d) + pud_index(address); + return (pud_t *) p4d; } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { - pmd_t *pmd = (pmd_t *) pud; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) + return (pmd_t *) pud_deref(*pud) + pmd_index(address); + return (pmd_t *) pud; +} - if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pmd = (pmd_t *) pud_deref(*pud); - return pmd + pmd_index(address); +static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) +{ + return (pte_t *) pmd_deref(*pmd) + pte_index(address); } +#define pte_offset_kernel(pmd, address) pte_offset(pmd, address) +#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) +#define pte_unmap(pte) do { } while (0) + +static inline bool gup_fast_permitted(unsigned long start, int nr_pages) +{ + unsigned long len, end; + + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + return false; + return end <= current->mm->context.asce_limit; +} +#define gup_fast_permitted gup_fast_permitted + #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) @@ -1248,12 +1285,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) -/* Find an entry in the lowest level page table.. */ -#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) -#define pte_offset_kernel(pmd, address) pte_offset(pmd,address) -#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) -#define pte_unmap(pte) do { } while (0) - static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f5880bfd1b0c..3175413186b9 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,7 +4,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o -obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o +obj-y += page-states.o pageattr.o pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c deleted file mode 100644 index 2809d11c7a28..000000000000 --- a/arch/s390/mm/gup.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for s390 - * - * Copyright IBM Corp. 2010 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/vmstat.h> -#include <linux/pagemap.h> -#include <linux/rwsem.h> -#include <asm/pgtable.h> - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - unsigned long mask; - pte_t *ptep, pte; - - mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; - - ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); - do { - pte = *ptep; - barrier(); - /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) - return 0; - if ((pte_val(pte) & mask) != 0) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - head = compound_head(page); - if (!page_cache_get_speculative(head)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(head); - return 0; - } - VM_BUG_ON_PAGE(compound_head(page) != head, page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - unsigned long mask; - int refs; - - mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID; - if ((pmd_val(pmd) & mask) != 0) - return 0; - VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); - - refs = 0; - head = pmd_page(pmd); - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - - -static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp, pmd; - - pmdp = (pmd_t *) pudp; - if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pmdp = (pmd_t *) pud_deref(pud); - pmdp += pmd_index(addr); - do { - pmd = *pmdp; - barrier(); - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_large(pmd))) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_protnone(pmd)) - return 0; - if (!gup_huge_pmd(pmdp, pmd, addr, next, - write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmdp, pmd, addr, next, - write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - unsigned long mask; - int refs; - - mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID; - if ((pud_val(pud) & mask) != 0) - return 0; - VM_BUG_ON(!pfn_valid(pud_pfn(pud))); - - refs = 0; - head = pud_page(pud); - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON_PAGE(compound_head(page) != head, page); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pud_val(pud) != pud_val(*pudp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp, pud; - - pudp = (pud_t *) p4dp; - if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pudp = (pud_t *) p4d_deref(p4d); - pudp += pud_index(addr); - do { - pud = *pudp; - barrier(); - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_large(pud))) { - if (!gup_huge_pud(pudp, pud, addr, next, write, pages, - nr)) - return 0; - } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages, - nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long next; - p4d_t *p4dp, p4d; - - p4dp = (p4d_t *) pgdp; - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) - p4dp = (p4d_t *) pgd_deref(pgd); - p4dp += p4d_index(addr); - do { - p4d = *p4dp; - barrier(); - next = p4d_addr_end(addr, end); - if (p4d_none(p4d)) - return 0; - if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr)) - return 0; - } while (p4dp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next, flags; - pgd_t *pgdp, pgd; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if ((end <= start) || (end > mm->context.asce_limit)) - return 0; - /* - * local_irq_save() doesn't prevent pagetable teardown, but does - * prevent the pagetables from being freed on s390. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd = *pgdp; - barrier(); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - int nr, ret; - - might_sleep(); - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - if (nr == nr_pages) - return nr; - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - ret = get_user_pages_unlocked(start, nr_pages - nr, pages, - write ? FOLL_WRITE : 0); - /* Have to be a bit careful with return values */ - if (nr > 0) - ret = (ret < 0) ? nr : ret + nr; - return ret; -} diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index fca34b2177e2..9f4b4bb78120 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -22,7 +22,6 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { } diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 5c205a9cb5a6..9f06ea5466dd 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -88,7 +88,6 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, } static inline void arch_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { } diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index cd4df9322501..7bbfe7d35da7 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -76,15 +76,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { if (irq_fpu_usable()) { kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -93,15 +92,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d309f30cf7af..5fc76b755510 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -650,6 +650,7 @@ ENTRY(__switch_to_asm) pushl %ebx pushl %edi pushl %esi + pushfl /* switch stack */ movl %esp, TASK_threadsp(%eax) @@ -672,6 +673,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfl popl %esi popl %edi popl %ebx diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b629..b1d59a7c556e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -291,6 +291,7 @@ ENTRY(__switch_to_asm) pushq %r13 pushq %r14 pushq %r15 + pushfq /* switch stack */ movq %rsp, TASK_threadsp(%rdi) @@ -313,6 +314,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfq popq %r15 popq %r14 popq %r13 @@ -879,7 +881,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -899,6 +901,20 @@ ENTRY(\sym) jnz .Lfrom_usermode_switch_stack_\@ .endif + .if \create_gap == 1 + /* + * If coming from kernel space, create a 6-word gap to allow the + * int3 handler to emulate a call instruction. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_no_gap_\@ + .rept 6 + pushq 5*8(%rsp) + .endr + UNWIND_HINT_IRET_REGS offset=8 +.Lfrom_usermode_no_gap_\@: + .endif + .if \paranoid call paranoid_entry .else @@ -1130,7 +1146,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 +idtentry int3 do_int3 has_error_code=0 create_gap=1 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN_PV diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 71fb8b7b2954..c87b06ad9f86 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2090,15 +2090,19 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_cp_status &= ~(1ull << hwc->idx); - if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_disable(event); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; } x86_pmu_disable_event(event); + + /* + * Needs to be called after x86_pmu_disable_event, + * so we don't trigger the event without PEBS bit set. + */ + if (unlikely(event->attr.precise_ip)) + intel_pmu_pebs_disable(event); } static void intel_pmu_del_event(struct perf_event *event) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c1a812bd5a27..f7d562bd7dc7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -209,16 +209,6 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -#ifdef CONFIG_X86_MCE_AMD -void mce_amd_feature_init(struct cpuinfo_x86 *c); -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -#else -static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } -static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; -#endif - -static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); @@ -338,12 +328,19 @@ extern bool amd_mce_is_memory_error(struct mce *m); extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); -#else +void mce_amd_feature_init(struct cpuinfo_x86 *c); +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; -static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; -static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +#else +static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; +static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +static inline int +umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; #endif +static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 19d18fae6ec6..41019af68adf 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -277,8 +277,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, mpx_mm_init(mm); } -static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void arch_unmap(struct mm_struct *mm, unsigned long start, + unsigned long end) { /* * mpx_notify_unmap() goes and reads a rarely-hot @@ -298,7 +298,7 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, * consistently wrong. */ if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) - mpx_notify_unmap(mm, vma, start, end); + mpx_notify_unmap(mm, start, end); } /* diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index d0b1434fb0b6..143a5c193ed3 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -64,12 +64,15 @@ struct mpx_fault_info { }; #ifdef CONFIG_X86_INTEL_MPX -int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); -int mpx_handle_bd_fault(void); + +extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); +extern int mpx_handle_bd_fault(void); + static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); } + static inline void mpx_mm_init(struct mm_struct *mm) { /* @@ -78,11 +81,10 @@ static inline void mpx_mm_init(struct mm_struct *mm) */ mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; } -void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end); -unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, - unsigned long flags); +extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end); +extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); + #else static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) { @@ -100,7 +102,6 @@ static inline void mpx_mm_init(struct mm_struct *mm) { } static inline void mpx_notify_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long start, unsigned long end) { } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 9c85b54bf03c..0bb566315621 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -259,8 +259,7 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted -static inline bool gup_fast_permitted(unsigned long start, int nr_pages, - int write) +static inline bool gup_fast_permitted(unsigned long start, int nr_pages) { unsigned long len, end; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 7cf1a270d891..157149d4129c 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -40,6 +40,7 @@ asmlinkage void ret_from_fork(void); * order of the fields must match the code in __switch_to_asm(). */ struct inactive_task_frame { + unsigned long flags; #ifdef CONFIG_X86_64 unsigned long r15; unsigned long r14; diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index e85ff65c43c3..05861cc08787 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -39,4 +39,32 @@ extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; +static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + +#define INT3_INSN_SIZE 1 +#define CALL_INSN_SIZE 5 + +#ifdef CONFIG_X86_64 +static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) +{ + /* + * The int3 handler in entry_64.S adds a gap between the + * stack where the break point happened, and the saving of + * pt_regs. We can extend the original stack because of + * this gap. See the idtentry macro's create_gap option. + */ + regs->sp -= sizeof(unsigned long); + *(unsigned long *)regs->sp = val; +} + +static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) +{ + int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); + int3_emulate_jmp(regs, func); +} +#endif + #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 89298c83de53..496033b01d26 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -545,6 +545,66 @@ out: return offset; } +bool amd_filter_mce(struct mce *m) +{ + enum smca_bank_types bank_type = smca_get_bank_type(m->bank); + struct cpuinfo_x86 *c = &boot_cpu_data; + u8 xec = (m->status >> 16) & 0x3F; + + /* See Family 17h Models 10h-2Fh Erratum #1114. */ + if (c->x86 == 0x17 && + c->x86_model >= 0x10 && c->x86_model <= 0x2F && + bank_type == SMCA_IF && xec == 10) + return true; + + return false; +} + +/* + * Turn off thresholding banks for the following conditions: + * - MC4_MISC thresholding is not supported on Family 0x15. + * - Prevent possible spurious interrupts from the IF bank on Family 0x17 + * Models 0x10-0x2F due to Erratum #1114. + */ +void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) +{ + int i, num_msrs; + u64 hwcr; + bool need_toggle; + u32 msrs[NR_BLOCKS]; + + if (c->x86 == 0x15 && bank == 4) { + msrs[0] = 0x00000413; /* MC4_MISC0 */ + msrs[1] = 0xc0000408; /* MC4_MISC1 */ + num_msrs = 2; + } else if (c->x86 == 0x17 && + (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) { + + if (smca_get_bank_type(bank) != SMCA_IF) + return; + + msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank); + num_msrs = 1; + } else { + return; + } + + rdmsrl(MSR_K7_HWCR, hwcr); + + /* McStatusWrEn has to be set */ + need_toggle = !(hwcr & BIT(18)); + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); + + /* Clear CntP bit safely */ + for (i = 0; i < num_msrs; i++) + msr_clear_bit(msrs[i], 62); + + /* restore old settings */ + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr); +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -556,6 +616,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (mce_flags.smca) smca_configure(bank, cpu); + disable_err_thresholding(c, bank); + for (block = 0; block < NR_BLOCKS; ++block) { address = get_block_address(address, low, high, bank, block); if (!address) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6ce290c506d9..1a7084ba9a3b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 0x15 && c->x86_model <= 0xf) mce_flags.overflow_recov = 1; - /* - * Turn off MC4_MISC thresholding banks on those models since - * they're not supported there. - */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { - int i; - u64 hwcr; - bool need_toggle; - u32 msrs[] = { - 0x00000413, /* MC4_MISC0 */ - 0xc0000408, /* MC4_MISC1 */ - }; - - rdmsrl(MSR_K7_HWCR, hwcr); - - /* McStatusWrEn has to be set */ - need_toggle = !(hwcr & BIT(18)); - - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); - - /* Clear CntP bit safely */ - for (i = 0; i < ARRAY_SIZE(msrs); i++) - msr_clear_bit(msrs[i], 62); - - /* restore old settings */ - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr); - } } if (c->x86_vendor == X86_VENDOR_INTEL) { @@ -1801,6 +1771,14 @@ static void __mcheck_cpu_init_timer(void) mce_start_timer(t); } +bool filter_mce(struct mce *m) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return amd_filter_mce(m); + + return false; +} + /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index 3395549c51d3..64d1d5a00f39 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -99,6 +99,9 @@ int mce_gen_pool_add(struct mce *mce) { struct mce_evt_llist *node; + if (filter_mce(mce)) + return -EINVAL; + if (!mce_evt_pool) return -EINVAL; diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index af5eab1e65e2..a34b55baa7aa 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -173,4 +173,13 @@ struct mca_msr_regs { extern struct mca_msr_regs msr_ops; +/* Decide whether to add MCE record to MCE event pool or filter it out. */ +extern bool filter_mce(struct mce *m); + +#ifdef CONFIG_X86_MCE_AMD +extern bool amd_filter_mce(struct mce *m); +#else +static inline bool amd_filter_mce(struct mce *m) { return false; }; +#endif + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 763d4264d16a..2ee4b12a70e8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,7 @@ #include <asm/kprobes.h> #include <asm/ftrace.h> #include <asm/nops.h> +#include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE @@ -231,6 +232,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } static unsigned long ftrace_update_func; +static unsigned long ftrace_update_func_call; static int update_ftrace_func(unsigned long ip, void *new) { @@ -259,6 +261,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned char *new; int ret; + ftrace_update_func_call = (unsigned long)func; + new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -294,13 +298,28 @@ int ftrace_int3_handler(struct pt_regs *regs) if (WARN_ON_ONCE(!regs)) return 0; - ip = regs->ip - 1; - if (!ftrace_location(ip) && !is_ftrace_caller(ip)) - return 0; + ip = regs->ip - INT3_INSN_SIZE; - regs->ip += MCOUNT_INSN_SIZE - 1; +#ifdef CONFIG_X86_64 + if (ftrace_location(ip)) { + int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); + return 1; + } else if (is_ftrace_caller(ip)) { + if (!ftrace_update_func_call) { + int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); + return 1; + } + int3_emulate_call(regs, ftrace_update_func_call); + return 1; + } +#else + if (ftrace_location(ip) || is_ftrace_caller(ip)) { + int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); + return 1; + } +#endif - return 1; + return 0; } static int ftrace_write(unsigned long ip, const char *val, int size) @@ -858,6 +877,8 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) func = ftrace_ops_get_func(ops); + ftrace_update_func_call = (unsigned long)func; + /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -959,6 +980,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func) { unsigned char *new; + ftrace_update_func_call = 0UL; new = ftrace_jmp_replace(ip, (unsigned long)func); return update_ftrace_func(ip, new); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e471d8e6f0b2..70933193878c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -127,6 +127,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, struct task_struct *tsk; int err; + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. + */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6a62f4af9fcf..026a43be9bd1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -392,6 +392,14 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; + + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. + */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 85fe1870f873..9b7c4ca8f0a7 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -58,7 +58,6 @@ #include <asm/alternative.h> #include <asm/fpu/xstate.h> #include <asm/trace/mpx.h> -#include <asm/nospec-branch.h> #include <asm/mpx.h> #include <asm/vm86.h> #include <asm/umip.h> @@ -367,13 +366,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; - /* - * This situation can be triggered by userspace via - * modify_ldt(2) and the return does not take the regular - * user space exit, so a CPU buffer clear is required when - * MDS mitigation is enabled. - */ - mds_user_clear_cpu_buffers(); return; } #endif diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 371c669696d7..610c0f1fbdd7 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1371,7 +1371,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, valid_bank_mask = BIT_ULL(0); sparse_banks[0] = flush.processor_mask; - all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS; + + /* + * Work around possible WS2012 bug: it sends hypercalls + * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear, + * while also expecting us to flush something and crashing if + * we don't. Let's treat processor_mask == 0 same as + * HV_FLUSH_ALL_PROCESSORS. + */ + all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) || + flush.processor_mask == 0; } else { if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, sizeof(flush_ex)))) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 235687f3388f..6939eba2001a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1453,7 +1453,7 @@ static void apic_timer_expired(struct kvm_lapic *apic) if (swait_active(q)) swake_up_one(q); - if (apic_lvtt_tscdeadline(apic)) + if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) ktimer->expired_tscdeadline = ktimer->tscdeadline; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3eeb7183fc09..0bbb21a49082 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1262,31 +1262,42 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return 0; } -bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) +static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { - if (efer & efer_reserved_bits) - return false; - if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) - return false; + return false; if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) - return false; + return false; return true; + +} +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + if (efer & efer_reserved_bits) + return false; + + return __kvm_valid_efer(vcpu, efer); } EXPORT_SYMBOL_GPL(kvm_valid_efer); -static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u64 old_efer = vcpu->arch.efer; + u64 efer = msr_info->data; - if (!kvm_valid_efer(vcpu, efer)) - return 1; + if (efer & efer_reserved_bits) + return false; - if (is_paging(vcpu) - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) - return 1; + if (!msr_info->host_initiated) { + if (!__kvm_valid_efer(vcpu, efer)) + return 1; + + if (is_paging(vcpu) && + (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) + return 1; + } efer &= ~EFER_LMA; efer |= vcpu->arch.efer & EFER_LMA; @@ -2456,7 +2467,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.arch_capabilities = data; break; case MSR_EFER: - return set_efer(vcpu, data); + return set_efer(vcpu, msr_info); case MSR_K7_HWCR: data &= ~(u64)0x40; /* ignore flush filter disable */ data &= ~(u64)0x100; /* ignore ignne emulation enable */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 140e61843a07..3cb3af51ec89 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -6,6 +6,18 @@ # Produces uninteresting flaky coverage. KCOV_INSTRUMENT_delay.o := n +# Early boot use of cmdline; don't instrument it +ifdef CONFIG_AMD_MEM_ENCRYPT +KCOV_INSTRUMENT_cmdline.o := n +KASAN_SANITIZE_cmdline.o := n + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_cmdline.o = -pg +endif + +CFLAGS_cmdline.o := $(call cc-option, -fno-stack-protector) +endif + inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index de1851d15699..ea17ff6c8588 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -881,9 +881,10 @@ static int mpx_unmap_tables(struct mm_struct *mm, * the virtual address region start...end have already been split if * necessary, and the 'vma' is the first vma in this range (start -> end). */ -void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end) +void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, + unsigned long end) { + struct vm_area_struct *vma; int ret; /* @@ -902,11 +903,12 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, * which should not occur normally. Being strict about it here * helps ensure that we do not have an exploitable stack overflow. */ - do { + vma = find_vma(mm, start); + while (vma && vma->vm_start < end) { if (vma->vm_flags & VM_MPX) return; vma = vma->vm_next; - } while (vma && vma->vm_start < end); + } ret = mpx_unmap_tables(mm, start, end); if (ret) diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 62f5c7045944..1861a2ba0f2b 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -44,8 +44,6 @@ void __init __weak mem_map_via_hcall(struct boot_params *ptr __maybe_unused) static void __init init_pvh_bootparams(bool xen_guest) { - memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); - if ((pvh_start_info.version > 0) && (pvh_start_info.memmap_entries)) { struct hvm_memmap_table_entry *ep; int i; @@ -103,7 +101,7 @@ static void __init init_pvh_bootparams(bool xen_guest) * If we are trying to boot a Xen PVH guest, it is expected that the kernel * will have been configured to provide the required override for this routine. */ -void __init __weak xen_pvh_init(void) +void __init __weak xen_pvh_init(struct boot_params *boot_params) { xen_raw_printk("Error: Missing xen PVH initialization\n"); BUG(); @@ -112,7 +110,7 @@ void __init __weak xen_pvh_init(void) static void hypervisor_specific_init(bool xen_guest) { if (xen_guest) - xen_pvh_init(); + xen_pvh_init(&pvh_bootparams); } /* @@ -131,6 +129,8 @@ void __init xen_prepare_pvh(void) BUG(); } + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); + hypervisor_specific_init(xen_guest); init_pvh_bootparams(xen_guest); diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 1fbb629a9d78..0d3365cb64de 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -158,7 +158,7 @@ static enum efi_secureboot_mode xen_efi_get_secureboot(void) return efi_secureboot_mode_unknown; } -void __init xen_efi_init(void) +void __init xen_efi_init(struct boot_params *boot_params) { efi_system_table_t *efi_systab_xen; @@ -167,12 +167,12 @@ void __init xen_efi_init(void) if (efi_systab_xen == NULL) return; - strncpy((char *)&boot_params.efi_info.efi_loader_signature, "Xen", - sizeof(boot_params.efi_info.efi_loader_signature)); - boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen); - boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); + strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", + sizeof(boot_params->efi_info.efi_loader_signature)); + boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen); + boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); - boot_params.secure_boot = xen_efi_get_secureboot(); + boot_params->secure_boot = xen_efi_get_secureboot(); set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_PARAVIRT, &efi.flags); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c54a493e139a..4722ba2966ac 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1403,7 +1403,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* We need this for printk timestamps */ xen_setup_runstate_info(0); - xen_efi_init(); + xen_efi_init(&boot_params); /* Start the world */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 35b7599d2d0b..80a79db72fcf 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -13,6 +13,8 @@ #include <xen/interface/memory.h> +#include "xen-ops.h" + /* * PVH variables. * @@ -21,17 +23,20 @@ */ bool xen_pvh __attribute__((section(".data"))) = 0; -void __init xen_pvh_init(void) +void __init xen_pvh_init(struct boot_params *boot_params) { u32 msr; u64 pfn; xen_pvh = 1; + xen_domain_type = XEN_HVM_DOMAIN; xen_start_flags = pvh_start_info.flags; msr = cpuid_ebx(xen_cpuid_base() + 2); pfn = __pa(hypercall_page); wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + xen_efi_init(boot_params); } void __init mem_map_via_hcall(struct boot_params *boot_params_p) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 0e60bd918695..2f111f47ba98 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -122,9 +122,9 @@ static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, void __init xen_init_apic(void); #ifdef CONFIG_XEN_EFI -extern void xen_efi_init(void); +extern void xen_efi_init(struct boot_params *boot_params); #else -static inline void __init xen_efi_init(void) +static inline void __init xen_efi_init(struct boot_params *boot_params) { } #endif |