summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/Makefile2
-rw-r--r--arch/x86/boot/compressed/Makefile4
-rw-r--r--arch/x86/boot/compressed/head_32.S5
-rw-r--r--arch/x86/boot/compressed/head_64.S1
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c9
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig2
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S14
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S6
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S2
-rw-r--r--arch/x86/events/Kconfig6
-rw-r--r--arch/x86/events/Makefile1
-rw-r--r--arch/x86/events/amd/ibs.c15
-rw-r--r--arch/x86/events/amd/iommu.c2
-rw-r--r--arch/x86/events/intel/Makefile2
-rw-r--r--arch/x86/events/intel/ds.c32
-rw-r--r--arch/x86/events/intel/uncore_snb.c31
-rw-r--r--arch/x86/events/rapl.c (renamed from arch/x86/events/intel/rapl.c)11
-rw-r--r--arch/x86/include/asm/crash.h6
-rw-r--r--arch/x86/include/asm/fpu/internal.h5
-rw-r--r--arch/x86/include/asm/kvm_host.h5
-rw-r--r--arch/x86/include/asm/nospec-branch.h4
-rw-r--r--arch/x86/include/asm/pkeys.h5
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/smap.h11
-rw-r--r--arch/x86/include/asm/special_insns.h28
-rw-r--r--arch/x86/kernel/amd_nb.c8
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c16
-rw-r--r--arch/x86/kernel/apic/msi.c18
-rw-r--r--arch/x86/kernel/apic/vector.c27
-rw-r--r--arch/x86/kernel/cpu/common.c28
-rw-r--r--arch/x86/kernel/cpu/mce/core.c72
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c2
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h10
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c28
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h3
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c3
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c1
-rw-r--r--arch/x86/kernel/cpu/umwait.c6
-rw-r--r--arch/x86/kernel/crash.c15
-rw-r--r--arch/x86/kernel/fpu/core.c6
-rw-r--r--arch/x86/kernel/fpu/init.c30
-rw-r--r--arch/x86/kernel/fpu/xstate.c11
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/idt.c6
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c16
-rw-r--r--arch/x86/kernel/nmi.c5
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/smpboot.c26
-rw-r--r--arch/x86/kernel/stacktrace.c5
-rw-r--r--arch/x86/kernel/tsc_msr.c9
-rw-r--r--arch/x86/kernel/unwind_orc.c8
-rw-r--r--arch/x86/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/kvm/emulate.c4
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h2
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu.c10
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/mmutrace.h2
-rw-r--r--arch/x86/kvm/paging_tmpl.h7
-rw-r--r--arch/x86/kvm/svm.c12
-rw-r--r--arch/x86/kvm/vmx/nested.c6
-rw-r--r--arch/x86/kvm/vmx/vmx.c53
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c21
-rw-r--r--arch/x86/lib/usercopy_64.c3
-rw-r--r--arch/x86/math-emu/wm_sqrt.S2
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/numa_emulation.c2
-rw-r--r--arch/x86/pci/intel_mid_pci.c1
-rw-r--r--arch/x86/pci/xen.c1
-rw-r--r--arch/x86/platform/uv/uv_irq.c3
-rw-r--r--arch/x86/purgatory/Makefile5
-rw-r--r--arch/x86/realmode/init.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c9
79 files changed, 487 insertions, 253 deletions
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index e2839b5c246c..6539c50fb9aa 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -87,7 +87,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6b84afdd7538..292b5bc6e3a3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -38,6 +38,8 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
KBUILD_CFLAGS += -Wno-pointer-sign
+# Disable relocation relaxation in case the link is not PIE.
+KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
@@ -102,7 +104,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
quiet_cmd_check_data_rel = DATAREL $@
define cmd_check_data_rel
for obj in $(filter %.o,$^); do \
- ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \
+ $(READELF) -S $$obj | grep -qF .rel.local && { \
echo "error: $$obj has data relocations!" >&2; \
exit 1; \
} || true; \
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 70ffce98c568..d7c0fcc1dbf9 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -49,16 +49,17 @@
* Position Independent Executable (PIE) so that linker won't optimize
* R_386_GOT32X relocation to its fixed symbol address. Older
* linkers generate R_386_32 relocations against locally defined symbols,
- * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less
+ * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less
* optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
* R_386_32 relocations when relocating the kernel. To generate
- * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as
+ * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as
* hidden:
*/
.hidden _bss
.hidden _ebss
.hidden _got
.hidden _egot
+ .hidden _end
__HEAD
ENTRY(startup_32)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 07d2002da642..50c9eeb36f0d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -42,6 +42,7 @@
.hidden _ebss
.hidden _got
.hidden _egot
+ .hidden _end
__HEAD
.code32
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index c8862696a47b..7d0394f4ebf9 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -5,15 +5,6 @@
#include "pgtable.h"
#include "../string.h"
-/*
- * __force_order is used by special_insns.h asm code to force instruction
- * serialization.
- *
- * It is not referenced from the code, but GCC < 5 with -fPIE would fail
- * due to an undefined symbol. Define it to make these ancient GCCs work.
- */
-unsigned long __force_order;
-
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 59ce9ed58430..088709089e9b 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -137,7 +137,6 @@ CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
@@ -205,7 +204,6 @@ CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index d0a5ffeae8df..8092d7baf8b5 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -136,7 +136,6 @@ CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
@@ -201,7 +200,6 @@ CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index 5f6a5af9c489..77043a82da51 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -127,10 +127,6 @@ ddq_add_8:
/* generate a unique variable for ddq_add_x */
-.macro setddq n
- var_ddq_add = ddq_add_\n
-.endm
-
/* generate a unique variable for xmm register */
.macro setxdata n
var_xdata = %xmm\n
@@ -140,9 +136,7 @@ ddq_add_8:
.macro club name, id
.altmacro
- .if \name == DDQ_DATA
- setddq %\id
- .elseif \name == XDATA
+ .if \name == XDATA
setxdata %\id
.endif
.noaltmacro
@@ -165,9 +159,8 @@ ddq_add_8:
.set i, 1
.rept (by - 1)
- club DDQ_DATA, i
club XDATA, i
- vpaddq var_ddq_add(%rip), xcounter, var_xdata
+ vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
vptest ddq_low_msk(%rip), var_xdata
jnz 1f
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
@@ -180,8 +173,7 @@ ddq_add_8:
vmovdqa 1*16(p_keys), xkeyA
vpxor xkey0, xdata0, xdata0
- club DDQ_DATA, by
- vpaddq var_ddq_add(%rip), xcounter, xcounter
+ vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
vptest ddq_low_msk(%rip), xcounter
jnz 1f
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index e40bdf024ba7..9afeb58c910e 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -266,7 +266,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
PSHUFB_XMM %xmm2, %xmm0
movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
- PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
movdqu HashKey(%arg2), %xmm13
CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
@@ -978,7 +978,7 @@ _initial_blocks_done\@:
* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
-.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
+.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM1, \XMM5
@@ -1186,7 +1186,7 @@ aes_loop_par_enc_done\@:
* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
-.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
+.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM1, \XMM5
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index d9b734d0c8cc..3c6e01520a97 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -170,7 +170,7 @@ continue_block:
## branch into array
lea jump_table(%rip), bufp
- movzxw (bufp, %rax, 2), len
+ movzwq (bufp, %rax, 2), len
lea crc_array(%rip), bufp
lea (bufp, len, 1), bufp
JMP_NOSPEC bufp
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index 9a7a1446cb3a..4a809c6cbd2f 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -10,11 +10,11 @@ config PERF_EVENTS_INTEL_UNCORE
available on NehalemEX and more modern processors.
config PERF_EVENTS_INTEL_RAPL
- tristate "Intel rapl performance events"
- depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ tristate "Intel/AMD rapl performance events"
+ depends on PERF_EVENTS && (CPU_SUP_INTEL || CPU_SUP_AMD) && PCI
default y
---help---
- Include support for Intel rapl performance events for power
+ Include support for Intel and AMD rapl performance events for power
monitoring on modern processors.
config PERF_EVENTS_INTEL_CSTATE
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9e07f554333f..726e83c0a31a 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += core.o probe.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 26c36357c4c9..a023cbe21230 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -89,6 +89,7 @@ struct perf_ibs {
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
+ unsigned int fetch_count_reset_broken : 1;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
@@ -363,7 +364,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+ u64 tmp = hwc->config | config;
+
+ if (perf_ibs->fetch_count_reset_broken)
+ wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
+
+ wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}
/*
@@ -733,6 +739,13 @@ static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
+ /*
+ * Some chips fail to reset the fetch count when it is written; instead
+ * they need a 0-1 transition of IbsFetchEn.
+ */
+ if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+ perf_ibs_fetch.fetch_count_reset_broken = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index fb616203ce42..be50ef8572cc 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -379,7 +379,7 @@ static __init int _init_events_attrs(void)
while (amd_iommu_v2_event_descs[i].attr.attr.name)
i++;
- attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL);
+ attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index 3468b0c1dc7c..e67a5886336c 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -2,8 +2,6 @@
obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o
obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o
-intel-rapl-perf-objs := rapl.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e5ad97a82342..1aaba2c8a9ba 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -669,9 +669,7 @@ unlock:
static inline void intel_pmu_drain_pebs_buffer(void)
{
- struct pt_regs regs;
-
- x86_pmu.drain_pebs(&regs);
+ x86_pmu.drain_pebs(NULL);
}
/*
@@ -1736,6 +1734,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
struct x86_perf_regs perf_regs;
struct pt_regs *regs = &perf_regs.regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
+ struct pt_regs dummy_iregs;
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
/*
@@ -1748,6 +1747,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
} else if (!intel_pmu_save_and_restart(event))
return;
+ if (!iregs)
+ iregs = &dummy_iregs;
+
while (count > 1) {
setup_sample(event, iregs, at, &data, regs);
perf_event_output(event, &data, regs);
@@ -1757,16 +1759,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
}
setup_sample(event, iregs, at, &data, regs);
-
- /*
- * All but the last records are processed.
- * The last one is left to be able to call the overflow handler.
- */
- if (perf_event_overflow(event, &data, regs)) {
- x86_pmu_stop(event, 0);
- return;
+ if (iregs == &dummy_iregs) {
+ /*
+ * The PEBS records may be drained in the non-overflow context,
+ * e.g., large PEBS + context switch. Perf should treat the
+ * last record the same as other PEBS records, and doesn't
+ * invoke the generic overflow handler.
+ */
+ perf_event_output(event, &data, regs);
+ } else {
+ /*
+ * All but the last records are processed.
+ * The last one is left to be able to call the overflow handler.
+ */
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
}
-
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index c37cb12d0ef6..aec6e63c6a04 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -110,6 +110,10 @@
#define ICL_UNC_CBO_0_PER_CTR0 0x702
#define ICL_UNC_CBO_MSR_OFFSET 0x8
+/* ICL ARB register */
+#define ICL_UNC_ARB_PER_CTR 0x3b1
+#define ICL_UNC_ARB_PERFEVTSEL 0x3b3
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
@@ -297,15 +301,21 @@ void skl_uncore_cpu_init(void)
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
+static struct intel_uncore_ops icl_uncore_msr_ops = {
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
static struct intel_uncore_type icl_uncore_cbox = {
.name = "cbox",
- .num_counters = 4,
+ .num_counters = 2,
.perf_ctr_bits = 44,
.perf_ctr = ICL_UNC_CBO_0_PER_CTR0,
.event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.msr_offset = ICL_UNC_CBO_MSR_OFFSET,
- .ops = &skl_uncore_msr_ops,
+ .ops = &icl_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
};
@@ -334,13 +344,25 @@ static struct intel_uncore_type icl_uncore_clockbox = {
.single_fixed = 1,
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
.format_group = &icl_uncore_clock_format_group,
- .ops = &skl_uncore_msr_ops,
+ .ops = &icl_uncore_msr_ops,
.event_descs = icl_uncore_events,
};
+static struct intel_uncore_type icl_uncore_arb = {
+ .name = "arb",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .perf_ctr = ICL_UNC_ARB_PER_CTR,
+ .event_ctl = ICL_UNC_ARB_PERFEVTSEL,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .ops = &icl_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+};
+
static struct intel_uncore_type *icl_msr_uncores[] = {
&icl_uncore_cbox,
- &snb_uncore_arb,
+ &icl_uncore_arb,
&icl_uncore_clockbox,
NULL,
};
@@ -358,7 +380,6 @@ void icl_uncore_cpu_init(void)
{
uncore_msr_uncores = icl_msr_uncores;
icl_uncore_cbox.num_boxes = icl_get_cbox_num();
- snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
enum {
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/rapl.c
index 5053a403e4ae..187c72a58e69 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -1,11 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Support Intel RAPL energy consumption counters
+ * Support Intel/AMD RAPL energy consumption counters
* Copyright (C) 2013 Google, Inc., Stephane Eranian
*
* Intel RAPL interface is specified in the IA-32 Manual Vol3b
* section 14.7.1 (September 2013)
*
+ * AMD RAPL interface for Fam17h is described in the public PPR:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=206537
+ *
* RAPL provides more controls than just reporting energy consumption
* however here we only expose the 3 energy consumption free running
* counters (pp0, pkg, dram).
@@ -58,8 +61,8 @@
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#include "../perf_event.h"
-#include "../probe.h"
+#include "perf_event.h"
+#include "probe.h"
MODULE_LICENSE("GPL");
@@ -639,7 +642,7 @@ static const struct attribute_group *rapl_attr_update[] = {
&rapl_events_pkg_group,
&rapl_events_ram_group,
&rapl_events_gpu_group,
- &rapl_events_gpu_group,
+ &rapl_events_psys_group,
NULL,
};
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index ef5638f641f2..88eadd08ad70 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -10,4 +10,10 @@ int crash_setup_memmap_entries(struct kimage *image,
struct boot_params *params);
void crash_smp_send_stop(void);
+#ifdef CONFIG_KEXEC_CORE
+void __init crash_reserve_low_1M(void);
+#else
+static inline void __init crash_reserve_low_1M(void) { }
+#endif
+
#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 44c48e34d799..00eac7f1529b 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -619,6 +619,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
* MXCSR and XCR definitions:
*/
+static inline void ldmxcsr(u32 mxcsr)
+{
+ asm volatile("ldmxcsr %0" :: "m" (mxcsr));
+}
+
extern unsigned int mxcsr_feature_mask;
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d91a3f5b26a..c41686641c3f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1160,7 +1160,7 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
- int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+ int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
@@ -1553,7 +1553,8 @@ asmlinkage void kvm_spurious_fault(void);
_ASM_EXTABLE(666b, 667b)
#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 5c24a7b35166..b222a3595946 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -320,7 +320,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static inline void mds_clear_cpu_buffers(void)
+static __always_inline void mds_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -341,7 +341,7 @@ static inline void mds_clear_cpu_buffers(void)
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static inline void mds_user_clear_cpu_buffers(void)
+static __always_inline void mds_user_clear_cpu_buffers(void)
{
if (static_branch_likely(&mds_user_clear))
mds_clear_cpu_buffers();
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 19b137f1b3be..2ff9b98812b7 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -4,6 +4,11 @@
#define ARCH_DEFAULT_PKEY 0
+/*
+ * If more than 16 keys are ever supported, a thorough audit
+ * will be necessary to ensure that the types that store key
+ * numbers and masks have sufficient capacity.
+ */
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 54f5d54280f6..a07dfdf7759e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -334,7 +334,7 @@ struct x86_hw_tss {
#define INVALID_IO_BITMAP_OFFSET 0x8000
struct entry_stack {
- unsigned long words[64];
+ char stack[PAGE_SIZE];
};
struct entry_stack_page {
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 332eb3525867..902be2e6e96c 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -309,8 +309,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
static const unsigned int argument_offs[] = {
#ifdef __i386__
offsetof(struct pt_regs, ax),
- offsetof(struct pt_regs, cx),
offsetof(struct pt_regs, dx),
+ offsetof(struct pt_regs, cx),
#define NR_REG_ARGUMENTS 3
#else
offsetof(struct pt_regs, di),
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 27c47d183f4b..8b58d6975d5d 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void)
{
unsigned long flags;
- asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
- X86_FEATURE_SMAP)
+ asm volatile ("# smap_save\n\t"
+ ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+ "pushf; pop %0; " __ASM_CLAC "\n\t"
+ "1:"
: "=rm" (flags) : : "memory", "cc");
return flags;
@@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void)
static __always_inline void smap_restore(unsigned long flags)
{
- asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+ asm volatile ("# smap_restore\n\t"
+ ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+ "push %0; popf\n\t"
+ "1:"
: : "g" (flags) : "memory", "cc");
}
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6d37b8fcfc77..2e0cdc64cb50 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -10,45 +10,47 @@
#include <linux/jump_label.h>
/*
- * Volatile isn't enough to prevent the compiler from reordering the
- * read/write functions for the control registers and messing everything up.
- * A memory clobber would solve the problem, but would prevent reordering of
- * all loads stores around it, which can hurt performance. Solution is to
- * use a variable and mimic reads and writes to it to enforce serialization
+ * The compiler should not reorder volatile asm statements with respect to each
+ * other: they should execute in program order. However GCC 4.9.x and 5.x have
+ * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder
+ * volatile asm. The write functions are not affected since they have memory
+ * clobbers preventing reordering. To prevent reads from being reordered with
+ * respect to writes, use a dummy memory operand.
*/
-extern unsigned long __force_order;
+
+#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)
void native_write_cr0(unsigned long val);
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
- asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static inline unsigned long native_read_cr2(void)
{
unsigned long val;
- asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static inline void native_write_cr2(unsigned long val)
{
- asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
+ asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}
static inline unsigned long __native_read_cr3(void)
{
unsigned long val;
- asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static inline void native_write_cr3(unsigned long val)
{
- asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
+ asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}
static inline unsigned long native_read_cr4(void)
@@ -63,10 +65,10 @@ static inline unsigned long native_read_cr4(void)
asm volatile("1: mov %%cr4, %0\n"
"2:\n"
_ASM_EXTABLE(1b, 2b)
- : "=r" (val), "=m" (__force_order) : "0" (0));
+ : "=r" (val) : "0" (0), __FORCE_ORDER);
#else
/* CR4 always exists on x86_64. */
- asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);
#endif
return val;
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 251c795b4eb3..c4bc01da820e 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -18,10 +18,13 @@
#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
+#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
+#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
@@ -32,6 +35,7 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
{}
};
@@ -50,8 +54,10 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{}
};
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -65,7 +71,9 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 25b8c45467fc..fce94c799f01 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2099,7 +2099,7 @@ void __init init_apic_mappings(void)
unsigned int new_apicid;
if (apic_validate_deadline_timer())
- pr_debug("TSC deadline timer available\n");
+ pr_info("TSC deadline timer available\n");
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f0262cb5657a..4b6301946f45 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2256,6 +2256,7 @@ static inline void __init check_timer(void)
legacy_pic->init(0);
legacy_pic->make_irq(0);
apic_write(APIC_LVT0, APIC_DM_EXTINT);
+ legacy_pic->unmask(0);
unlock_ExtINT_logic();
@@ -2329,12 +2330,12 @@ static int mp_irqdomain_create(int ioapic)
ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
(void *)(long)ioapic);
- /* Release fw handle if it was allocated above */
- if (!cfg->dev)
- irq_domain_free_fwnode(fn);
-
- if (!ip->irqdomain)
+ if (!ip->irqdomain) {
+ /* Release fw handle if it was allocated above */
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
return -ENOMEM;
+ }
ip->irqdomain->parent = parent;
@@ -2348,8 +2349,13 @@ static int mp_irqdomain_create(int ioapic)
static void ioapic_destroy_irqdomain(int idx)
{
+ struct ioapic_domain_cfg *cfg = &ioapics[idx].irqdomain_cfg;
+ struct fwnode_handle *fn = ioapics[idx].irqdomain->fwnode;
+
if (ioapics[idx].irqdomain) {
irq_domain_remove(ioapics[idx].irqdomain);
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
ioapics[idx].irqdomain = NULL;
}
}
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 159bd0cb8548..a20873bbbed6 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -262,12 +262,13 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
msi_default_domain =
pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
parent);
- irq_domain_free_fwnode(fn);
}
- if (!msi_default_domain)
+ if (!msi_default_domain) {
+ irq_domain_free_fwnode(fn);
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
- else
+ } else {
msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
+ }
}
#ifdef CONFIG_IRQ_REMAP
@@ -300,7 +301,8 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
if (!fn)
return NULL;
d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent);
- irq_domain_free_fwnode(fn);
+ if (!d)
+ irq_domain_free_fwnode(fn);
return d;
}
#endif
@@ -363,7 +365,8 @@ static struct irq_domain *dmar_get_irq_domain(void)
if (fn) {
dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
x86_vector_domain);
- irq_domain_free_fwnode(fn);
+ if (!dmar_domain)
+ irq_domain_free_fwnode(fn);
}
out:
mutex_unlock(&dmar_lock);
@@ -488,7 +491,10 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
}
d = msi_create_irq_domain(fn, domain_info, parent);
- irq_domain_free_fwnode(fn);
+ if (!d) {
+ irq_domain_free_fwnode(fn);
+ kfree(domain_info);
+ }
return d;
}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 2c5676b0a6e7..c8203694d9ce 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
trace_vector_activate(irqd->irq, apicd->is_managed,
apicd->can_reserve, reserve);
- /* Nothing to do for fixed assigned vectors */
- if (!apicd->can_reserve && !apicd->is_managed)
- return 0;
-
raw_spin_lock_irqsave(&vector_lock, flags);
- if (reserve || irqd_is_managed_and_shutdown(irqd))
+ if (!apicd->can_reserve && !apicd->is_managed)
+ assign_irq_vector_any_locked(irqd);
+ else if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@@ -556,6 +554,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irqd->chip_data = apicd;
irqd->hwirq = virq + i;
irqd_set_single_target(irqd);
+
+ /* Don't invoke affinity setter on deactivated interrupts */
+ irqd_set_affinity_on_activate(irqd);
+
/*
* Legacy vectors are already assigned when the IOAPIC
* takes them over. They stay on the same vector. This is
@@ -703,7 +705,6 @@ int __init arch_early_irq_init(void)
x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
NULL);
BUG_ON(x86_vector_domain == NULL);
- irq_domain_free_fwnode(fn);
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
@@ -769,20 +770,10 @@ void lapic_offline(void)
static int apic_set_affinity(struct irq_data *irqd,
const struct cpumask *dest, bool force)
{
- struct apic_chip_data *apicd = apic_chip_data(irqd);
int err;
- /*
- * Core code can call here for inactive interrupts. For inactive
- * interrupts which use managed or reservation mode there is no
- * point in going through the vector assignment right now as the
- * activation will assign a vector which fits the destination
- * cpumask. Let the core code store the destination mask and be
- * done with it.
- */
- if (!irqd_is_activated(irqd) &&
- (apicd->is_managed || apicd->can_reserve))
- return IRQ_SET_MASK_OK;
+ if (WARN_ON_ONCE(!irqd_is_activated(irqd)))
+ return -EIO;
raw_spin_lock(&vector_lock);
cpumask_and(vector_searchmask, dest, cpu_online_mask);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 650df6d21049..8a85c2e144a6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,6 +366,9 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
}
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -374,7 +377,7 @@ void native_write_cr0(unsigned long val)
unsigned long bits_missing = 0;
set_register:
- asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
+ asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
@@ -390,20 +393,20 @@ EXPORT_SYMBOL(native_write_cr0);
void native_write_cr4(unsigned long val)
{
- unsigned long bits_missing = 0;
+ unsigned long bits_changed = 0;
set_register:
- asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
+ asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) {
- if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
- bits_missing = ~val & cr4_pinned_bits;
- val |= bits_missing;
+ if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+ bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+ val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
- /* Warn after we've set the missing bits. */
- WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
- bits_missing);
+ /* Warn after we've corrected the changed bits. */
+ WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+ bits_changed);
}
}
EXPORT_SYMBOL(native_write_cr4);
@@ -415,7 +418,7 @@ void cr4_init(void)
if (boot_cpu_has(X86_FEATURE_PCID))
cr4 |= X86_CR4_PCIDE;
if (static_branch_likely(&cr_pinning))
- cr4 |= cr4_pinned_bits;
+ cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
__write_cr4(cr4);
@@ -430,10 +433,7 @@ void cr4_init(void)
*/
static void __init setup_cr_pinning(void)
{
- unsigned long mask;
-
- mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
- cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
static_key_enable(&cr_pinning.key);
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index fd76e3733dd3..92331de16d70 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -388,10 +388,28 @@ static int msr_to_offset(u32 msr)
return -1;
}
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+
+ show_stack_regs(regs);
+
+ panic("MCA architectural violation!\n");
+
+ while (true)
+ cpu_relax();
+
+ return true;
+}
+
/* MSR access wrappers used for error injection */
static u64 mce_rdmsrl(u32 msr)
{
- u64 v;
+ DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
@@ -401,21 +419,43 @@ static u64 mce_rdmsrl(u32 msr)
return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
}
- if (rdmsrl_safe(msr, &v)) {
- WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
- /*
- * Return zero in case the access faulted. This should
- * not happen normally but can happen if the CPU does
- * something weird, or if the code is buggy.
- */
- v = 0;
- }
+ /*
+ * RDMSR on MCA MSRs should not fault. If they do, this is very much an
+ * architectural violation and needs to be reported to hw vendor. Panic
+ * the box to not allow any further progress.
+ */
+ asm volatile("1: rdmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
+ : EAX_EDX_RET(val, low, high) : "c" (msr));
- return v;
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
+ regs->ip, (void *)regs->ip);
+
+ show_stack_regs(regs);
+
+ panic("MCA architectural violation!\n");
+
+ while (true)
+ cpu_relax();
+
+ return true;
}
static void mce_wrmsrl(u32 msr, u64 v)
{
+ u32 low, high;
+
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
@@ -423,7 +463,15 @@ static void mce_wrmsrl(u32 msr, u64 v)
*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
return;
}
- wrmsrl(msr, v);
+
+ low = (u32)v;
+ high = (u32)(v >> 32);
+
+ /* See comment in mce_rdmsrl() */
+ asm volatile("1: wrmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
+ : : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 1f30117b24ba..eb2d41c1816d 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -511,7 +511,7 @@ static void do_inject(void)
*/
if (inj_type == DFR_INT_INJ) {
i_mce.status |= MCI_STATUS_DEFERRED;
- i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
+ i_mce.status &= ~MCI_STATUS_UC;
}
/*
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 43031db429d2..231954fe5b4e 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -172,4 +172,14 @@ extern bool amd_filter_mce(struct mce *m);
static inline bool amd_filter_mce(struct mce *m) { return false; };
#endif
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr);
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr);
+
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 87bcdc6dc2f0..0d09eb13743b 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -9,9 +9,11 @@
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <asm/mce.h>
#include <linux/uaccess.h>
+#include <asm/mce.h>
+#include <asm/intel-family.h>
+
#include "internal.h"
/*
@@ -40,9 +42,14 @@ static struct severity {
unsigned char context;
unsigned char excp;
unsigned char covered;
+ unsigned char cpu_model;
+ unsigned char cpu_minstepping;
+ unsigned char bank_lo, bank_hi;
char *msg;
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
+#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
+#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
#define KERNEL_RECOV .context = IN_KERNEL_RECOV
@@ -97,7 +104,6 @@ static struct severity {
KEEP, "Corrected error",
NOSER, BITCLR(MCI_STATUS_UC)
),
-
/*
* known AO MCACODs reported via MCE or CMC:
*
@@ -113,6 +119,18 @@ static struct severity {
AO, "Action optional: last level cache writeback error",
SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
),
+ /*
+ * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
+ * to report uncorrected errors using CMCI with a special signature.
+ * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
+ * in one of the memory controller banks.
+ * Set severity to "AO" for same action as normal patrol scrub error.
+ */
+ MCESEV(
+ AO, "Uncorrected Patrol Scrub Error",
+ SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
+ MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
+ ),
/* ignore OVER for UCNA */
MCESEV(
@@ -320,6 +338,12 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
continue;
if (s->excp && excp != s->excp)
continue;
+ if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
+ continue;
+ if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
+ continue;
+ if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
+ continue;
if (msg)
*msg = s->msg;
s->covered = 1;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index d8cc5223b7ce..87a34b6e06a2 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -260,6 +260,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
r->num_closid = edx.split.cos_max + 1;
r->membw.max_delay = eax.split.max_delay + 1;
r->default_ctrl = MAX_MBA_BW;
+ r->membw.mbm_width = MBM_CNTR_WIDTH;
if (ecx & MBA_IS_LINEAR) {
r->membw.delay_linear = true;
r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
@@ -289,6 +290,7 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
/* AMD does not use delay */
r->membw.delay_linear = false;
+ r->membw.mbm_width = MBM_CNTR_WIDTH_AMD;
r->membw.min_bw = 0;
r->membw.bw_gran = 1;
/* Max value is 2048, Data width should be 4 in decimal */
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 3dd13f3a8b23..17095435c875 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -32,6 +32,7 @@
#define CQM_LIMBOCHECK_INTERVAL 1000
#define MBM_CNTR_WIDTH 24
+#define MBM_CNTR_WIDTH_AMD 44
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
@@ -368,6 +369,7 @@ struct rdt_cache {
* @min_bw: Minimum memory bandwidth percentage user can request
* @bw_gran: Granularity at which the memory bandwidth is allocated
* @delay_linear: True if memory B/W delay is in linear scale
+ * @mbm_width: memory B/W monitor counter width
* @mba_sc: True if MBA software controller(mba_sc) is enabled
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
@@ -376,6 +378,7 @@ struct rdt_membw {
u32 min_bw;
u32 bw_gran;
u32 delay_linear;
+ u32 mbm_width;
bool mba_sc;
u32 *mb_map;
};
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 773124b0e18a..0cf4f87f6012 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -216,8 +216,9 @@ void free_rmid(u32 rmid)
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
{
- u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+ u64 shift, chunks;
+ shift = 64 - rdt_resources_all[RDT_RESOURCE_MBA].membw.mbm_width;
chunks = (cur_msr << shift) - (prev_msr << shift);
return chunks >>= shift;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 20856d80dce3..54b711bc0607 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1027,6 +1027,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
_r_cdp = NULL;
+ _d_cdp = NULL;
ret = -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index c222f283b456..32b4dc9030aa 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -17,12 +17,6 @@
*/
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
-u32 get_umwait_control_msr(void)
-{
- return umwait_control_cached;
-}
-EXPORT_SYMBOL_GPL(get_umwait_control_msr);
-
/*
* Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
* hardware or BIOS before kernel boot.
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index eb651fbde92a..ff25a2ea271c 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -39,6 +40,7 @@
#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
+#include <asm/cmdline.h>
/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
@@ -68,6 +70,19 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
rcu_read_unlock();
}
+/*
+ * When the crashkernel option is specified, only use the low
+ * 1M for the real mode trampoline.
+ */
+void __init crash_reserve_low_1M(void)
+{
+ if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0)
+ return;
+
+ memblock_reserve(0, 1<<20);
+ pr_info("Reserving the low 1M of memory for crashkernel\n");
+}
+
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 12c70840980e..cd8839027f66 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -101,6 +101,12 @@ void kernel_fpu_begin(void)
copy_fpregs_to_fpstate(&current->thread.fpu);
}
__cpu_invalidate_fpregs_state();
+
+ if (boot_cpu_has(X86_FEATURE_XMM))
+ ldmxcsr(MXCSR_DEFAULT);
+
+ if (boot_cpu_has(X86_FEATURE_FPU))
+ asm volatile ("fninit");
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6ce7e0a23268..b271da0fa219 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -242,9 +242,9 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
- char arg[32];
+ char arg[128];
char *argptr = arg;
- int bit;
+ int arglen, res, bit;
#ifdef CONFIG_X86_32
if (cmdline_find_option_bool(boot_command_line, "no387"))
@@ -267,12 +267,26 @@ static void __init fpu__init_parse_early_param(void)
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
- if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
- sizeof(arg)) &&
- get_option(&argptr, &bit) &&
- bit >= 0 &&
- bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
+ arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
+ if (arglen <= 0)
+ return;
+
+ pr_info("Clearing CPUID bits:");
+ do {
+ res = get_option(&argptr, &bit);
+ if (res == 0 || res == 3)
+ break;
+
+ /* If the argument was too long, the last bit may be cut off */
+ if (res == 1 && arglen >= sizeof(arg))
+ break;
+
+ if (bit >= 0 && bit < NCAPINTS * 32) {
+ pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+ setup_clear_cpu_cap(bit);
+ }
+ } while (res == 2);
+ pr_cont("\n");
}
/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c94fec268ef2..735d1f1bbabc 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -895,8 +895,6 @@ const void *get_xsave_field_ptr(int xfeature_nr)
#ifdef CONFIG_ARCH_HAS_PKEYS
-#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
-#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
* This will go out and modify PKRU register to set the access
* rights for @pkey to @init_val.
@@ -915,6 +913,13 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
+ /*
+ * This code should only be called with valid 'pkey'
+ * values originating from in-kernel users. Complain
+ * if a bad value is observed.
+ */
+ WARN_ON_ONCE(pkey >= arch_max_pkey());
+
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
new_pkru_bits |= PKRU_AD_BIT;
@@ -1017,7 +1022,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
copy_part(offsetof(struct fxregs_state, st_space), 128,
&xsave->i387.st_space, &kbuf, &offset_start, &count);
if (header.xfeatures & XFEATURE_MASK_SSE)
- copy_part(xstate_offsets[XFEATURE_MASK_SSE], 256,
+ copy_part(xstate_offsets[XFEATURE_SSE], 256,
&xsave->i387.xmm_space, &kbuf, &offset_start, &count);
/*
* Fill xsave->i387.sw_reserved value for ptrace frame:
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 519649ddf100..fe522691ac71 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -207,7 +207,7 @@ spurious_8259A_irq:
* lets ACK and report it. [once per IRQ]
*/
if (!(spurious_irq_mask & irqmask)) {
- printk(KERN_DEBUG
+ printk_deferred(KERN_DEBUG
"spurious 8259A interrupt: IRQ%d.\n", irq);
spurious_irq_mask |= irqmask;
}
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 87ef69a72c52..7bb4c3cbf4dc 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -318,7 +318,11 @@ void __init idt_setup_apic_and_irq_gates(void)
#ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
- set_bit(i, system_vectors);
+ /*
+ * Don't set the non assigned system vectors in the
+ * system_vectors bitmap. Otherwise they show up in
+ * /proc/interrupts.
+ */
entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
set_intr_gate(i, entry);
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 12df3a4abfdd..6b32ab009c19 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -43,7 +43,7 @@ static int map_irq_stack(unsigned int cpu)
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
}
- va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL);
if (!va)
return -ENOMEM;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 43fc13c831af..62c39baea39e 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -746,16 +746,11 @@ asm(
NOKPROBE_SYMBOL(kretprobe_trampoline);
STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
-static struct kprobe kretprobe_kprobe = {
- .addr = (void *)kretprobe_trampoline,
-};
-
/*
* Called from kretprobe_trampoline
*/
__used __visible void *trampoline_handler(struct pt_regs *regs)
{
- struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
struct hlist_node *tmp;
@@ -765,16 +760,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
void *frame_pointer;
bool skipped = false;
- preempt_disable();
-
/*
* Set a dummy kprobe for avoiding kretprobe recursion.
* Since kretprobe never run in kprobe handler, kprobe must not
* be running at this point.
*/
- kcb = get_kprobe_ctlblk();
- __this_cpu_write(current_kprobe, &kretprobe_kprobe);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ kprobe_busy_begin();
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
@@ -850,7 +841,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
__this_cpu_write(current_kprobe, &ri->rp->kp);
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, &kretprobe_kprobe);
+ __this_cpu_write(current_kprobe, &kprobe_busy);
}
recycle_rp_inst(ri, &empty_rp);
@@ -866,8 +857,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
- __this_cpu_write(current_kprobe, NULL);
- preempt_enable();
+ kprobe_busy_end();
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 54c21d6abd5a..5bb001c0c771 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -106,7 +106,6 @@ fs_initcall(nmi_warning_debugfs);
static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
- u64 whole_msecs = READ_ONCE(action->max_duration);
int remainder_ns, decimal_msecs;
if (duration < nmi_longest_ns || duration < action->max_duration)
@@ -114,12 +113,12 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
action->max_duration = duration;
- remainder_ns = do_div(whole_msecs, (1000 * 1000));
+ remainder_ns = do_div(duration, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
- action->handler, whole_msecs, decimal_msecs);
+ action->handler, duration, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index af64519b2695..da3cc3a10d63 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -316,7 +316,7 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
*/
mutex_lock(&task->mm->context.lock);
ldt = task->mm->context.ldt;
- if (unlikely(idx >= ldt->nr_entries))
+ if (unlikely(!ldt || idx >= ldt->nr_entries))
base = 0;
else
base = get_desc_base(ldt->entries + idx);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9674321ce3a3..8367bd7a9a81 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1599,14 +1599,28 @@ int native_cpu_disable(void)
if (ret)
return ret;
- /*
- * Disable the local APIC. Otherwise IPI broadcasts will reach
- * it. It still responds normally to INIT, NMI, SMI, and SIPI
- * messages.
- */
- apic_soft_disable();
cpu_disable_common();
+ /*
+ * Disable the local APIC. Otherwise IPI broadcasts will reach
+ * it. It still responds normally to INIT, NMI, SMI, and SIPI
+ * messages.
+ *
+ * Disabling the APIC must happen after cpu_disable_common()
+ * which invokes fixup_irqs().
+ *
+ * Disabling the APIC preserves already set bits in IRR, but
+ * an interrupt arriving after disabling the local APIC does not
+ * set the corresponding IRR bit.
+ *
+ * fixup_irqs() scans IRR for set bits so it can raise a not
+ * yet handled interrupt on the new destination CPU via an IPI
+ * but obviously it can't do so for IRR bits which are not set.
+ * IOW, interrupts arriving after disabling the local APIC will
+ * be lost.
+ */
+ apic_soft_disable();
+
return 0;
}
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 2d6898c2cb64..6d83b4b857e6 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -58,7 +58,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
* or a page fault), which can make frame pointers
* unreliable.
*/
-
if (IS_ENABLED(CONFIG_FRAME_POINTER))
return -EINVAL;
}
@@ -81,10 +80,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
if (unwind_error(&state))
return -EINVAL;
- /* Success path for non-user tasks, i.e. kthreads and idle tasks */
- if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
- return -EINVAL;
-
return 0;
}
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index c65adaf81384..41200706e6da 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -133,10 +133,15 @@ static const struct freq_desc freq_desc_ann = {
.mask = 0x0f,
};
-/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */
+/*
+ * 24 MHz crystal? : 24 * 13 / 4 = 78 MHz
+ * Frequency step for Lightning Mountain SoC is fixed to 78 MHz,
+ * so all the frequency entries are 78000.
+ */
static const struct freq_desc freq_desc_lgm = {
.use_msr_plat = true,
- .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
+ .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000,
+ 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
.mask = 0x0f,
};
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index aa0f39dc8129..187a86e0e753 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -431,8 +431,11 @@ bool unwind_next_frame(struct unwind_state *state)
/*
* Find the orc_entry associated with the text address.
*
- * Decrement call return addresses by one so they work for sibling
- * calls and calls to noreturn functions.
+ * For a call frame (as opposed to a signal frame), state->ip points to
+ * the instruction after the call. That instruction's stack layout
+ * could be different from the call instruction's layout, for example
+ * if the call was to a noreturn function. So get the ORC data for the
+ * call instruction itself.
*/
orc = orc_find(state->signal ? state->ip : state->ip - 1);
if (!orc) {
@@ -653,6 +656,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
state->sp = task->thread.sp;
state->bp = READ_ONCE_NOCHECK(frame->bp);
state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
+ state->signal = (void *)state->ip == ret_from_fork;
}
if (get_stack_info((unsigned long *)state->sp, state->task,
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bac1a65a9d39..1afe211d7a7c 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -362,6 +362,7 @@ SECTIONS
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
*(.bss..page_aligned)
+ . = ALIGN(PAGE_SIZE);
*(BSS_MAIN)
BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 128d3ad46e96..484c32b7f79f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3617,7 +3617,7 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt)
u64 tsc_aux = 0;
if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
- return emulate_gp(ctxt, 0);
+ return emulate_ud(ctxt);
ctxt->dst.val = tsc_aux;
return X86EMUL_CONTINUE;
}
@@ -5836,6 +5836,8 @@ writeback:
}
ctxt->eip = ctxt->_eip;
+ if (ctxt->mode != X86EMUL_MODE_PROT64)
+ ctxt->eip = (u32)ctxt->_eip;
done:
if (rc == X86EMUL_PROPAGATE_FAULT) {
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 1cc6c47dc77e..341f58a01de0 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -7,7 +7,7 @@
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)
#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5d2587005d0e..9f793c9649cd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1684,7 +1684,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -2085,7 +2085,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
+ if (!kvm_apic_present(vcpu) || apic_lvtt_oneshot(apic) ||
apic_lvtt_period(apic))
return;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a3824ae9a634..b90e8fd2f6ce 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1819,10 +1819,10 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
* Emulate arch specific page modification logging for the
* nested hypervisor
*/
-int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa)
{
if (kvm_x86_ops->write_log_dirty)
- return kvm_x86_ops->write_log_dirty(vcpu);
+ return kvm_x86_ops->write_log_dirty(vcpu, l2_gpa);
return 0;
}
@@ -2045,7 +2045,8 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
}
@@ -4580,7 +4581,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- nonleaf_bit8_rsvd | gbpages_bit_rsvd |
+ gbpages_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
@@ -6452,6 +6453,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
cond_resched_lock(&kvm->mmu_lock);
}
}
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, rcu_idx);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d55674f44a18..6f2208cf30df 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -209,7 +209,7 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
-int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 3c6522b84ff1..ffcd96fc02d0 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -339,7 +339,7 @@ TRACE_EVENT(
/* These depend on page entry type, so compute them now. */
__field(bool, r)
__field(bool, x)
- __field(u8, u)
+ __field(signed char, u)
),
TP_fast_assign(
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4e3f137ffa8c..a20fc1ba607f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -220,7 +220,7 @@ static inline unsigned FNAME(gpte_access)(u64 gpte)
static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu,
struct guest_walker *walker,
- int write_fault)
+ gpa_t addr, int write_fault)
{
unsigned level, index;
pt_element_t pte, orig_pte;
@@ -245,7 +245,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
!(pte & PT_GUEST_DIRTY_MASK)) {
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
#if PTTYPE == PTTYPE_EPT
- if (kvm_arch_write_log_dirty(vcpu))
+ if (kvm_arch_write_log_dirty(vcpu, addr))
return -EINVAL;
#endif
pte |= PT_GUEST_DIRTY_MASK;
@@ -442,7 +442,8 @@ retry_walk:
(PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT);
if (unlikely(!accessed_dirty)) {
- ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+ ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker,
+ addr, write_fault);
if (unlikely(ret < 0))
goto error;
else if (ret)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3243a80ea32c..c79c1a07f44b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -787,9 +787,6 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
return 0;
} else {
- if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
- pr_err("%s: ip 0x%lx next 0x%llx\n",
- __func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip);
}
svm_set_interrupt_shadow(vcpu, 0);
@@ -3970,6 +3967,12 @@ static int iret_interception(struct vcpu_svm *svm)
return 1;
}
+static int invd_interception(struct vcpu_svm *svm)
+{
+ /* Treat an INVD instruction as a NOP and just skip it. */
+ return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -4822,7 +4825,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_RDPMC] = rdpmc_interception,
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
- [SVM_EXIT_INVD] = emulate_on_interception,
+ [SVM_EXIT_INVD] = invd_interception,
[SVM_EXIT_PAUSE] = pause_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
@@ -5380,6 +5383,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* - Tell IOMMU to use legacy mode for this interrupt.
* - Retrieve ga_tag of prior interrupt remapping data.
*/
+ pi.prev_ga_tag = 0;
pi.is_guest_mode = false;
ret = irq_set_vcpu_affinity(host_irq, &pi);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a460ddf04d60..08e1e7544f82 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2231,6 +2231,8 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+
+ vmx->segment_cache.bitmask = 0;
}
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
@@ -3094,8 +3096,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
prepare_vmcs02_early(vmx, vmcs12);
if (from_vmentry) {
- if (unlikely(!nested_get_vmcs12_pages(vcpu)))
+ if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
+ }
if (nested_vmx_check_vmentry_hw(vcpu)) {
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5fac01865a2d..2a1ed3aae100 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1130,6 +1130,10 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->guest_msrs[i].mask);
}
+
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
+ nested_sync_vmcs12_to_shadow(vcpu);
+
if (vmx->guest_state_loaded)
return;
@@ -1537,7 +1541,7 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
- unsigned long rip;
+ unsigned long rip, orig_rip;
/*
* Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
@@ -1549,8 +1553,17 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
*/
if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
- rip = kvm_rip_read(vcpu);
- rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ orig_rip = kvm_rip_read(vcpu);
+ rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+#ifdef CONFIG_X86_64
+ /*
+ * We need to mask out the high 32 bits of RIP if not in 64-bit
+ * mode, but just finding out that we are in 64-bit mode is
+ * quite expensive. Only do it if there was a carry.
+ */
+ if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu))
+ rip = (u32)rip;
+#endif
kvm_rip_write(vcpu, rip);
} else {
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
@@ -3913,6 +3926,8 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
{
+ BUILD_BUG_ON(KVM_CR4_GUEST_OWNED_BITS & ~KVM_POSSIBLE_CR4_GUEST_BITS);
+
vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
if (enable_ept)
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
@@ -5893,6 +5908,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
exit_reason != EXIT_REASON_PML_FULL &&
+ exit_reason != EXIT_REASON_APIC_ACCESS &&
exit_reason != EXIT_REASON_TASK_SWITCH)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
@@ -6427,23 +6443,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
-{
- u32 host_umwait_control;
-
- if (!vmx_has_waitpkg(vmx))
- return;
-
- host_umwait_control = get_umwait_control_msr();
-
- if (vmx->msr_ia32_umwait_control != host_umwait_control)
- add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
- vmx->msr_ia32_umwait_control,
- host_umwait_control, false);
- else
- clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
-}
-
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6500,8 +6499,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
- if (vmx->nested.need_vmcs12_to_shadow_sync)
- nested_sync_vmcs12_to_shadow(vcpu);
+ /*
+ * We did this in prepare_switch_to_guest, because it needs to
+ * be within srcu_read_lock.
+ */
+ WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -6533,7 +6535,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
pt_guest_enter(vmx);
atomic_switch_perf_msrs(vmx);
- atomic_switch_umwait_control_msr(vmx);
if (enable_preemption_timer)
vmx_update_hv_timer(vcpu);
@@ -7272,11 +7273,11 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
kvm_flush_pml_buffers(kvm);
}
-static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
+static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
{
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- gpa_t gpa, dst;
+ gpa_t dst;
if (is_guest_mode(vcpu)) {
WARN_ON_ONCE(vmx->nested.pml_full);
@@ -7295,7 +7296,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
return 1;
}
- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+ gpa &= ~0xFFFull;
dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 295c5f83842e..a1919ec7fd10 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -14,8 +14,6 @@
extern const u32 vmx_msr_index[];
extern u64 host_efer;
-extern u32 get_umwait_control_msr(void);
-
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
#define MSR_TYPE_RW 3
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fff279fb173b..12e83297ea02 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -972,7 +972,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
- X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+ X86_CR4_SMEP;
+ unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
if (kvm_valid_cr4(vcpu, cr4))
return 1;
@@ -980,6 +981,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE))
return 1;
+ if ((cr4 ^ old_cr4) & X86_CR4_LA57)
+ return 1;
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
&& ((cr4 ^ old_cr4) & pdptr_bits)
&& !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
@@ -998,7 +1001,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (kvm_x86_ops->set_cr4(vcpu, cr4))
return 1;
- if (((cr4 ^ old_cr4) & pdptr_bits) ||
+ if (((cr4 ^ old_cr4) & mmu_role_bits) ||
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
@@ -2753,7 +2756,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
return kvm_set_apic_base(vcpu, msr_info);
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_TSCDEADLINE:
kvm_set_lapic_tscdeadline_msr(vcpu, data);
@@ -3057,7 +3060,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_APICBASE:
msr_info->data = kvm_get_apic_base(vcpu);
break;
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
break;
case MSR_IA32_TSCDEADLINE:
@@ -5048,10 +5051,13 @@ set_identity_unlock:
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit_out;
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
+set_pit_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_GET_PIT2: {
@@ -5071,10 +5077,13 @@ set_identity_unlock:
r = -EFAULT;
if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit2_out;
r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+set_pit2_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_REINJECT_CONTROL: {
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index fff28c6f73a2..1847e993ac63 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
asm volatile(
" testq %[size8],%[size8]\n"
" jz 4f\n"
+ " .align 16\n"
"0: movq $0,(%[dst])\n"
" addq $8,%[dst]\n"
" decl %%ecx ; jnz 0b\n"
@@ -119,7 +120,7 @@ long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
*/
if (size < 8) {
if (!IS_ALIGNED(dest, 4) || size != 4)
- clean_cache_range(dst, 1);
+ clean_cache_range(dst, size);
} else {
if (!IS_ALIGNED(dest, 8)) {
dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S
index f031c0e19356..515cdee90df7 100644
--- a/arch/x86/math-emu/wm_sqrt.S
+++ b/arch/x86/math-emu/wm_sqrt.S
@@ -209,7 +209,7 @@ sqrt_stage_2_finish:
#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
- cmp $0xffffffff,FPU_fsqrt_arg_1
+ cmpl $0xffffffff,FPU_fsqrt_arg_1
jnz sqrt_stage_2_error
#endif /* PARANOID */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd10d91a6115..af352e228fa2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -121,8 +121,6 @@ __ref void *alloc_low_pages(unsigned int num)
} else {
pfn = pgt_buf_end;
pgt_buf_end += num;
- printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
- pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
}
for (i = 0; i < num; i++) {
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index abffa0be80da..87282258d5be 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
u64 addr, u64 max_addr, u64 size)
{
return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
- 0, NULL, NUMA_NO_NODE);
+ 0, NULL, 0);
}
int __init setup_emu2phys_nid(int *dfl_phys_nid)
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 43867bc85368..eea5a0f3b959 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -33,6 +33,7 @@
#include <asm/hw_irq.h>
#include <asm/io_apic.h>
#include <asm/intel-mid.h>
+#include <asm/acpi.h>
#define PCIE_CAP_OFFSET 0x100
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 91220cc25854..5c11ae66b5d8 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -26,6 +26,7 @@
#include <asm/xen/pci.h>
#include <asm/xen/cpuid.h>
#include <asm/apic.h>
+#include <asm/acpi.h>
#include <asm/i8259.h>
static int xen_pcifront_enable_irq(struct pci_dev *dev)
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index fc13cbbb2dce..abb6075397f0 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -167,9 +167,10 @@ static struct irq_domain *uv_get_irq_domain(void)
goto out;
uv_domain = irq_domain_create_tree(fn, &uv_domain_ops, NULL);
- irq_domain_free_fwnode(fn);
if (uv_domain)
uv_domain->parent = x86_vector_domain;
+ else
+ irq_domain_free_fwnode(fn);
out:
mutex_unlock(&uv_lock);
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index fb4ee5444379..9733d1cc791d 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -17,7 +17,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
targets += purgatory.ro
+# Sanitizer, etc. runtimes are unavailable and cannot be linked here.
+GCOV_PROFILE := n
KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# These are adjustments to the compiler flags used for objects that
@@ -25,7 +28,7 @@ KCOV_INSTRUMENT := n
PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
-PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN)
+PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 7dce39c8c034..262f83cad355 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -8,6 +8,7 @@
#include <asm/pgtable.h>
#include <asm/realmode.h>
#include <asm/tlbflush.h>
+#include <asm/crash.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -34,6 +35,7 @@ void __init reserve_real_mode(void)
memblock_reserve(mem, size);
set_real_mode_mem(mem);
+ crash_reserve_low_1M();
}
static void __init setup_real_mode(void)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 6d4d8a5700b7..3e66feff524a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1382,6 +1382,15 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
xen_boot_params_init_edd();
+
+#ifdef CONFIG_ACPI
+ /*
+ * Disable selecting "Firmware First mode" for correctable
+ * memory errors, as this is the duty of the hypervisor to
+ * decide.
+ */
+ acpi_disable_cmcff = 1;
+#endif
}
if (!boot_params.screen_info.orig_video_isVGA)