summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorMarcel Ziswiler <marcel.ziswiler@toradex.com>2018-12-12 14:08:45 +0100
committerMarcel Ziswiler <marcel.ziswiler@toradex.com>2018-12-12 14:08:45 +0100
commitcfbbc7703fff59c67761c93a8b1de29a79f9841c (patch)
tree58b4b37bed385b27fc5956435b2451c760f26f5f /arch/x86
parent5f3fecbc0715a70437501e1d85e74726c4f561be (diff)
parent1aa861ff238ecd17a3095b0dbd2d20bdf7bfaf14 (diff)
Merge tag 'v4.9.144' into 4.9-2.3.x-imx
This is the 4.9.144 stable release
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile39
-rw-r--r--arch/x86/boot/string.c9
-rw-r--r--arch/x86/boot/tools/build.c7
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S7
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c17
-rw-r--r--arch/x86/entry/entry_64.S4
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c26
-rw-r--r--arch/x86/events/core.c20
-rw-r--r--arch/x86/events/intel/core.c52
-rw-r--r--arch/x86/events/intel/lbr.c36
-rw-r--r--arch/x86/events/intel/uncore_snb.c115
-rw-r--r--arch/x86/events/intel/uncore_snbep.c12
-rw-r--r--arch/x86/events/perf_event.h14
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/fpu/internal.h37
-rw-r--r--arch/x86/include/asm/fpu/types.h34
-rw-r--r--arch/x86/include/asm/nospec-branch.h1
-rw-r--r--arch/x86/include/asm/percpu.h8
-rw-r--r--arch/x86/include/asm/pgtable_types.h2
-rw-r--r--arch/x86/include/asm/suspend_64.h5
-rw-r--r--arch/x86/include/asm/trace/fpu.h5
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/kernel/check.c15
-rw-r--r--arch/x86/kernel/cpu/bugs.c20
-rw-r--r--arch/x86/kernel/cpu/common.c3
-rw-r--r--arch/x86/kernel/cpu/cyrix.c1
-rw-r--r--arch/x86/kernel/fpu/core.c39
-rw-r--r--arch/x86/kernel/fpu/signal.c9
-rw-r--r--arch/x86/kernel/fpu/xstate.c9
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/tsc_msr.c1
-rw-r--r--arch/x86/kvm/cpuid.c4
-rw-r--r--arch/x86/kvm/lapic.c22
-rw-r--r--arch/x86/kvm/mmu.c27
-rw-r--r--arch/x86/kvm/svm.c39
-rw-r--r--arch/x86/kvm/x86.c13
-rw-r--r--arch/x86/mm/numa_emulation.c2
-rw-r--r--arch/x86/mm/pkeys.c3
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-rtc.c3
-rw-r--r--arch/x86/power/hibernate_64.c6
-rw-r--r--arch/x86/xen/spinlock.c35
42 files changed, 406 insertions, 304 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f408babdf746..b5226a009973 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -11,6 +11,16 @@ else
KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif
+# For gcc stack alignment is specified with -mpreferred-stack-boundary,
+# clang has the option -mstack-alignment for that purpose.
+ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
+ cc_stack_align4 := -mpreferred-stack-boundary=2
+ cc_stack_align8 := -mpreferred-stack-boundary=3
+else ifneq ($(call cc-option, -mstack-alignment=16),)
+ cc_stack_align4 := -mstack-alignment=4
+ cc_stack_align8 := -mstack-alignment=8
+endif
+
# How to compile the 16-bit code. Note we always compile for -march=i386;
# that way we can complain to the user if the CPU is insufficient.
#
@@ -24,10 +34,11 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
-DDISABLE_BRANCH_PROFILING \
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
- -mno-mmx -mno-sse \
- $(call cc-option, -ffreestanding) \
- $(call cc-option, -fno-stack-protector) \
- $(call cc-option, -mpreferred-stack-boundary=2)
+ -mno-mmx -mno-sse
+
+REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding)
+REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector)
+REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
@@ -64,8 +75,10 @@ ifeq ($(CONFIG_X86_32),y)
# with nonstandard options
KBUILD_CFLAGS += -fno-pic
- # prevent gcc from keeping the stack 16 byte aligned
- KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
+ # Align the stack to the register width instead of using the default
+ # alignment of 16 bytes. This reduces stack usage and the number of
+ # alignment instructions.
+ KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
@@ -88,17 +101,23 @@ else
KBUILD_CFLAGS += -m64
# Align jump targets to 1 byte, not the default 16 bytes:
- KBUILD_CFLAGS += -falign-jumps=1
+ KBUILD_CFLAGS += $(call cc-option,-falign-jumps=1)
# Pack loops tightly as well:
- KBUILD_CFLAGS += -falign-loops=1
+ KBUILD_CFLAGS += $(call cc-option,-falign-loops=1)
# Don't autogenerate traditional x87 instructions
KBUILD_CFLAGS += $(call cc-option,-mno-80387)
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
- # Use -mpreferred-stack-boundary=3 if supported.
- KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
+ # By default gcc and clang use a stack alignment of 16 bytes for x86.
+ # However the standard kernel entry on x86-64 leaves the stack on an
+ # 8-byte boundary. If the compiler isn't informed about the actual
+ # alignment it will generate extra alignment instructions for the
+ # default alignment which keep the stack *mis*aligned.
+ # Furthermore an alignment to the register width reduces stack usage
+ # and the number of alignment instructions.
+ KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8))
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 9e240fcba784..08dfce02362c 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -16,6 +16,15 @@
#include "ctype.h"
#include "string.h"
+/*
+ * Undef these macros so that the functions that we provide
+ * here will have the correct names regardless of how string.h
+ * may have chosen to #define them.
+ */
+#undef memcpy
+#undef memset
+#undef memcmp
+
int memcmp(const void *s1, const void *s2, size_t len)
{
bool diff;
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 0702d2531bc7..039c4a66aca4 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -390,6 +390,13 @@ int main(int argc, char ** argv)
die("Unable to mmap '%s': %m", argv[2]);
/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
sys_size = (sz + 15 + 4) / 16;
+#ifdef CONFIG_EFI_STUB
+ /*
+ * COFF requires minimum 32-byte alignment of sections, and
+ * adding a signature is problematic without that alignment.
+ */
+ sys_size = (sys_size + 1) & ~1;
+#endif
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index a916c4a61165..5f6a5af9c489 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -65,7 +65,6 @@
#include <linux/linkage.h>
#include <asm/inst.h>
-#define CONCAT(a,b) a##b
#define VMOVDQ vmovdqu
#define xdata0 %xmm0
@@ -92,8 +91,6 @@
#define num_bytes %r8
#define tmp %r10
-#define DDQ(i) CONCAT(ddq_add_,i)
-#define XMM(i) CONCAT(%xmm, i)
#define DDQ_DATA 0
#define XDATA 1
#define KEY_128 1
@@ -131,12 +128,12 @@ ddq_add_8:
/* generate a unique variable for ddq_add_x */
.macro setddq n
- var_ddq_add = DDQ(\n)
+ var_ddq_add = ddq_add_\n
.endm
/* generate a unique variable for xmm register */
.macro setxdata n
- var_xdata = XMM(\n)
+ var_xdata = %xmm\n
.endm
/* club the numeric 'id' to the symbol 'name' */
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index dd1958436591..5773e1161072 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -48,21 +48,13 @@
#ifdef CONFIG_X86_64
/*
* use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
* for fpu state save/restore overhead.
*/
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
+#define CRC32C_PCL_BREAKEVEN 512
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#define set_pcl_breakeven_point() \
-do { \
- if (!use_eager_fpu()) \
- crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
-} while (0)
#endif /* CONFIG_X86_64 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -185,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
kernel_fpu_begin();
*crcp = crc_pcl(data, len, *crcp);
kernel_fpu_end();
@@ -197,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
kernel_fpu_begin();
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
kernel_fpu_end();
@@ -257,7 +249,6 @@ static int __init crc32c_intel_mod_init(void)
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
- set_pcl_breakeven_point();
}
#endif
return crypto_register_shash(&alg);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 76c1d85e749b..870e941c1947 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -91,7 +91,7 @@ ENDPROC(native_usergs_sysret64)
.endm
.macro TRACE_IRQS_IRETQ_DEBUG
- bt $9, EFLAGS(%rsp) /* interrupts off? */
+ btl $9, EFLAGS(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON_DEBUG
1:
@@ -485,7 +485,7 @@ retint_kernel:
#ifdef CONFIG_PREEMPT
/* Interrupts are off */
/* Check if we need preemption */
- bt $9, EFLAGS(%rsp) /* were interrupts off? */
+ btl $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
0: cmpl $0, PER_CPU_VAR(__preempt_count)
jnz 1f
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 02223cb4bcfd..1e967099ae51 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -37,8 +37,9 @@ extern u8 pvclock_page
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*ts) :
+ "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -46,8 +47,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
+ "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -58,13 +60,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[clock], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+ : "=a" (ret), "=m" (*ts)
+ : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
: "memory", "edx");
return ret;
}
@@ -73,13 +75,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[tv], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+ : "=a" (ret), "=m" (*tv), "=m" (*tz)
+ : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
: "memory", "edx");
return ret;
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 655a65eaf105..cadf99923600 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -437,26 +437,6 @@ int x86_setup_perfctr(struct perf_event *event)
if (config == -1LL)
return -EINVAL;
- /*
- * Branch tracing:
- */
- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !attr->freq && hwc->sample_period == 1) {
- /* BTS is not supported by this architecture. */
- if (!x86_pmu.bts_active)
- return -EOPNOTSUPP;
-
- /* BTS is currently only allowed for user-mode. */
- if (!attr->exclude_kernel)
- return -EOPNOTSUPP;
-
- /* disallow bts if conflicting events are present */
- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
- return -EBUSY;
-
- event->destroy = hw_perf_lbr_event_destroy;
- }
-
hwc->config |= config;
return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 815039327932..4f8560774082 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2198,16 +2198,7 @@ done:
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
- unsigned int hw_event, bts_event;
-
- if (event->attr.freq)
- return NULL;
-
- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+ if (unlikely(intel_pmu_has_bts(event)))
return &bts_constraint;
return NULL;
@@ -2822,6 +2813,39 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
return flags;
}
+static int intel_pmu_bts_config(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (unlikely(intel_pmu_has_bts(event))) {
+ /* BTS is not supported by this architecture. */
+ if (!x86_pmu.bts_active)
+ return -EOPNOTSUPP;
+
+ /* BTS is currently only allowed for user-mode. */
+ if (!attr->exclude_kernel)
+ return -EOPNOTSUPP;
+
+ /* disallow bts if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+
+ event->destroy = hw_perf_lbr_event_destroy;
+ }
+
+ return 0;
+}
+
+static int core_pmu_hw_config(struct perf_event *event)
+{
+ int ret = x86_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+
+ return intel_pmu_bts_config(event);
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -2829,6 +2853,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ ret = intel_pmu_bts_config(event);
+ if (ret)
+ return ret;
+
if (event->attr.precise_ip) {
if (!event->attr.freq) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
@@ -2848,7 +2876,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
/*
* BTS is set up earlier in this path, so don't account twice
*/
- if (!intel_pmu_has_bts(event)) {
+ if (!unlikely(intel_pmu_has_bts(event))) {
/* disallow lbr if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
@@ -3265,7 +3293,7 @@ static __initconst const struct x86_pmu core_pmu = {
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
+ .hw_config = core_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 5d103a87e984..10c1a5c448e5 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -342,7 +342,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
mask = x86_pmu.lbr_nr - 1;
tos = task_ctx->tos;
- for (i = 0; i < tos; i++) {
+ for (i = 0; i < task_ctx->valid_lbrs; i++) {
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
wrlbr_to (lbr_idx, task_ctx->lbr_to[i]);
@@ -350,6 +350,15 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+
+ for (; i < x86_pmu.lbr_nr; i++) {
+ lbr_idx = (tos - i) & mask;
+ wrlbr_from(lbr_idx, 0);
+ wrlbr_to(lbr_idx, 0);
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
+ }
+
wrmsrl(x86_pmu.lbr_tos, tos);
task_ctx->lbr_stack_state = LBR_NONE;
}
@@ -357,7 +366,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
unsigned lbr_idx, mask;
- u64 tos;
+ u64 tos, from;
int i;
if (task_ctx->lbr_callstack_users == 0) {
@@ -367,13 +376,17 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
mask = x86_pmu.lbr_nr - 1;
tos = intel_pmu_lbr_tos();
- for (i = 0; i < tos; i++) {
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
lbr_idx = (tos - i) & mask;
- task_ctx->lbr_from[i] = rdlbr_from(lbr_idx);
+ from = rdlbr_from(lbr_idx);
+ if (!from)
+ break;
+ task_ctx->lbr_from[i] = from;
task_ctx->lbr_to[i] = rdlbr_to(lbr_idx);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+ task_ctx->valid_lbrs = i;
task_ctx->tos = tos;
task_ctx->lbr_stack_state = LBR_VALID;
}
@@ -522,7 +535,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
*/
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
- bool need_info = false;
+ bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
@@ -533,7 +546,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (cpuc->lbr_sel) {
need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
if (cpuc->lbr_sel->config & LBR_CALL_STACK)
- num = tos;
+ call_stack = true;
}
for (i = 0; i < num; i++) {
@@ -546,6 +559,13 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
from = rdlbr_from(lbr_idx);
to = rdlbr_to(lbr_idx);
+ /*
+ * Read LBR call stack entries
+ * until invalid entry (0s) is detected.
+ */
+ if (call_stack && !from)
+ break;
+
if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
@@ -1175,4 +1195,8 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ /* Knights Landing does have MISPREDICT bit */
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
+ x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index a3dcc12bef4a..8c700069060b 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -14,6 +14,25 @@
#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910
#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f
#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f
+#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c
+#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904
+#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914
+#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f
+#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f
+#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc
+#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0
+#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10
+#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4
+#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f
+#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f
+#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2
+#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30
+#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18
+#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6
+#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31
+#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33
+#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca
+#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -631,7 +650,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
-
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ },
};
@@ -680,6 +774,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */
IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */
IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */
+ IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */
+ IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */
+ IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */
+ IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */
+ IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */
+ IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */
+ IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */
+ IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */
+ IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */
+ IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */
+ IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */
+ IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */
+ IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */
+ IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */
+ IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */
+ IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */
+ IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */
+ IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */
+ IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */
{ /* end marker */ }
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 6bc36944a8c1..8c2a9fa0caf3 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3767,16 +3767,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
},
{ /* M3UPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0),
},
{ /* M3UPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1),
},
{ /* M3UPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
- .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2),
},
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f3563179290b..7ace39c51ff7 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -633,6 +633,7 @@ struct x86_perf_task_context {
u64 lbr_to[MAX_LBR_ENTRIES];
u64 lbr_info[MAX_LBR_ENTRIES];
int tos;
+ int valid_lbrs;
int lbr_callstack_users;
int lbr_stack_state;
};
@@ -834,11 +835,16 @@ static inline int amd_pmu_init(void)
static inline bool intel_pmu_has_bts(struct perf_event *event)
{
- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !event->attr.freq && event->hw.sample_period == 1)
- return true;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int hw_event, bts_event;
+
+ if (event->attr.freq)
+ return false;
+
+ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
- return false;
+ return hw_event == bts_event && hwc->sample_period == 1;
}
int intel_pmu_save_and_restart(struct perf_event *event);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index fbc1474960e3..c56c24347f15 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-/* free, was #define X86_FEATURE_EAGER_FPU ( 3*32+29) * "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
@@ -214,6 +213,7 @@
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 8852e3afa1ad..499d6ed0e376 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
/*
* FPU related CPU feature flag helper routines:
*/
-static __always_inline __pure bool use_eager_fpu(void)
-{
- return true;
-}
-
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -501,24 +496,6 @@ static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
}
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
-{
- if (!use_eager_fpu())
- clts();
-}
-
-static inline void __fpregs_deactivate_hw(void)
-{
- if (!use_eager_fpu())
- stts();
-}
-
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
static inline void __fpregs_deactivate(struct fpu *fpu)
{
WARN_ON_FPU(!fpu->fpregs_active);
@@ -528,7 +505,6 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
trace_x86_fpu_regs_deactivated(fpu);
}
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
static inline void __fpregs_activate(struct fpu *fpu)
{
WARN_ON_FPU(fpu->fpregs_active);
@@ -554,22 +530,17 @@ static inline int fpregs_active(void)
}
/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
* These generally need preemption protection to work,
* do try to avoid using these on their own.
*/
static inline void fpregs_activate(struct fpu *fpu)
{
- __fpregs_activate_hw();
__fpregs_activate(fpu);
}
static inline void fpregs_deactivate(struct fpu *fpu)
{
__fpregs_deactivate(fpu);
- __fpregs_deactivate_hw();
}
/*
@@ -596,8 +567,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
* or if the past 5 consecutive context-switches used math.
*/
fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active &&
- (use_eager_fpu() || new_fpu->counter > 5);
+ new_fpu->fpstate_active;
if (old_fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(old_fpu))
@@ -611,18 +581,13 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
- new_fpu->counter++;
__fpregs_activate(new_fpu);
trace_x86_fpu_regs_activated(new_fpu);
prefetch(&new_fpu->state);
- } else {
- __fpregs_deactivate_hw();
}
} else {
- old_fpu->counter = 0;
old_fpu->last_cpu = -1;
if (fpu.preload) {
- new_fpu->counter++;
if (fpu_want_lazy_restore(new_fpu, cpu))
fpu.preload = 0;
else
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486b02f9..3c80f5b9c09d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu {
unsigned char fpregs_active;
/*
- * @counter:
- *
- * This counter contains the number of consecutive context switches
- * during which the FPU stays used. If this is over a threshold, the
- * lazy FPU restore logic becomes eager, to save the trap overhead.
- * This is an unsigned char so that after 256 iterations the counter
- * wraps and the context switch behavior turns lazy again; this is to
- * deal with bursty apps that only use the FPU for a short time:
- */
- unsigned char counter;
- /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
@@ -340,29 +329,6 @@ struct fpu {
* the registers in the FPU are more recent than this state
* copy. If the task context-switches away then they get
* saved here and represent the FPU state.
- *
- * After context switches there may be a (short) time period
- * during which the in-FPU hardware registers are unchanged
- * and still perfectly match this state, if the tasks
- * scheduled afterwards are not using the FPU.
- *
- * This is the 'lazy restore' window of optimization, which
- * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
- *
- * We detect whether a subsequent task uses the FPU via setting
- * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
- *
- * During this window, if the task gets scheduled again, we
- * might be able to skip having to do a restore from this
- * memory buffer to the hardware registers - at the cost of
- * incurring the overhead of #NM fault traps.
- *
- * Note that on modern CPUs that support the XSAVEOPT (or other
- * optimized XSAVE instructions), we don't use #NM traps anymore,
- * as the hardware can track whether FPU registers need saving
- * or not. On such CPUs we activate the non-lazy ('eagerfpu')
- * logic, which unconditionally saves/restores all FPU state
- * across context switches. (if FPU state exists.)
*/
union fpregs_state state;
/*
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 8b38df98548e..1b4132161c1f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -215,6 +215,7 @@ enum spectre_v2_mitigation {
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
SPECTRE_V2_IBRS,
+ SPECTRE_V2_IBRS_ENHANCED,
};
/* The Speculative Store Bypass disable variants */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 84f58de08c2b..2cb5d0f13641 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -184,22 +184,22 @@ do { \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(1)",%0" \
+ asm volatile(op "b "__percpu_arg(1)",%0"\
: "=q" (pfo_ret__) \
: "m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_arg(1)",%0" \
+ asm volatile(op "w "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_arg(1)",%0" \
+ asm volatile(op "l "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_arg(1)",%0" \
+ asm volatile(op "q "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index dfdb7e21ba56..93d089c9509a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -134,7 +134,7 @@
*/
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
/* The ASID is the lower 12 bits of CR3 */
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 6136a18152af..2bd96b4df140 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -42,8 +42,7 @@ struct saved_context {
set_debugreg((thread)->debugreg##register, register)
/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern char core_restore_code;
-extern char restore_registers;
+extern char core_restore_code[];
+extern char restore_registers[];
#endif /* _ASM_X86_SUSPEND_64_H */
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 9217ab1f5bf6..342e59789fcd 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
__field(struct fpu *, fpu)
__field(bool, fpregs_active)
__field(bool, fpstate_active)
- __field(int, counter)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu,
__entry->fpu = fpu;
__entry->fpregs_active = fpu->fpregs_active;
__entry->fpstate_active = fpu->fpstate_active;
- __entry->counter = fpu->counter;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
__entry->fpregs_active,
__entry->fpstate_active,
- __entry->counter,
__entry->xfeatures,
__entry->xcomp_bv
)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 739c0c594022..1bb90fafcdc3 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -356,5 +356,6 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 145863d4d343..a8b215865636 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -30,6 +30,11 @@ static __init int set_corruption_check(char *arg)
ssize_t ret;
unsigned long val;
+ if (!arg) {
+ pr_err("memory_corruption_check config string not provided\n");
+ return -EINVAL;
+ }
+
ret = kstrtoul(arg, 10, &val);
if (ret)
return ret;
@@ -44,6 +49,11 @@ static __init int set_corruption_check_period(char *arg)
ssize_t ret;
unsigned long val;
+ if (!arg) {
+ pr_err("memory_corruption_check_period config string not provided\n");
+ return -EINVAL;
+ }
+
ret = kstrtoul(arg, 10, &val);
if (ret)
return ret;
@@ -58,6 +68,11 @@ static __init int set_corruption_check_size(char *arg)
char *end;
unsigned size;
+ if (!arg) {
+ pr_err("memory_corruption_check_size config string not provided\n");
+ return -EINVAL;
+ }
+
size = memparse(arg, &end);
if (*end == '\0')
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 8103adacbc83..6221166e3fca 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -139,6 +139,7 @@ static const char *spectre_v2_strings[] = {
[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+ [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
};
#undef pr_fmt
@@ -340,6 +341,13 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
+ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+ mode = SPECTRE_V2_IBRS_ENHANCED;
+ /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ goto specv2_set_mode;
+ }
if (IS_ENABLED(CONFIG_RETPOLINE))
goto retpoline_auto;
break;
@@ -377,6 +385,7 @@ retpoline_auto:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}
+specv2_set_mode:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -399,9 +408,16 @@ retpoline_auto:
/*
* Retpoline means the kernel is safe because it has no indirect
- * branches. But firmware isn't, so use IBRS to protect that.
+ * branches. Enhanced IBRS protects firmware too, so, enable restricted
+ * speculation around firmware calls only when Enhanced IBRS isn't
+ * supported.
+ *
+ * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
+ * the user might select retpoline on the kernel command line and if
+ * the CPU supports Enhanced IBRS, kernel might un-intentionally not
+ * enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dc0850bb74be..3c01610c5ba9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -959,6 +959,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ if (ia32_cap & ARCH_CAP_IBRS_ALL)
+ setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
+
if (x86_match_cpu(cpu_no_meltdown))
return;
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 455d8ada9b9a..d39cfb2c6b63 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -253,6 +253,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
break;
case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+ case 11: /* GX1 with inverted Device ID */
#ifdef CONFIG_PCI
{
u32 vendor, device;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 430c095cfa0e..fc965118d2e6 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -59,27 +59,9 @@ static bool kernel_fpu_disabled(void)
return this_cpu_read(in_kernel_fpu);
}
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
static bool interrupted_kernel_fpu_idle(void)
{
- if (kernel_fpu_disabled())
- return false;
-
- if (use_eager_fpu())
- return true;
-
- return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+ return !kernel_fpu_disabled();
}
/*
@@ -127,7 +109,6 @@ void __kernel_fpu_begin(void)
copy_fpregs_to_fpstate(fpu);
} else {
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
- __fpregs_activate_hw();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -138,8 +119,6 @@ void __kernel_fpu_end(void)
if (fpu->fpregs_active)
copy_kernel_to_fpregs(&fpu->state);
- else
- __fpregs_deactivate_hw();
kernel_fpu_enable();
}
@@ -201,10 +180,7 @@ void fpu__save(struct fpu *fpu)
trace_x86_fpu_before_save(fpu);
if (fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(fpu)) {
- if (use_eager_fpu())
- copy_kernel_to_fpregs(&fpu->state);
- else
- fpregs_deactivate(fpu);
+ copy_kernel_to_fpregs(&fpu->state);
}
}
trace_x86_fpu_after_save(fpu);
@@ -249,7 +225,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
- dst_fpu->counter = 0;
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
@@ -262,8 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Don't let 'init optimized' areas of the XSAVE area
* leak into the child task:
*/
- if (use_eager_fpu())
- memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+ memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
/*
* Save current FPU registers directly into the child
@@ -285,10 +259,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
memcpy(&src_fpu->state, &dst_fpu->state,
fpu_kernel_xstate_size);
- if (use_eager_fpu())
- copy_kernel_to_fpregs(&src_fpu->state);
- else
- fpregs_deactivate(src_fpu);
+ copy_kernel_to_fpregs(&src_fpu->state);
}
preempt_enable();
@@ -461,7 +432,6 @@ void fpu__restore(struct fpu *fpu)
trace_x86_fpu_before_restore(fpu);
fpregs_activate(fpu);
copy_kernel_to_fpregs(&fpu->state);
- fpu->counter++;
trace_x86_fpu_after_restore(fpu);
kernel_fpu_enable();
}
@@ -479,7 +449,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
void fpu__drop(struct fpu *fpu)
{
preempt_disable();
- fpu->counter = 0;
if (fpu->fpregs_active) {
/* Ignore delayed exceptions from user space */
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 3ec0d2d64601..ae52ef05d098 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -309,7 +309,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
* thread's fpu state, reconstruct fxstate from the fsave
* header. Sanitize the copied state etc.
*/
- struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
@@ -344,11 +343,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}
fpu->fpstate_active = 1;
- if (use_eager_fpu()) {
- preempt_disable();
- fpu__restore(fpu);
- preempt_enable();
- }
+ preempt_disable();
+ fpu__restore(fpu);
+ preempt_enable();
return err;
} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index abfbb61b18b8..e9d7f461b7fa 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -890,15 +890,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
- /*
- * For most XSAVE components, this would be an arduous task:
- * brining fpstate up to date with fpregs, updating fpstate,
- * then re-populating fpregs. But, for components that are
- * never lazily managed, we can just access the fpregs
- * directly. PKRU is never managed lazily, so we can just
- * manipulate it directly. Make sure it stays that way.
- */
- WARN_ON_ONCE(!use_eager_fpu());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 29d465627919..bf9552bebb3c 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -90,7 +90,7 @@ unsigned paravirt_patch_call(void *insnbuf,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
@@ -110,7 +110,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index f8a0518d2810..89d1190b9d94 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -24,7 +24,7 @@
#include <asm/time.h>
#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
#endif
unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 0fe720d64fef..3f818ce985c0 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -12,6 +12,7 @@
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/param.h>
+#include <asm/tsc.h>
#define MAX_NUM_FREQS 9
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e5119c1d15c..c17d3893ae60 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include "cpuid.h"
@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- if (use_eager_fpu())
- kvm_x86_ops->fpu_activate(vcpu);
+ kvm_x86_ops->fpu_activate(vcpu);
/*
* The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a8a86be8cf15..69a81a7daa24 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1220,9 +1220,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
- return kvm_apic_hw_enabled(apic) &&
- addr >= apic->base_address &&
- addr < apic->base_address + LAPIC_MMIO_LENGTH;
+ return addr >= apic->base_address &&
+ addr < apic->base_address + LAPIC_MMIO_LENGTH;
}
static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
@@ -1234,6 +1233,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ memset(data, 0xff, len);
+ return 0;
+ }
+
kvm_lapic_reg_read(apic, offset, len, data);
return 0;
@@ -1646,6 +1654,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
/*
* APIC register must be aligned on 128-bits boundary.
* 32/64/128 bits registers must be accessed thru 32 bits.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8a4d6bc8fed0..676edfc19a95 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4297,9 +4297,9 @@ static bool need_remote_flush(u64 old, u64 new)
}
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
- const u8 *new, int *bytes)
+ int *bytes)
{
- u64 gentry;
+ u64 gentry = 0;
int r;
/*
@@ -4311,22 +4311,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
*gpa &= ~(gpa_t)7;
*bytes = 8;
- r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
- if (r)
- gentry = 0;
- new = (const u8 *)&gentry;
}
- switch (*bytes) {
- case 4:
- gentry = *(const u32 *)new;
- break;
- case 8:
- gentry = *(const u64 *)new;
- break;
- default:
- gentry = 0;
- break;
+ if (*bytes == 4 || *bytes == 8) {
+ r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
+ if (r)
+ gentry = 0;
}
return gentry;
@@ -4437,8 +4427,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
- gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
-
/*
* No need to care whether allocation memory is successful
* or not since pte prefetch is skiped if it does not have
@@ -4447,6 +4435,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mmu_topup_memory_caches(vcpu);
spin_lock(&vcpu->kvm->mmu_lock);
+
+ gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
+
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5f44d63a9d69..fa1b0e3c8a06 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1333,20 +1333,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
static int avic_init_access_page(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int ret;
+ int ret = 0;
+ mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page_done)
- return 0;
+ goto out;
- ret = x86_set_memory_region(kvm,
- APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
- APIC_DEFAULT_PHYS_BASE,
- PAGE_SIZE);
+ ret = __x86_set_memory_region(kvm,
+ APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE,
+ PAGE_SIZE);
if (ret)
- return ret;
+ goto out;
kvm->arch.apic_access_page_done = true;
- return 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return ret;
}
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
@@ -1672,21 +1675,31 @@ out:
return ERR_PTR(err);
}
+static void svm_clear_current_vmcb(struct vmcb *vmcb)
+{
+ int i;
+
+ for_each_online_cpu(i)
+ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+}
+
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So, ensure that no logical CPU has this
+ * vmcb page recorded as its current vmcb.
+ */
+ svm_clear_current_vmcb(svm->vmcb);
+
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
- /*
- * The vmcb page can be recycled, causing a false negative in
- * svm_vcpu_load(). So do a full IBPB now.
- */
- indirect_branch_prediction_barrier();
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 203d42340fc1..27d13b870e07 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6661,7 +6661,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
else {
if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+ if (ioapic_in_kernel(vcpu->kvm))
+ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
vcpu_to_synic(vcpu)->vec_bitmap, 256);
@@ -7631,16 +7632,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload;
- /*
- * If using eager FPU mode, or if the guest is a frequent user
- * of the FPU, just leave the FPU active for next time.
- * Every 255 times fpu_counter rolls over to 0; a guest that uses
- * the FPU in bursts will revert to loading it on demand.
- */
- if (!use_eager_fpu()) {
- if (++vcpu->fpu_counter < 5)
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
- }
trace_kvm_fpu(0);
}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index a8f90ce3dedf..dc6d99017f3f 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
eb->nid = nid;
if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
- emu_nid_to_phys[nid] = nid;
+ emu_nid_to_phys[nid] = pb->nid;
pb->start += size;
if (pb->start >= pb->end) {
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 0bbec041c003..e2d2b3cd4276 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -142,8 +142,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
* Called from the FPU code when creating a fresh set of FPU
* registers. This is called from a very specific context where
* we know the FPU regstiers are safe for use and we can use PKRU
- * directly. The fact that PKRU is only available when we are
- * using eagerfpu mode makes this possible.
+ * directly.
*/
void copy_init_pkru_to_fpregs(void)
{
diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c
index a2b4efddd61a..8e7ddd7e313a 100644
--- a/arch/x86/platform/olpc/olpc-xo1-rtc.c
+++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c
@@ -16,6 +16,7 @@
#include <asm/msr.h>
#include <asm/olpc.h>
+#include <asm/x86_init.h>
static void rtc_wake_on(struct device *dev)
{
@@ -75,6 +76,8 @@ static int __init xo1_rtc_init(void)
if (r)
return r;
+ x86_platform.legacy.rtc = 0;
+
device_init_wakeup(&xo1_rtc_device.dev, 1);
return 0;
}
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 0cb1dd461529..fef485b789ca 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -126,7 +126,7 @@ static int relocate_restore_code(void)
if (!relocated_restore_code)
return -ENOMEM;
- memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
+ memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
/* Make the page containing the relocated code executable */
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
@@ -197,8 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
if (max_size < sizeof(struct restore_data_record))
return -EOVERFLOW;
- rdr->jump_address = (unsigned long)&restore_registers;
- rdr->jump_address_phys = __pa_symbol(&restore_registers);
+ rdr->jump_address = (unsigned long)restore_registers;
+ rdr->jump_address_phys = __pa_symbol(restore_registers);
rdr->cr3 = restore_cr3;
rdr->magic = RESTORE_MAGIC;
return 0;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 3d6e0064cbfc..8d2c6f071dcc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -8,6 +8,7 @@
#include <linux/log2.h>
#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/atomic.h>
#include <asm/paravirt.h>
@@ -19,6 +20,7 @@
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest);
static bool xen_pvspin = true;
#include <asm/qspinlock.h>
@@ -40,33 +42,24 @@ static void xen_qlock_kick(int cpu)
static void xen_qlock_wait(u8 *byte, u8 val)
{
int irq = __this_cpu_read(lock_kicker_irq);
+ atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest);
/* If kicker interrupts not initialized yet, just spin */
- if (irq == -1)
+ if (irq == -1 || in_nmi())
return;
- /* clear pending */
- xen_clear_irq_pending(irq);
- barrier();
-
- /*
- * We check the byte value after clearing pending IRQ to make sure
- * that we won't miss a wakeup event because of the clearing.
- *
- * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
- * So it is effectively a memory barrier for x86.
- */
- if (READ_ONCE(*byte) != val)
- return;
+ /* Detect reentry. */
+ atomic_inc(nest_cnt);
- /*
- * If an interrupt happens here, it will leave the wakeup irq
- * pending, which will cause xen_poll_irq() to return
- * immediately.
- */
+ /* If irq pending already and no nested call clear it. */
+ if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) {
+ xen_clear_irq_pending(irq);
+ } else if (READ_ONCE(*byte) == val) {
+ /* Block until irq becomes pending (or a spurious wakeup) */
+ xen_poll_irq(irq);
+ }
- /* Block until irq becomes pending (or perhaps a spurious wakeup) */
- xen_poll_irq(irq);
+ atomic_dec(nest_cnt);
}
static irqreturn_t dummy_handler(int irq, void *dev_id)