Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c |  2
-rw-r--r--  arch/x86/kernel/cpu/sgx/ioctl.c     |  2
-rw-r--r--  arch/x86/kernel/fpu/core.c          | 32
-rw-r--r--  arch/x86/kernel/fpu/xstate.c        |  4
-rw-r--r--  arch/x86/kernel/irq.c               | 23
-rw-r--r--  arch/x86/kernel/kvm.c               | 19
-rw-r--r--  arch/x86/kernel/unwind_orc.c        | 39
-rw-r--r--  arch/x86/kernel/x86_init.c          |  2
8 files changed, 100 insertions(+), 23 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 3821a985f4ff..46673530bc6f 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -258,7 +258,7 @@ static bool cpu_has_entrysign(void)
if (fam == 0x1a) {
if (model <= 0x2f ||
(0x40 <= model && model <= 0x4f) ||
- (0x60 <= model && model <= 0x6f))
+ (0x60 <= model && model <= 0x7f))
return true;
}
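For context on the gate above: fam and model come from CPUID leaf 1, where AMD parts combine the base and extended family/model fields. A minimal standalone sketch of the decode and the widened range check, with hypothetical helper names (this models the hunk, it is not the kernel's helper):

#include <stdbool.h>
#include <stdint.h>

/* Simplified AMD family/model decode from CPUID leaf 1 EAX. */
static void decode_fms(uint32_t eax, uint32_t *fam, uint32_t *model)
{
        *fam   = (eax >> 8) & 0xf;
        *model = (eax >> 4) & 0xf;
        if (*fam == 0xf) {              /* extended fields apply on AMD */
                *fam   += (eax >> 20) & 0xff;
                *model |= ((eax >> 16) & 0xf) << 4;
        }
}

/* Mirrors the family 0x1a ranges after the hunk: 0x60-0x6f grows to 0x60-0x7f. */
static bool fam1a_has_entrysign(uint32_t model)
{
        return model <= 0x2f ||
               (0x40 <= model && model <= 0x4f) ||
               (0x60 <= model && model <= 0x7f);
}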
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 66f1efa16fbb..9322a9287dc7 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
/*
* If the caller requires measurement of the page as a proof for the content,
* use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
- * operation until the entire page is measured."
+ * operation until the entire page is measured.
*/
static int __sgx_encl_extend(struct sgx_encl *encl,
struct sgx_epc_page *epc_page)
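The corrected comment describes the measurement loop itself: a 4 KiB EPC page is extended in sixteen 256-byte EEXTEND chunks. A hedged sketch of that iteration, where __eextend stands in for the ENCLS[EEXTEND] wrapper:

extern int __eextend(void *secs, void *addr);   /* ENCLS[EEXTEND] wrapper (assumed) */

#define SGX_PAGE_SIZE   4096
#define EEXTEND_CHUNK    256

/* Sketch: fold each 256-byte chunk of the page into the enclave measurement. */
static int sgx_extend_page_sketch(void *secs, unsigned long page)
{
        unsigned long offset;
        int ret;

        for (offset = 0; offset < SGX_PAGE_SIZE; offset += EEXTEND_CHUNK) {
                ret = __eextend(secs, (void *)(page + offset));
                if (ret)
                        return ret;
        }
        return 0;
}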
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index da233f20ae6f..608983806fd7 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -319,10 +319,29 @@ EXPORT_SYMBOL_FOR_KVM(fpu_enable_guest_xfd_features);
#ifdef CONFIG_X86_64
void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
{
+ struct fpstate *fpstate = guest_fpu->fpstate;
+
fpregs_lock();
- guest_fpu->fpstate->xfd = xfd;
- if (guest_fpu->fpstate->in_use)
- xfd_update_state(guest_fpu->fpstate);
+
+ /*
+ * KVM's guest ABI is that setting XFD[i]=1 *can* immediately revert the
+ * save state to its initial configuration. Likewise, KVM_GET_XSAVE does
+ * the same as XSAVE and returns XSTATE_BV[i]=0 whenever XFD[i]=1.
+ *
+ * If the guest's FPU state is in hardware, just update XFD: the XSAVE
+ * in fpu_swap_kvm_fpstate will clear XSTATE_BV[i] whenever XFD[i]=1.
+ *
+ * If however the guest's FPU state is NOT resident in hardware, clear
+ * disabled components in XSTATE_BV now, or a subsequent XRSTOR will
+ * attempt to load disabled components and generate #NM _in the host_.
+ */
+ if (xfd && test_thread_flag(TIF_NEED_FPU_LOAD))
+ fpstate->regs.xsave.header.xfeatures &= ~xfd;
+
+ fpstate->xfd = xfd;
+ if (fpstate->in_use)
+ xfd_update_state(fpstate);
+
fpregs_unlock();
}
EXPORT_SYMBOL_FOR_KVM(fpu_update_guest_xfd);
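As a plain-bitmask model of the comment above: XRSTOR raises #NM for any component that XSTATE_BV requests while XFD disables it, so when the guest state lives only in memory the disabled bits must be dropped from XSTATE_BV up front. A simplified userspace sketch, names hypothetical:

#include <stdbool.h>
#include <stdint.h>

struct fpstate_model {
        uint64_t xfd;           /* XFD: bit set = component disabled */
        uint64_t xstate_bv;     /* XSTATE_BV of the saved image */
};

/* Mirrors fpu_update_guest_xfd(): if the registers are not resident in
 * hardware (TIF_NEED_FPU_LOAD in the real code), clear disabled
 * components so a later XRSTOR cannot fault on them. */
static void update_guest_xfd_model(struct fpstate_model *f, uint64_t xfd,
                                   bool state_in_memory)
{
        if (xfd && state_in_memory)
                f->xstate_bv &= ~xfd;
        f->xfd = xfd;
}

/* The invariant being protected: XRSTOR raises #NM iff this is nonzero. */
static uint64_t xrstor_nm_bits(const struct fpstate_model *f)
{
        return f->xstate_bv & f->xfd;
}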
@@ -431,6 +450,13 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
return -EINVAL;
/*
+ * Disabled features must be in their initial state, otherwise XRSTOR
+ * causes an exception.
+ */
+ if (WARN_ON_ONCE(ustate->xsave.header.xfeatures & kstate->xfd))
+ return -EINVAL;
+
+ /*
* Nullify @vpkru to preserve its current value if PKRU's bit isn't set
* in the header. KVM's odd ABI is to leave PKRU untouched in this
* case (all other components are eventually re-initialized).
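The same invariant guards the user-to-guest copy path: an XSAVE image supplied through KVM's uABI must not set XSTATE_BV bits for components the current XFD value disables. A short sketch of that check, assuming the two masks are already extracted from the buffers:

#include <errno.h>
#include <stdint.h>

/* Reject a user-provided image whose XSTATE_BV claims XFD-disabled
 * components; restoring it would raise #NM, as noted in the hunk above. */
static int check_uabi_xfeatures(uint64_t user_xstate_bv, uint64_t guest_xfd)
{
        if (user_xstate_bv & guest_xfd)
                return -EINVAL;
        return 0;
}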
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 48113c5193aa..76153dfb58c9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
};
if (!dump_emit(cprm, &xc, sizeof(xc)))
- return 0;
+ return -1;
num_records++;
}
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
return 1;
num_records = dump_xsave_layout_desc(cprm);
- if (!num_records)
+ if (num_records < 0)
return 1;
/* Total size should be equal to the number of records */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 86f4e574de02..b2fe6181960c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
void intel_posted_msi_init(void)
{
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
this_cpu_write(posted_msi_pi_desc.ndst, destination);
}
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+
+ /*
+ * Handle the rare case that irq_retrigger() raised the actual
+ * assigned vector on the target CPU, which means that it was not
+ * invoked via the posted MSI handler below. In that case APIC EOI
+ * is required as otherwise the ISR entry becomes stale and lower
+ * priority interrupts are never going to be delivered after that.
+ *
+ * If the posted handler invoked the device interrupt handler then
+ * the EOI would be premature because it would acknowledge the
+ * posted vector.
+ */
+ if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+ apic_eoi();
+}
+
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
pid = this_cpu_ptr(&posted_msi_pi_desc);
+ /* Mark the handler active for intel_ack_posted_msi_irq() */
+ __this_cpu_write(posted_msi_handler_active, true);
inc_irq_stat(posted_msi_notification_count);
irq_enter();
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
apic_eoi();
irq_exit();
+ __this_cpu_write(posted_msi_handler_active, false);
set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */
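The new per-CPU bool is a small "am I inside the demultiplexer" flag: the notification handler sets it around the PIR scan, and the per-interrupt ack path only issues its own APIC EOI when the flag is clear. A condensed single-CPU model using thread-local storage (the real code uses per-CPU data and runs with interrupts disabled):

#include <stdbool.h>

static _Thread_local bool posted_handler_active;

/* Model of sysvec_posted_msi_notification(): one EOI for the shared
 * notification vector covers every demultiplexed device handler. */
static void posted_msi_notification_model(void)
{
        posted_handler_active = true;
        /* ... scan PIR, invoke the handlers of all pending vectors ... */
        /* ... single apic_eoi() for the notification vector ... */
        posted_handler_active = false;
}

/* Model of intel_ack_posted_msi_irq(): an interrupt that arrived on its
 * own vector (e.g. via irq_retrigger()) was not covered by the shared
 * EOI above and must acknowledge itself. */
static bool ack_needs_own_eoi(void)
{
        return !posted_handler_active;
}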
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index df78ddee0abb..37dc8465e0f5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -89,6 +89,7 @@ struct kvm_task_sleep_node {
struct swait_queue_head wq;
u32 token;
int cpu;
+ bool dummy;
};
static struct kvm_task_sleep_head {
@@ -120,15 +121,26 @@ static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
raw_spin_lock(&b->lock);
e = _find_apf_task(b, token);
if (e) {
- /* dummy entry exist -> wake up was delivered ahead of PF */
- hlist_del(&e->link);
+ struct kvm_task_sleep_node *dummy = NULL;
+
+ /*
+ * The entry can either be a 'dummy' entry (which is put on the
+ * list when wake-up happens ahead of APF handling completion)
+ * or a token from another task which should not be touched.
+ */
+ if (e->dummy) {
+ hlist_del(&e->link);
+ dummy = e;
+ }
+
raw_spin_unlock(&b->lock);
- kfree(e);
+ kfree(dummy);
return false;
}
n->token = token;
n->cpu = smp_processor_id();
+ n->dummy = false;
init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
raw_spin_unlock(&b->lock);
@@ -231,6 +243,7 @@ again:
}
dummy->token = token;
dummy->cpu = smp_processor_id();
+ dummy->dummy = true;
init_swait_queue_head(&dummy->wq);
hlist_add_head(&dummy->link, &b->list);
dummy = NULL;
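The new flag lets the queueing path tell a "wake-up arrived first" placeholder apart from a live waiter that happens to carry a matching token. A condensed userspace model of the corrected decision, with the hash bucket flattened to one singly linked list and locking omitted:

#include <stdbool.h>
#include <stdlib.h>

struct apf_node {
        struct apf_node *next;
        unsigned int token;
        bool dummy;     /* wake-up was recorded before the #PF completed */
};

/* Returns false when the task should not sleep: either its wake-up
 * already arrived (dummy entry, which is unlinked and freed) or another
 * task owns the matching token (left strictly alone). */
static bool queue_task_model(struct apf_node **head, unsigned int token)
{
        struct apf_node **pp, *e;

        for (pp = head; (e = *pp) != NULL; pp = &e->next) {
                if (e->token != token)
                        continue;
                if (e->dummy) {
                        *pp = e->next;
                        free(e);
                }
                return false;
        }
        return true;    /* no entry: caller queues itself and sleeps */
}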
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 977ee75e047c..f610fde2d5c4 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
+#include <linux/bpf.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
}
#endif
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+ .type = ORC_TYPE_CALL,
+ .sp_reg = ORC_REG_BP,
+ .sp_offset = 16,
+ .bp_reg = ORC_REG_PREV_SP,
+ .bp_offset = -16,
+};
+
+static struct orc_entry *orc_bpf_find(unsigned long ip)
+{
+#ifdef CONFIG_BPF_JIT
+ if (bpf_has_frame_pointer(ip))
+ return &orc_fp_entry;
+#endif
+
+ return NULL;
+}
+
/*
* If we crash with IP==0, the last successfully executed instruction
* was probably an indirect function call with a NULL function pointer,
@@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
-/* Fake frame pointer entry -- used as a fallback for generated code */
-static struct orc_entry orc_fp_entry = {
- .type = ORC_TYPE_CALL,
- .sp_reg = ORC_REG_BP,
- .sp_offset = 16,
- .bp_reg = ORC_REG_PREV_SP,
- .bp_offset = -16,
-};
-
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
+ /* BPF lookup: */
+ orc = orc_bpf_find(ip);
+ if (orc)
+ return orc;
+
return orc_ftrace_find(ip);
}
@@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (!orc) {
/*
* As a fallback, try to assume this code uses a frame pointer.
- * This is useful for generated code, like BPF, which ORC
- * doesn't know about. This is just a guess, so the rest of
- * the unwind is no longer considered reliable.
+ * This is just a guess, so the rest of the unwind is no longer
+ * considered reliable.
*/
orc = &orc_fp_entry;
state->error = true;
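Why the relocated orc_fp_entry works for JITed BPF code: its offsets encode the classic "push %rbp; mov %rsp, %rbp" prologue. A worked layout, assuming x86-64, where prev_sp is the caller's RSP just before the CALL:

        prev_sp -  8 : return address   (pushed by CALL)
        prev_sp - 16 : saved %rbp       (pushed by the prologue)  <- new %rbp

So prev_sp = %rbp + 16 (sp_reg = ORC_REG_BP, sp_offset = 16), and the caller's %rbp is found at prev_sp - 16 (bp_reg = ORC_REG_PREV_SP, bp_offset = -16). The difference from the old fallback path is that orc_bpf_find() only hands this entry back when bpf_has_frame_pointer(ip) confirms the JIT emitted such a prologue, which is why the generic "guess and mark unreliable" comment above no longer needs to single out BPF.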
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 0a2bbd674a6d..ebefb77c37bb 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2009 Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
*
* For licencing details see kernel-base/COPYING
*/