Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/bug.h              2
-rw-r--r--  arch/x86/include/asm/irq_remapping.h    7
-rw-r--r--  arch/x86/include/asm/irqflags.h         2
-rw-r--r--  arch/x86/include/asm/uv/bios.h          2
-rw-r--r--  arch/x86/kernel/cpu/sgx/ioctl.c         2
-rw-r--r--  arch/x86/kernel/fpu/xstate.c            4
-rw-r--r--  arch/x86/kernel/irq.c                  23
-rw-r--r--  arch/x86/kernel/unwind_orc.c           39
-rw-r--r--  arch/x86/kvm/cpuid.c                   11
-rw-r--r--  arch/x86/kvm/svm/nested.c               4
-rw-r--r--  arch/x86/kvm/svm/svm.c                  2
-rw-r--r--  arch/x86/kvm/svm/svm.h                  7
-rw-r--r--  arch/x86/kvm/vmx/nested.c               3
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                  9
-rw-r--r--  arch/x86/kvm/x86.c                      7
-rw-r--r--  arch/x86/net/bpf_jit_comp.c            12
-rw-r--r--  arch/x86/xen/enlighten_pv.c             2
17 files changed, 102 insertions, 36 deletions
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index d561a8443c13..9b4e04690e1a 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...);
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*/
-#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b)
+#define ASM_UD2 __ASM_FORM(ud2)
#define INSN_UD2 0x0b0f
#define LEN_UD2 2
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5a0d42464d44..4e55d1755846 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
}
#endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq NULL
+#endif
+
#endif /* __X86_IRQ_REMAPPING_H */
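
[ intel_ack_posted_msi_irq() is either a real handler or NULL, so callers can
  wire it into an irq_chip without their own #ifdef. A minimal sketch of such
  a consumer, assuming a remapped-MSI chip -- the chip name and the other
  callbacks below are illustrative, not taken from this diff: ]

        static struct irq_chip posted_msi_ir_chip = {
                .name           = "IR-PMSI",
                /* NULL (i.e. no ack callback) when CONFIG_X86_POSTED_MSI=n */
                .irq_ack        = intel_ack_posted_msi_irq,
                .irq_mask       = irq_chip_mask_parent,
                .irq_unmask     = irq_chip_unmask_parent,
        };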
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index b30e5474c18e..a1193e9d65f2 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -25,7 +25,7 @@ extern __always_inline unsigned long native_save_fl(void)
*/
asm volatile("# __raw_save_flags\n\t"
"pushf ; pop %0"
- : "=rm" (flags)
+ : ASM_OUTPUT_RM (flags)
: /* no input */
: "memory");
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6989b824fd32..d0b62e255290 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -122,7 +122,7 @@ struct uv_systab {
struct {
u32 type:8; /* type of entry */
u32 offset:24; /* byte offset from struct start to entry */
- } entry[1]; /* additional entries follow */
+ } entry[]; /* additional entries follow */
};
extern struct uv_systab *uv_systab;
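
[ With a flexible array member, sizeof(struct uv_systab) now covers only the
  fixed header rather than header plus one fake entry. A minimal sketch of
  the arithmetic a caller can rely on -- the 'size' field is an assumption
  about the rest of the struct, not part of this hunk: ]

        static inline u32 uv_systab_nr_entries(const struct uv_systab *st)
        {
                /* bytes past the fixed header, divided by the entry stride */
                return (st->size - sizeof(*st)) / sizeof(st->entry[0]);
        }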
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 66f1efa16fbb..9322a9287dc7 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
/*
* If the caller requires measurement of the page as a proof for the content,
* use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
- * operation until the entire page is measured."
+ * operation until the entire page is measured.
*/
static int __sgx_encl_extend(struct sgx_encl *encl,
struct sgx_epc_page *epc_page)
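
[ A minimal sketch of the measurement loop the comment describes, assuming
  the driver's usual ENCLS wrapper and address helpers; the block-size macro
  name is a stand-in for the 256-byte EEXTEND granularity: ]

        #define SGX_EEXTEND_BLOCK_SIZE  256

        for (offset = 0; offset < PAGE_SIZE; offset += SGX_EEXTEND_BLOCK_SIZE) {
                ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
                                sgx_get_epc_virt_addr(epc_page) + offset);
                if (ret)
                        return ret;     /* measurement of this chunk failed */
        }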
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 48113c5193aa..76153dfb58c9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
};
if (!dump_emit(cprm, &xc, sizeof(xc)))
- return 0;
+ return -1;
num_records++;
}
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
return 1;
num_records = dump_xsave_layout_desc(cprm);
- if (!num_records)
+ if (num_records < 0)
return 1;
/* Total size should be equal to the number of records */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 86f4e574de02..b2fe6181960c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
void intel_posted_msi_init(void)
{
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
this_cpu_write(posted_msi_pi_desc.ndst, destination);
}
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+
+ /*
+ * Handle the rare case that irq_retrigger() raised the actual
+ * assigned vector on the target CPU, which means that it was not
+ * invoked via the posted MSI handler below. In that case APIC EOI
+ * is required as otherwise the ISR entry becomes stale and lower
+ * priority interrupts are never going to be delivered after that.
+ *
+ * If the posted handler invoked the device interrupt handler then
+ * the EOI would be premature because it would acknowledge the
+ * posted vector.
+ */
+ if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+ apic_eoi();
+}
+
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
pid = this_cpu_ptr(&posted_msi_pi_desc);
+ /* Mark the handler active for intel_ack_posted_msi_irq() */
+ __this_cpu_write(posted_msi_handler_active, true);
inc_irq_stat(posted_msi_notification_count);
irq_enter();
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
apic_eoi();
irq_exit();
+ __this_cpu_write(posted_msi_handler_active, false);
set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 977ee75e047c..f610fde2d5c4 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
+#include <linux/bpf.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
}
#endif
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+ .type = ORC_TYPE_CALL,
+ .sp_reg = ORC_REG_BP,
+ .sp_offset = 16,
+ .bp_reg = ORC_REG_PREV_SP,
+ .bp_offset = -16,
+};
+
+static struct orc_entry *orc_bpf_find(unsigned long ip)
+{
+#ifdef CONFIG_BPF_JIT
+ if (bpf_has_frame_pointer(ip))
+ return &orc_fp_entry;
+#endif
+
+ return NULL;
+}
+
/*
* If we crash with IP==0, the last successfully executed instruction
* was probably an indirect function call with a NULL function pointer,
@@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
-/* Fake frame pointer entry -- used as a fallback for generated code */
-static struct orc_entry orc_fp_entry = {
- .type = ORC_TYPE_CALL,
- .sp_reg = ORC_REG_BP,
- .sp_offset = 16,
- .bp_reg = ORC_REG_PREV_SP,
- .bp_offset = -16,
-};
-
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
+ /* BPF lookup: */
+ orc = orc_bpf_find(ip);
+ if (orc)
+ return orc;
+
return orc_ftrace_find(ip);
}
@@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (!orc) {
/*
* As a fallback, try to assume this code uses a frame pointer.
- * This is useful for generated code, like BPF, which ORC
- * doesn't know about. This is just a guess, so the rest of
- * the unwind is no longer considered reliable.
+ * This is just a guess, so the rest of the unwind is no longer
+ * considered reliable.
*/
orc = &orc_fp_entry;
state->error = true;
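
[ bpf_has_frame_pointer() itself is not part of this diff. Based on the
  fp_start/fp_end offsets the JIT records later in this series (see the
  bpf_jit_comp.c hunks below), a plausible sketch is an offset check against
  the program's ksym -- the lookup helper and field types are assumptions: ]

        bool bpf_has_frame_pointer(unsigned long ip)
        {
                const struct bpf_ksym *ksym = bpf_ksym_find(ip);

                if (!ksym)
                        return false;

                /* RBP frame is valid from end of prologue up to "leave". */
                return ip - ksym->start >= ksym->fp_start &&
                       ip - ksym->start <  ksym->fp_end;
        }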
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d563a948318b..88a5426674a1 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -510,10 +510,17 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int r;
/*
+ * Apply pending runtime CPUID updates to the current CPUID entries to
+ * avoid false positives due to mismatches on KVM-owned feature flags.
+ */
+ if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ kvm_update_cpuid_runtime(vcpu);
+
+ /*
* Swap the existing (old) entries with the incoming (new) entries in
* order to massage the new entries, e.g. to account for dynamic bits
- * that KVM controls, without clobbering the current guest CPUID, which
- * KVM needs to preserve in order to unwind on failure.
+ * that KVM controls, without losing the current guest CPUID, which KVM
+ * needs to preserve in order to unwind on failure.
*
* Similarly, save the vCPU's current cpu_caps so that the capabilities
* can be updated alongside the CPUID entries when performing runtime
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c81005b24522..ba0f11c68372 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -985,7 +985,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
if (!nested_vmcb_check_save(vcpu) ||
!nested_vmcb_check_controls(vcpu)) {
vmcb12->control.exit_code = SVM_EXIT_ERR;
- vmcb12->control.exit_code_hi = 0;
+ vmcb12->control.exit_code_hi = -1u;
vmcb12->control.exit_info_1 = 0;
vmcb12->control.exit_info_2 = 0;
goto out;
@@ -1018,7 +1018,7 @@ out_exit_err:
svm->soft_int_injected = false;
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
- svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_code_hi = -1u;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
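
[ For context: KVM stores the VMCB's 64-bit EXITCODE as two u32 fields, and
  SVM_EXIT_ERR is -1, so an error exit needs all-ones in both halves.
  Setting only the low half, as before, yields 0x00000000ffffffff rather
  than -1. A sketch of the reassembly a consumer would do (not code from
  this patch): ]

        u64 exit_code = ((u64)vmcb12->control.exit_code_hi << 32) |
                        vmcb12->control.exit_code;
        /* with both halves set: exit_code == (u64)SVM_EXIT_ERR, all ones */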
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f56c2d895011..24d59ccfa40d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2443,6 +2443,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
if (cr0 ^ val) {
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+ svm->vmcb->control.exit_code_hi = 0;
ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
}
@@ -4617,6 +4618,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
if (static_cpu_has(X86_FEATURE_NRIPS))
vmcb->control.next_rip = info->next_rip;
vmcb->control.exit_code = icpt_info.exit_code;
+ vmcb->control.exit_code_hi = 0;
vmexit = nested_svm_exit_handled(svm);
ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9e151dbdef25..01be93a53d07 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -761,9 +761,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
{
- svm->vmcb->control.exit_code = exit_code;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
+ svm->vmcb->control.exit_code = exit_code;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
return nested_svm_vmexit(svm);
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 40777278eabb..6137e5307d0f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
#include "trace.h"
#include "vmx.h"
#include "smm.h"
+#include "x86_ops.h"
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5165,7 +5166,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (vmx->nested.update_vmcs01_apicv_status) {
vmx->nested.update_vmcs01_apicv_status = false;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ vmx_refresh_apicv_exec_ctrl(vcpu);
}
if (vmx->nested.update_vmcs01_hwapic_isr) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4cbe8c84b636..6b96f7aea20b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6937,15 +6937,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
* VM-Exit, otherwise L1 with run with a stale SVI.
*/
if (is_guest_mode(vcpu)) {
- /*
- * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
- * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
- * Note, userspace can stuff state while L2 is active; assert
- * that VID is disabled if and only if the vCPU is in KVM_RUN
- * to avoid false positives if userspace is setting APIC state.
- */
- WARN_ON_ONCE(vcpu->wants_to_run &&
- nested_cpu_has_vid(get_vmcs12(vcpu)));
to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
return;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c6d899d53dd..ff8812f3a129 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10886,9 +10886,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
* pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
* still active when the interrupt got accepted. Make sure
* kvm_check_and_inject_events() is called to check for that.
+ *
+ * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+ * highest bit in vISR and the next accelerated EOI in the guest won't
+ * be virtualized correctly (the CPU uses SVI to determine which vISR
+ * vector to clear).
*/
if (!apic->apicv_active)
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ else
+ kvm_apic_update_hwapic_isr(vcpu);
out:
preempt_enable();
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b69dc7194e2c..b0bac2a66eff 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
emit_prologue(&prog, image, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+
+ bpf_prog->aux->ksym.fp_start = prog - temp;
+
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@@ -2736,6 +2739,8 @@ emit_jmp:
pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
+ bpf_prog->aux->ksym.fp_end = prog - temp;
+
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
@@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ if (im)
+ im->ksym.fp_start = prog - (u8 *)rw_image;
+
if (!is_imm8(stack_size)) {
/* sub rsp, stack_size */
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
+
EMIT1(0xC9); /* leave */
+ if (im)
+ im->ksym.fp_end = prog - (u8 *)rw_image;
+
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
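
[ fp_start/fp_end are new markers this series adds to the program's ksym;
  they are byte offsets from the start of the JITed image, consumed by the
  ORC unwinder hunk above. A sketch of where they presumably live -- field
  types and placement are a guess, the existing members are abbreviated: ]

        struct bpf_ksym {
                unsigned long   start;          /* image start address */
                unsigned long   end;            /* image end address */
                char            name[KSYM_NAME_LEN];
                /* ... existing members ... */
                u32             fp_start;      /* offset: RBP frame set up */
                u32             fp_end;        /* offset: just past "leave" */
        };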
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4806cc28d7ca..b74ff8bc7f2a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -108,7 +108,7 @@ static int xen_cpu_dead_pv(unsigned int cpu);
* calls.
*/
DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+EXPORT_PER_CPU_SYMBOL_GPL(xen_in_preemptible_hcall);
/*
* In case of scheduling the flag must be cleared and restored after