summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-08-27 15:51:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-27 15:51:50 -0700
commitaf56ff27eba54fceee5f5643e79bf6531f2e1739 (patch)
tree99d683c0c7698f472ce66f6973ce78efddc6500e /arch
parent5e608a027082ae426e100a582031e0ff40becc83 (diff)
parentba913e4f72fc9cfd03dad968dfb110eb49211d80 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini: "ARM: - fixes for ITS init issues, error handling, IRQ leakage, race conditions - an erratum workaround for timers - some removal of misleading use of errors and comments - a fix for GICv3 on 32-bit guests MIPS: - fix for where the guest could wrongly map the first page of physical memory x86: - nested virtualization fixes" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: MIPS: KVM: Check for pfn noslot case kvm: nVMX: fix nested tsc scaling KVM: nVMX: postpone VMCS changes on MSR_IA32_APICBASE write KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC arm64: KVM: report configured SRE value to 32-bit world arm64: KVM: remove misleading comment on pmu status KVM: arm/arm64: timer: Workaround misconfigured timer interrupt arm64: Document workaround for Cortex-A72 erratum #853709 KVM: arm/arm64: Change misleading use of is_error_pfn KVM: arm64: ITS: avoid re-mapping LPIs KVM: arm64: check for ITS device on MSI injection KVM: arm64: ITS: move ITS registration into first VCPU run KVM: arm64: vgic-its: Make updates to propbaser/pendbaser atomic KVM: arm64: vgic-its: Plug race in vgic_put_irq KVM: arm64: vgic-its: Handle errors from vgic_add_lpi KVM: arm64: ITS: return 1 on successful MSI injection
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/kvm/mmu.c2
-rw-r--r--arch/arm64/kvm/hyp/switch.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c10
-rw-r--r--arch/mips/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/vmx.c136
5 files changed, 73 insertions, 79 deletions
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index bda27b6b1aa2..29d0b23af2a9 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1309,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
smp_rmb();
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
- if (is_error_pfn(pfn))
+ if (is_error_noslot_pfn(pfn))
return -EFAULT;
if (kvm_is_device_pfn(pfn)) {
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ae7855f16ec2..5a84b4562603 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -256,7 +256,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
/*
* We must restore the 32-bit state before the sysregs, thanks
- * to Cortex-A57 erratum #852523.
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_guest_state(guest_ctxt);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b0b225ceca18..e51367d159d0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -823,14 +823,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters. Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- *
* Debug handling: We do trap most, if not all debug related system
* registers. The implementation is good enough to ensure that a guest
* can use these with minimal performance degradation. The drawback is
@@ -1360,7 +1352,7 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
/* ICC_SRE */
- { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 6cfdcf55572d..121008c0fcc9 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -40,7 +40,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
srcu_idx = srcu_read_lock(&kvm->srcu);
pfn = gfn_to_pfn(kvm, gfn);
- if (is_error_pfn(pfn)) {
+ if (is_error_noslot_pfn(pfn)) {
kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
err = -EFAULT;
goto out;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a45d8580f91e..5cede40e2552 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -422,6 +422,7 @@ struct nested_vmx {
struct list_head vmcs02_pool;
int vmcs02_num;
u64 vmcs01_tsc_offset;
+ bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
/*
@@ -435,6 +436,8 @@ struct nested_vmx {
bool pi_pending;
u16 posted_intr_nv;
+ unsigned long *msr_bitmap;
+
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -924,7 +927,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;
@@ -2198,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
new.control) != old.control);
}
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+ vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+ vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
@@ -2256,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* Setup TSC multiplier */
if (kvm_has_tsc_control &&
- vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
- vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
- vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
- }
+ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+ decache_tsc_multiplier(vmx);
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
@@ -2508,7 +2514,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
unsigned long *msr_bitmap;
if (is_guest_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_nested;
+ msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
else if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6363,13 +6369,6 @@ static __init int hardware_setup(void)
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
- if (nested) {
- vmx_msr_bitmap_nested =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_nested)
- goto out5;
- }
-
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
goto out6;
@@ -6392,8 +6391,6 @@ static __init int hardware_setup(void)
memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
- if (nested)
- memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
@@ -6529,9 +6526,6 @@ out8:
out7:
free_page((unsigned long)vmx_vmread_bitmap);
out6:
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6551,6 @@ static __exit void hardware_unsetup(void)
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
free_page((unsigned long)vmx_vmread_bitmap);
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
free_kvm_area();
}
@@ -6995,16 +6987,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
+ if (cpu_has_vmx_msr_bitmap()) {
+ vmx->nested.msr_bitmap =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx->nested.msr_bitmap)
+ goto out_msr_bitmap;
+ }
+
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
- return -ENOMEM;
+ goto out_cached_vmcs12;
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
- if (!shadow_vmcs) {
- kfree(vmx->nested.cached_vmcs12);
- return -ENOMEM;
- }
+ if (!shadow_vmcs)
+ goto out_shadow_vmcs;
/* mark vmcs as shadow */
shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */
@@ -7024,6 +7021,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
return 1;
+
+out_shadow_vmcs:
+ kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+ return -ENOMEM;
}
/*
@@ -7098,6 +7104,10 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
+ if (vmx->nested.msr_bitmap) {
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+ vmx->nested.msr_bitmap = NULL;
+ }
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8429,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
u32 sec_exec_control;
+ /* Postpone execution until vmcs01 is the current VMCS. */
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+ return;
+ }
+
/*
* There is not point to enable virtualize x2apic without enable
* apicv
@@ -9472,8 +9488,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
{
int msr;
struct page *page;
- unsigned long *msr_bitmap;
+ unsigned long *msr_bitmap_l1;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ /* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;
@@ -9482,63 +9500,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
WARN_ON(1);
return false;
}
- msr_bitmap = (unsigned long *)kmap(page);
- if (!msr_bitmap) {
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+ if (!msr_bitmap_l1) {
nested_release_page_clean(page);
WARN_ON(1);
return false;
}
+ memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
if (nested_cpu_has_apic_reg_virt(vmcs12))
for (msr = 0x800; msr <= 0x8ff; msr++)
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
msr, MSR_TYPE_R);
- /* TPR is allowed */
- nested_vmx_disable_intercept_for_msr(msr_bitmap,
- vmx_msr_bitmap_nested,
+
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_TASKPRI >> 4),
MSR_TYPE_R | MSR_TYPE_W);
+
if (nested_cpu_has_vid(vmcs12)) {
- /* EOI and self-IPI are allowed */
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_EOI >> 4),
MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
MSR_TYPE_W);
}
- } else {
- /*
- * Enable reading intercept of all the x2apic
- * MSRs. We should not rely on vmcs12 to do any
- * optimizations here, it may have been modified
- * by L1.
- */
- for (msr = 0x800; msr <= 0x8ff; msr++)
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- msr,
- MSR_TYPE_R);
-
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
}
kunmap(page);
nested_release_page_clean(page);
@@ -9957,10 +9949,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
if (cpu_has_vmx_msr_bitmap() &&
- exec_control & CPU_BASED_USE_MSR_BITMAPS) {
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
- /* MSR_BITMAP will be set by following vmx_set_efer. */
- } else
+ exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+ nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+ ; /* MSR_BITMAP will be set by following vmx_set_efer. */
+ else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
/*
@@ -10011,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
else
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
if (enable_vpid) {
/*
@@ -10767,6 +10761,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
else
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
+
+ if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+ vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+ vmx_set_virtual_x2apic_mode(vcpu,
+ vcpu->arch.apic_base & X2APIC_ENABLE);
+ }
/* This is needed for same reason as it was needed in prepare_vmcs02 */
vmx->host_rsp = 0;