diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/i8254.c | 9 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 19 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 19 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 91 |
5 files changed, 92 insertions, 47 deletions
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index f2f5d260874e..3829aa7b663f 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -200,9 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps) atomic_inc(&pt->pending); smp_mb__after_atomic_inc(); - if (vcpu0 && waitqueue_active(&vcpu0->wq)) { - vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; - wake_up_interruptible(&vcpu0->wq); + if (vcpu0) { + set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); + if (waitqueue_active(&vcpu0->wq)) { + vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(&vcpu0->wq); + } } pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c297c50eba63..ebc03f5ae162 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) wait_queue_head_t *q = &apic->vcpu->wq; atomic_inc(&apic->timer.pending); + set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); if (waitqueue_active(q)) { apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; wake_up_interruptible(q); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ee3f53098f0c..7e7c3969f7a2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) rmap_remove(kvm, spte); --kvm->stat.lpages; set_shadow_pte(spte, shadow_trap_nonpresent_pte); + spte = NULL; write_protected = 1; } spte = rmap_next(kvm, rmapp, spte); @@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, struct kvm_mmu_page *shadow; spte |= PT_WRITABLE_MASK; - if (user_fault) { - mmu_unshadow(vcpu->kvm, gfn); - goto unshadowed; - } shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); if (shadow || @@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } -unshadowed: - if (pte_access & ACC_WRITE_MASK) mark_page_dirty(vcpu->kvm, gfn); @@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, u64 *spte, const void *new) { - if ((sp->role.level != PT_PAGE_TABLE_LEVEL) - && !vcpu->arch.update_pte.largepage) { - ++vcpu->kvm->stat.mmu_pde_zapped; - return; - } + if (sp->role.level != PT_PAGE_TABLE_LEVEL) { + if (!vcpu->arch.update_pte.largepage || + sp->role.glevels == PT32_ROOT_LEVEL) { + ++vcpu->kvm->stat.mmu_pde_zapped; + return; + } + } ++vcpu->kvm->stat.mmu_pte_updated; if (sp->role.glevels == PT32_ROOT_LEVEL) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 02efbe75f317..540e95179074 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) load_transition_efer(vmx); } -static void vmx_load_host_state(struct vcpu_vmx *vmx) +static void __vmx_load_host_state(struct vcpu_vmx *vmx) { unsigned long flags; @@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) reload_host_efer(vmx); } +static void vmx_load_host_state(struct vcpu_vmx *vmx) +{ + preempt_disable(); + __vmx_load_host_state(vmx); + preempt_enable(); +} + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { - vmx_load_host_state(to_vmx(vcpu)); + __vmx_load_host_state(to_vmx(vcpu)); } static void vmx_fpu_activate(struct kvm_vcpu *vcpu) @@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: + vmx_load_host_state(vmx); ret = kvm_set_msr_common(vcpu, msr_index, data); - if (vmx->host_state.loaded) { - reload_host_efer(vmx); - load_transition_efer(vmx); - } break; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); @@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) guest_write_tsc(data); break; default: + vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; - if (vmx->host_state.loaded) - load_msrs(vmx->guest_msrs, vmx->save_nmsrs); break; } ret = kvm_set_msr_common(vcpu, msr_index, data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00acf1301a15..63a77caa59f1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) { static int version; - struct kvm_wall_clock wc; - struct timespec wc_ts; + struct pvclock_wall_clock wc; + struct timespec now, sys, boot; if (!wall_clock) return; @@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); - wc_ts = current_kernel_time(); - wc.wc_sec = wc_ts.tv_sec; - wc.wc_nsec = wc_ts.tv_nsec; - wc.wc_version = version; + /* + * The guest calculates current wall clock time by adding + * system time (updated by kvm_write_guest_time below) to the + * wall clock specified here. guest system time equals host + * system time for us, thus we must fill in host boot time here. + */ + now = current_kernel_time(); + ktime_get_ts(&sys); + boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); + + wc.sec = boot.tv_sec; + wc.nsec = boot.tv_nsec; + wc.version = version; kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); @@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); } +static uint32_t div_frac(uint32_t dividend, uint32_t divisor) +{ + uint32_t quotient, remainder; + + /* Don't try to replace with do_div(), this one calculates + * "(dividend << 32) / divisor" */ + __asm__ ( "divl %4" + : "=a" (quotient), "=d" (remainder) + : "0" (0), "1" (dividend), "r" (divisor) ); + return quotient; +} + +static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) +{ + uint64_t nsecs = 1000000000LL; + int32_t shift = 0; + uint64_t tps64; + uint32_t tps32; + + tps64 = tsc_khz * 1000LL; + while (tps64 > nsecs*2) { + tps64 >>= 1; + shift--; + } + + tps32 = (uint32_t)tps64; + while (tps32 <= (uint32_t)nsecs) { + tps32 <<= 1; + shift++; + } + + hv_clock->tsc_shift = shift; + hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); + + pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", + __FUNCTION__, tsc_khz, hv_clock->tsc_shift, + hv_clock->tsc_to_system_mul); +} + static void kvm_write_guest_time(struct kvm_vcpu *v) { struct timespec ts; @@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) if ((!vcpu->time_page)) return; + if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { + kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = tsc_khz; + } + /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, @@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate - * state, we just write "2" at the end + * state, we just increase by 2 at the end. */ - vcpu->hv_clock.version = 2; + vcpu->hv_clock.version += 2; shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); + sizeof(vcpu->hv_clock)); kunmap_atomic(shared_kaddr, KM_USER0); @@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) /* ...but clean it before doing the actual write */ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); - vcpu->arch.hv_clock.tsc_to_system_mul = - clocksource_khz2mult(tsc_khz, 22); - vcpu->arch.hv_clock.tsc_shift = 22; - down_read(¤t->mm->mmap_sem); vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); @@ -2759,6 +2808,8 @@ again: if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) __kvm_migrate_timers(vcpu); + if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + kvm_x86_ops->tlb_flush(vcpu); if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests)) { kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; @@ -2772,6 +2823,7 @@ again: } } + clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); kvm_inject_pending_timer_irqs(vcpu); preempt_disable(); @@ -2781,21 +2833,13 @@ again: local_irq_disable(); - if (need_resched()) { + if (vcpu->requests || need_resched()) { local_irq_enable(); preempt_enable(); r = 1; goto out; } - if (vcpu->requests) - if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) { - local_irq_enable(); - preempt_enable(); - r = 1; - goto out; - } - if (signal_pending(current)) { local_irq_enable(); preempt_enable(); @@ -2825,9 +2869,6 @@ again: kvm_guest_enter(); - if (vcpu->requests) - if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) - kvm_x86_ops->tlb_flush(vcpu); KVMTRACE_0D(VMENTRY, vcpu, entryexit); kvm_x86_ops->run(vcpu, kvm_run); |