From 9e02fb963352c5ad075d80dd3e852fbee9585575 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 17 Mar 2011 10:53:33 +0200 Subject: KVM: fix crash on irqfd deassign irqfd in kvm used flush_work incorrectly: it assumed that work scheduled previously can't run after flush_work, but since kvm uses a non-reentrant workqueue (by means of schedule_work) we need flush_work_sync to get that guarantee. Signed-off-by: Michael S. Tsirkin Reported-by: Jean-Philippe Menil Tested-by: Jean-Philippe Menil Signed-off-by: Avi Kivity --- virt/kvm/eventfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 3656849f78a0..0d1fd5160adf 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -90,7 +90,7 @@ irqfd_shutdown(struct work_struct *work) * We know no new events will be scheduled at this point, so block * until all previously outstanding events have completed */ - flush_work(&irqfd->inject); + flush_work_sync(&irqfd->inject); /* * It is now safe to release the object's resources -- cgit v1.2.3 From 0857b9e95c1af8bfe84630ef6747b9d4d61de4c6 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 1 Feb 2011 13:21:47 +0200 Subject: KVM: Enable async page fault processing If asynchronous hva_to_pfn() is requested call GUP with FOLL_NOWAIT to avoid sleeping on IO. Check for hwpoison is done at the same time, otherwise check_user_page_hwpoison() will call GUP again and will put vcpu to sleep. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 556e3efe5325..6330653480e4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1037,6 +1037,17 @@ static pfn_t get_fault_pfn(void) return fault_pfn; } +int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int write, struct page **page) +{ + int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; + + if (write) + flags |= FOLL_WRITE; + + return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); +} + static inline int check_user_page_hwpoison(unsigned long addr) { int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; @@ -1070,7 +1081,14 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, if (writable) *writable = write_fault; - npages = get_user_pages_fast(addr, 1, write_fault, page); + if (async) { + down_read(¤t->mm->mmap_sem); + npages = get_user_page_nowait(current, current->mm, + addr, write_fault, page); + up_read(¤t->mm->mmap_sem); + } else + npages = get_user_pages_fast(addr, 1, write_fault, + page); /* map read fault as writable if possible */ if (unlikely(!write_fault) && npages == 1) { @@ -1093,7 +1111,8 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, return get_fault_pfn(); down_read(¤t->mm->mmap_sem); - if (check_user_page_hwpoison(addr)) { + if (npages == -EHWPOISON || + (!async && check_user_page_hwpoison(addr))) { up_read(¤t->mm->mmap_sem); get_page(hwpoison_page); return page_to_pfn(hwpoison_page); -- cgit v1.2.3 From 20800bc940af671257abc97ad362abe3c21ddd50 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 30 Mar 2011 15:01:45 +0200 Subject: KVM: fix XSAVE bit scanning When KVM scans the 0xD CPUID leaf for propagating the XSAVE save area leaves, it assumes that the leaves are contigious and stops at the first zero one. On AMD hardware there is a gap, though, as LWP uses leaf 62 to announce it's state save area. So lets iterate through all 64 possible leaves and simply skip zero ones to also cover later features. Signed-off-by: Andre Przywara Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 58f517b59645..4a1ba05f7af3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2395,9 +2395,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, int i; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - for (i = 1; *nent < maxnent; ++i) { - if (entry[i - 1].eax == 0 && i != 2) - break; + for (i = 1; *nent < maxnent && i < 64; ++i) { + if (entry[i].eax == 0) + continue; do_cpuid_1_ent(&entry[i], function, i); entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; -- cgit v1.2.3 From bd22f5cfcfe8f68bf43b72daf4530cd7eedc9b7a Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 31 Mar 2011 16:58:49 +0200 Subject: KVM: move and fix substitue search for missing CPUID entries If KVM cannot find an exact match for a requested CPUID leaf, the code will try to find the closest match instead of simply confessing it's failure. The implementation was meant to satisfy the CPUID specification, but did not properly check for extended and standard leaves and also didn't account for the index subleaf. Beside that this rule only applies to CPUID intercepts, which is not the only user of the kvm_find_cpuid_entry() function. So fix this algorithm and call it from kvm_emulate_cpuid(). This fixes a crash of newer Linux kernels as KVM guests on AMD Bulldozer CPUs, where bogus values were returned in response to a CPUID intercept. Signed-off-by: Andre Przywara Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a1ba05f7af3..934b4c6b0bf9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4958,12 +4958,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, best = e; break; } - /* - * Both basic or both extended? - */ - if (((e->function ^ function) & 0x80000000) == 0) - if (!best || e->function > best->function) - best = e; } return best; } @@ -4983,6 +4977,27 @@ not_found: return 36; } +/* + * If no match is found, check whether we exceed the vCPU's limit + * and return the content of the highest valid _standard_ leaf instead. + * This is to satisfy the CPUID specification. + */ +static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, + u32 function, u32 index) +{ + struct kvm_cpuid_entry2 *maxlevel; + + maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); + if (!maxlevel || maxlevel->eax >= function) + return NULL; + if (function & 0x80000000) { + maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); + if (!maxlevel) + return NULL; + } + return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); +} + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 function, index; @@ -4995,6 +5010,10 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RCX, 0); kvm_register_write(vcpu, VCPU_REGS_RDX, 0); best = kvm_find_cpuid_entry(vcpu, function, index); + + if (!best) + best = check_cpuid_limit(vcpu, function, index); + if (best) { kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); -- cgit v1.2.3