From d56f5136b01020155b6b0a29f69d924687529bee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Jun 2020 15:16:52 +0200 Subject: KVM: let kvm_destroy_vm_debugfs clean up vCPU debugfs directories After commit 63d0434 ("KVM: x86: move kvm_create_vcpu_debugfs after last failure point") we are creating the pre-vCPU debugfs files after the creation of the vCPU file descriptor. This makes it possible for userspace to reach kvm_vcpu_release before kvm_create_vcpu_debugfs has finished. The vcpu->debugfs_dentry then does not have any associated inode anymore, and this causes a NULL-pointer dereference in debugfs_create_file. The solution is simply to avoid removing the files; they are cleaned up when the VM file descriptor is closed (and that must be after KVM_CREATE_VCPU returns). We can stop storing the dentry in struct kvm_vcpu too, because it is not needed anywhere after kvm_create_vcpu_debugfs returns. Reported-by: syzbot+705f4401d5a93a59b87d@syzkaller.appspotmail.com Fixes: 63d04348371b ("KVM: x86: move kvm_create_vcpu_debugfs after last failure point") Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f43b59b1294c..d38d6b9c24be 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -318,7 +318,6 @@ struct kvm_vcpu { bool preempted; bool ready; struct kvm_vcpu_arch arch; - struct dentry *debugfs_dentry; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) @@ -888,7 +887,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS -void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); #endif int kvm_arch_hardware_enable(void); -- cgit v1.2.3 From e649b3f0188f8fd34dd0dde8d43fd3312b902fb2 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Sat, 6 Jun 2020 13:26:27 +0900 Subject: KVM: x86: Fix APIC page invalidation race Commit b1394e745b94 ("KVM: x86: fix APIC page invalidation") tried to fix inappropriate APIC page invalidation by re-introducing arch specific kvm_arch_mmu_notifier_invalidate_range() and calling it from kvm_mmu_notifier_invalidate_range_start. However, the patch left a possible race where the VMCS APIC address cache is updated *before* it is unmapped: (Invalidator) kvm_mmu_notifier_invalidate_range_start() (Invalidator) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD) (KVM VCPU) vcpu_enter_guest() (KVM VCPU) kvm_vcpu_reload_apic_access_page() (Invalidator) actually unmap page Because of the above race, there can be a mismatch between the host physical address stored in the APIC_ACCESS_PAGE VMCS field and the host physical address stored in the EPT entry for the APIC GPA (0xfee0000). When this happens, the processor will not trap APIC accesses, and will instead show the raw contents of the APIC-access page. Because Windows OS periodically checks for unexpected modifications to the LAPIC register, this will show up as a BSOD crash with BugCheck CRITICAL_STRUCTURE_CORRUPTION (109) we are currently seeing in https://bugzilla.redhat.com/show_bug.cgi?id=1751017. The root cause of the issue is that kvm_arch_mmu_notifier_invalidate_range() cannot guarantee that no additional references are taken to the pages in the range before kvm_mmu_notifier_invalidate_range_end(). Fortunately, this case is supported by the MMU notifier API, as documented in include/linux/mmu_notifier.h: * If the subsystem * can't guarantee that no additional references are taken to * the pages in the range, it has to implement the * invalidate_range() notifier to remove any references taken * after invalidate_range_start(). The fix therefore is to reload the APIC-access page field in the VMCS from kvm_mmu_notifier_invalidate_range() instead of ..._range_start(). Cc: stable@vger.kernel.org Fixes: b1394e745b94 ("KVM: x86: fix APIC page invalidation") Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=197951 Signed-off-by: Eiichi Tsukata Message-Id: <20200606042627.61070-1-eiichi.tsukata@nutanix.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d38d6b9c24be..e2f82131bb3e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1420,8 +1420,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, } #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ -int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, bool blockable); +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 2a18b7e7cd8882f626316c340c6f2fca49b5fa12 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 10 Jun 2020 19:55:32 +0200 Subject: KVM: async_pf: Inject 'page ready' event only if 'page not present' was previously injected 'Page not present' event may or may not get injected depending on guest's state. If the event wasn't injected, there is no need to inject the corresponding 'page ready' event as the guest may get confused. E.g. Linux thinks that the corresponding 'page not present' event wasn't delivered *yet* and allocates a 'dummy entry' for it. This entry is never freed. Note, 'wakeup all' events have no corresponding 'page not present' event and always get injected. s390 seems to always be able to inject 'page not present', the change is effectively a nop. Suggested-by: Vivek Goyal Signed-off-by: Vitaly Kuznetsov Message-Id: <20200610175532.779793-2-vkuznets@redhat.com> Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=208081 Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e2f82131bb3e..62ec926c78a0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -206,6 +206,7 @@ struct kvm_async_pf { unsigned long addr; struct kvm_arch_async_pf arch; bool wakeup_all; + bool notpresent_injected; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); -- cgit v1.2.3