From f61c94bb99ca4253ac5dd57750e1af209a4beb7a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 20:38:19 +0000 Subject: KVM: PPC: booke: Add watchdog emulation This patch adds the watchdog emulation in KVM. The watchdog emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl. The kernel timer are used for watchdog emulation and emulates h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how it is configured. Signed-off-by: Liu Yu Signed-off-by: Scott Wood [bharat.bhushan@freescale.com: reworked patch] Signed-off-by: Bharat Bhushan [agraf: adjust to new request framework] Signed-off-by: Alexander Graf --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2850656e2e96..0ca3663206f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -118,6 +118,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_IMMEDIATE_EXIT 15 #define KVM_REQ_PMU 16 #define KVM_REQ_PMI 17 +#define KVM_REQ_WATCHDOG 18 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From 8b6e4547e0e4b6aac11df6d8d4e71ea2ab159b5e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 20 Sep 2012 07:43:08 +0200 Subject: KVM: x86: Convert kvm_arch_vcpu_reset into private kvm_vcpu_reset There are no external callers of this function as there is no concept of resetting a vcpu from generic code. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bfc9f9815c..c35b1c08c004 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -582,7 +582,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); int kvm_arch_hardware_enable(void *garbage); void kvm_arch_hardware_disable(void *garbage); int kvm_arch_hardware_setup(void); -- cgit v1.2.3 From 8ca40a70a70988c0bdea106c894843f763ca2989 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 14 Oct 2012 23:10:18 -0400 Subject: KVM: Take kvm instead of vcpu to mmu_notifier_retry The mmu_notifier_retry is not specific to any vcpu (and never will be) so only take struct kvm as a parameter. The motivation is the ARM mmu code that needs to call this from somewhere where we long let go of the vcpu pointer. Signed-off-by: Christoffer Dall Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6afc5be2615e..82e2c783a21e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -841,9 +841,9 @@ extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) -static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) +static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) { - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(kvm->mmu_notifier_count)) return 1; /* * Ensure the read of mmu_notifier_count happens before the read @@ -856,7 +856,7 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se * can't rely on kvm->mmu_lock to keep things ordered. */ smp_rmb(); - if (vcpu->kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_notifier_seq != mmu_seq) return 1; return 0; } -- cgit v1.2.3 From b080935c8638e08134629d0a9ebdf35669bec14d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 5 Oct 2012 23:07:19 +0200 Subject: kvm: Directly account vtime to system on guest switch Switching to or from guest context is done on ioctl context. So by the time we call kvm_guest_enter() or kvm_guest_exit() we know we are not running the idle task. As a result, we can directly account the cputime using vtime_account_system(). There are two good reasons to do this: * We avoid some useless checks on guest switch. It optimizes a bit this fast path. * In the case of CONFIG_IRQ_TIME_ACCOUNTING, calling vtime_account() checks for irq time to account. This is pointless since we know we are not in an irq on guest switch. This is wasting cpu cycles for no good reason. vtime_account_system() OTOH is a no-op in this config option. * We can remove the irq disable/enable around kvm guest switch in s390. A further optimization may consist in introducing a vtime_account_guest() that directly calls account_guest_time(). Signed-off-by: Frederic Weisbecker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Avi Kivity Cc: Marcelo Tosatti Cc: Joerg Roedel Cc: Alexander Graf Cc: Xiantao Zhang Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker --- include/linux/kvm_host.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bfc9f9815c..0e2212fe4784 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -737,7 +737,11 @@ static inline int kvm_deassign_device(struct kvm *kvm, static inline void kvm_guest_enter(void) { BUG_ON(preemptible()); - vtime_account(current); + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); current->flags |= PF_VCPU; /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -751,7 +755,11 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - vtime_account(current); + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); current->flags &= ~PF_VCPU; } -- cgit v1.2.3 From 81c52c56e2b43589091ee29038bcf793d3f184ab Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 16 Oct 2012 20:10:59 +0800 Subject: KVM: do not treat noslot pfn as a error pfn This patch filters noslot pfn out from error pfns based on Marcelo comment: noslot pfn is not a error pfn After this patch, - is_noslot_pfn indicates that the gfn is not in slot - is_error_pfn indicates that the gfn is in slot but the error is occurred when translate the gfn to pfn - is_error_noslot_pfn indicates that the pfn either it is error pfns or it is noslot pfn And is_invalid_pfn can be removed, it makes the code more clean Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 82e2c783a21e..99a47627e046 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -58,28 +58,40 @@ /* * For the normal pfn, the highest 12 bits should be zero, - * so we can mask these bits to indicate the error. + * so we can mask bit 62 ~ bit 52 to indicate the error pfn, + * mask bit 63 to indicate the noslot pfn. */ -#define KVM_PFN_ERR_MASK (0xfffULL << 52) +#define KVM_PFN_ERR_MASK (0x7ffULL << 52) +#define KVM_PFN_ERR_NOSLOT_MASK (0xfffULL << 52) +#define KVM_PFN_NOSLOT (0x1ULL << 63) #define KVM_PFN_ERR_FAULT (KVM_PFN_ERR_MASK) #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) -#define KVM_PFN_ERR_BAD (KVM_PFN_ERR_MASK + 2) -#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 3) +#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) +/* + * error pfns indicate that the gfn is in slot but faild to + * translate it to pfn on host. + */ static inline bool is_error_pfn(pfn_t pfn) { return !!(pfn & KVM_PFN_ERR_MASK); } -static inline bool is_noslot_pfn(pfn_t pfn) +/* + * error_noslot pfns indicate that the gfn can not be + * translated to pfn - it is not in slot or failed to + * translate it to pfn. + */ +static inline bool is_error_noslot_pfn(pfn_t pfn) { - return pfn == KVM_PFN_ERR_BAD; + return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK); } -static inline bool is_invalid_pfn(pfn_t pfn) +/* noslot pfn indicates that the gfn is not in slot. */ +static inline bool is_noslot_pfn(pfn_t pfn) { - return !is_noslot_pfn(pfn) && is_error_pfn(pfn); + return pfn == KVM_PFN_NOSLOT; } #define KVM_HVA_ERR_BAD (PAGE_OFFSET) -- cgit v1.2.3 From 87da7e66a40532b743cd50972fcf85a1f15b14ea Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 24 Oct 2012 14:07:59 +0800 Subject: KVM: x86: fix vcpu->mmio_fragments overflow After commit b3356bf0dbb349 (KVM: emulator: optimize "rep ins" handling), the pieces of io data can be collected and write them to the guest memory or MMIO together Unfortunately, kvm splits the mmio access into 8 bytes and store them to vcpu->mmio_fragments. If the guest uses "rep ins" to move large data, it will cause vcpu->mmio_fragments overflow The bug can be exposed by isapc (-M isapc): [23154.818733] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC [ ......] [23154.858083] Call Trace: [23154.859874] [] kvm_get_cr8+0x1d/0x28 [kvm] [23154.861677] [] kvm_arch_vcpu_ioctl_run+0xcda/0xe45 [kvm] [23154.863604] [] ? kvm_arch_vcpu_load+0x17b/0x180 [kvm] Actually, we can use one mmio_fragment to store a large mmio access then split it when we pass the mmio-exit-info to userspace. After that, we only need two entries to store mmio info for the cross-mmio pages access Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bfc9f9815c..ecc554374e44 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -42,19 +42,8 @@ */ #define KVM_MEMSLOT_INVALID (1UL << 16) -/* - * If we support unaligned MMIO, at most one fragment will be split into two: - */ -#ifdef KVM_UNALIGNED_MMIO -# define KVM_EXTRA_MMIO_FRAGMENTS 1 -#else -# define KVM_EXTRA_MMIO_FRAGMENTS 0 -#endif - -#define KVM_USER_MMIO_SIZE 8 - -#define KVM_MAX_MMIO_FRAGMENTS \ - (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS) +/* Two fragments for cross MMIO pages. */ +#define KVM_MAX_MMIO_FRAGMENTS 2 /* * For the normal pfn, the highest 12 bits should be zero, -- cgit v1.2.3 From fd25b4c2f226de818e1d2b71e3e681d28bcaf5ba Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 13 Nov 2012 18:21:22 +0100 Subject: vtime: Remove the underscore prefix invasion Prepending irq-unsafe vtime APIs with underscores was actually a bad idea as the result is a big mess in the API namespace that is even waiting to be further extended. Also these helpers are always called from irq safe callers except kvm. Just provide a vtime_account_system_irqsafe() for this specific case so that we can remove the underscore prefix on other vtime functions. Signed-off-by: Frederic Weisbecker Reviewed-by: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0e2212fe4784..f17158bdd4fc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -741,7 +741,7 @@ static inline void kvm_guest_enter(void) * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system(current); + vtime_account_system_irqsafe(current); current->flags |= PF_VCPU; /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -759,7 +759,7 @@ static inline void kvm_guest_exit(void) * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system(current); + vtime_account_system_irqsafe(current); current->flags &= ~PF_VCPU; } -- cgit v1.2.3 From d828199e84447795c6669ff0e6c6d55eb9beeff6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 Nov 2012 23:29:01 -0200 Subject: KVM: x86: implement PVCLOCK_TSC_STABLE_BIT pvclock flag KVM added a global variable to guarantee monotonicity in the guest. One of the reasons for that is that the time between 1. ktime_get_ts(×pec); 2. rdtscll(tsc); Is variable. That is, given a host with stable TSC, suppose that two VCPUs read the same time via ktime_get_ts() above. The time required to execute 2. is not the same on those two instances executing in different VCPUS (cache misses, interrupts...). If the TSC value that is used by the host to interpolate when calculating the monotonic time is the same value used to calculate the tsc_timestamp value stored in the pvclock data structure, and a single tuple is visible to all vcpus simultaneously, this problem disappears. See comment on top of pvclock_update_vm_gtod_copy for details. Monotonicity is then guaranteed by synchronicity of the host TSCs and guest TSCs. Set TSC stable pvclock flag in that case, allowing the guest to read clock from userspace. Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 99a47627e046..c94c9985dee0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -131,6 +131,8 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_PMU 16 #define KVM_REQ_PMI 17 #define KVM_REQ_WATCHDOG 18 +#define KVM_REQ_MASTERCLOCK_UPDATE 19 +#define KVM_REQ_MCLOCK_INPROGRESS 20 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -540,6 +542,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); +void kvm_make_mclock_inprogress_request(struct kvm *kvm); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -- cgit v1.2.3 From 42897d866b120547777ae1fd316680ec53356d9c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 Nov 2012 23:29:02 -0200 Subject: KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization TSC initialization will soon make use of online_vcpus. Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c94c9985dee0..8e5c7b651655 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -596,6 +596,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_hardware_enable(void *garbage); -- cgit v1.2.3 From 01f218803757c9ec1152ac2fd39d03c27c452634 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 17 Oct 2012 18:06:02 +0200 Subject: kvm: add kvm_set_irq_inatomic Add an API to inject IRQ from atomic context. Return EWOULDBLOCK if impossible (e.g. for multicast). Only MSI is supported ATM. Signed-off-by: Michael S. Tsirkin Signed-off-by: Gleb Natapov --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8e5c7b651655..36c3704bfa7c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -693,6 +693,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, unsigned long *deliver_bitmask); #endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); +int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); -- cgit v1.2.3 From 914daba865cb5c38cd5fdee024ca38029315b38f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 9 Oct 2012 00:22:59 +0200 Subject: KVM: Distangle eventfd code from irqchip The current eventfd code assumes that when we have eventfd, we also have irqfd for in-kernel interrupt delivery. This is not necessarily true. On PPC we don't have an in-kernel irqchip yet, but we can still support easily support eventfd. Signed-off-by: Alexander Graf --- include/linux/kvm_host.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8e5c7b651655..c823e47c3641 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -900,10 +900,20 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); +int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); + +#ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); -int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); +#else +static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) +{ + return -EINVAL; +} + +static inline void kvm_irqfd_release(struct kvm *kvm) {} +#endif #else -- cgit v1.2.3