From a13007160f1b9ec7c67e28ec9254f197c5c08d7d Mon Sep 17 00:00:00 2001
From: Amos Kong
Date: Fri, 9 Mar 2012 12:17:32 +0800
Subject: KVM: resize kvm_io_range array dynamically

This patch allows the kvm_io_range array to be resized dynamically.

Signed-off-by: Amos Kong
Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 include/linux/kvm_host.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 665a260c7e09..ba9fb4a9762d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,10 +68,11 @@ struct kvm_io_range {
 	struct kvm_io_device *dev;
 };
 
+#define NR_IOBUS_DEVS 300
+
 struct kvm_io_bus {
 	int dev_count;
-#define NR_IOBUS_DEVS 300
-	struct kvm_io_range range[NR_IOBUS_DEVS];
+	struct kvm_io_range range[];
 };
 
 enum kvm_bus {
--
cgit v1.2.3

From 786a9f888bfbe70a36d0592b26037ca1e8c8da7f Mon Sep 17 00:00:00 2001
From: Amos Kong
Date: Fri, 9 Mar 2012 12:17:40 +0800
Subject: KVM: set upper bounds for iobus dev to limit userspace

kvm_io_bus devices are used for ioevent, pit, pic, ioapic, and
coalesced_mmio. Currently QEMU only emulates one PCI bus; it contains
32 slots, and each slot contains 8 functions, so the maximum number of
supported PCI devices is 1 * 32 * 8 = 256. One virtio-blk device takes
one iobus device, while one virtio-net (vhost=on) device takes two
iobus devices. The maximum number of coalesced MMIO zones is 100, and
each zone takes an iobus device. So 300 io_bus devices are not enough.

Set an upper bound for kvm_io_range to limit userspace. 1000 is a very
large limit that does not bloat the typical user.

Signed-off-by: Amos Kong
Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ba9fb4a9762d..3a2cea616283 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,7 +68,7 @@ struct kvm_io_range {
 	struct kvm_io_device *dev;
 };
 
-#define NR_IOBUS_DEVS 300
+#define NR_IOBUS_DEVS 1000
 
 struct kvm_io_bus {
 	int dev_count;
--
cgit v1.2.3
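With a flexible range[] array, registering a device means reallocating the whole bus one slot larger rather than writing into a fixed slot. A minimal sketch of the grow-and-copy pattern these two patches enable, with the new cap applied (io_bus_grow() is a hypothetical helper, not the actual kvm_main.c code; the real path also publishes the copy via RCU before freeing the old bus):

/*
 * Illustrative only: grow a flexible-array kvm_io_bus by one slot.
 */
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/string.h>

static struct kvm_io_bus *io_bus_grow(struct kvm_io_bus *bus)
{
	struct kvm_io_bus *new_bus;

	/* the second patch caps dev_count at NR_IOBUS_DEVS (1000) */
	if (bus->dev_count >= NR_IOBUS_DEVS)
		return NULL;

	/* allocate the header plus dev_count + 1 range entries */
	new_bus = kzalloc(sizeof(*bus) +
			  (bus->dev_count + 1) * sizeof(struct kvm_io_range),
			  GFP_KERNEL);
	if (!new_bus)
		return NULL;

	/* copy the header and the existing entries into the new bus */
	memcpy(new_bus, bus,
	       sizeof(*bus) + bus->dev_count * sizeof(struct kvm_io_range));
	return new_bus;
}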
From b6d33834bd4e8bdf4a199812e31b3e36da53c794 Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Thu, 8 Mar 2012 16:44:24 -0500
Subject: KVM: Factor out kvm_vcpu_kick to arch-generic code

The kvm_vcpu_kick function performs roughly the same functionality on
almost all architectures, so we shouldn't have separate copies.

PowerPC keeps a pointer to one of several interchangeable waitqueues
on the vcpu_arch structure, and to accommodate this special need a
__KVM_HAVE_ARCH_VCPU_GET_WQ define and an accompanying
kvm_arch_vcpu_wq function have been defined. For all other
architectures this is a generic inline that just returns &vcpu->wq.

Acked-by: Scott Wood
Signed-off-by: Christoffer Dall
Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 include/linux/kvm_host.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3a2cea616283..5b624e1ff814 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -439,6 +439,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			     gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
@@ -507,6 +508,7 @@ int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
 void kvm_free_physmem(struct kvm *kvm);
 
@@ -522,6 +524,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
+#ifndef __KVM_HAVE_ARCH_VCPU_GET_WQ
+static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->wq;
+}
+#endif
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_free_all_assigned_devices(struct kvm *kvm);
--
cgit v1.2.3

From 2246f8b56315befa30f3d3d2800e0734c774f70e Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Tue, 13 Mar 2012 22:35:01 +0100
Subject: KVM: PPC: Rework wqp conditional code

On PowerPC, we sometimes use a waitqueue per core, not per thread, so
we can't always use the vcpu-internal waitqueue.

This code was generalized by Christoffer Dall recently, but the change
unfortunately broke compilation for PowerPC: at the time the helper
function is defined, struct kvm_vcpu is not declared yet, so we can't
dereference it.

This patch moves all logic into the generic inline function, by which
point we have all the information necessary.

Signed-off-by: Alexander Graf
Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 include/linux/kvm_host.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5b624e1ff814..5184817e714a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -524,12 +524,14 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifndef __KVM_HAVE_ARCH_VCPU_GET_WQ
 static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
+#ifdef __KVM_HAVE_ARCH_WQP
+	return vcpu->arch.wqp;
+#else
 	return &vcpu->wq;
-}
 #endif
+}
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
--
cgit v1.2.3

From 93474b25af1eabf5b14743793156e8d307bfcd6b Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Thu, 1 Mar 2012 19:34:45 +0900
Subject: KVM: Remove unused dirty_bitmap_head and nr_dirty_pages

Now that we do neither double buffering nor heuristic selection of the
write protection method, these are not needed anymore.

Note: some drivers have their own implementation of set_bit_le(), and
making it generic needs a bit of work; so we use test_and_set_bit_le()
and will later replace it with a generic set_bit_le().

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Avi Kivity
---
 include/linux/kvm_host.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5184817e714a..49c2f2fd281f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -179,8 +179,6 @@ struct kvm_memory_slot {
 	unsigned long flags;
 	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_head;
-	unsigned long nr_dirty_pages;
 	struct kvm_arch_memory_slot arch;
 	unsigned long userspace_addr;
 	int user_alloc;
--
cgit v1.2.3
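The test_and_set_bit_le() note above concerns the dirty-logging write path. A simplified sketch of how mark_page_dirty_in_slot() can look once the two fields are gone (the real body lives in virt/kvm/kvm_main.c and may differ in detail):

/*
 * Sketch: with the double-buffering fields removed, marking a page
 * dirty is a single atomic bit operation on the one remaining bitmap.
 */
void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
			     gfn_t gfn)
{
	if (memslot && memslot->dirty_bitmap) {
		unsigned long rel_gfn = gfn - memslot->base_gfn;

		/* test_and_set_bit_le() until a generic set_bit_le() exists */
		test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
	}
}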
From f78146b0f9230765c6315b2e14f56112513389ad Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Wed, 18 Apr 2012 19:22:47 +0300
Subject: KVM: Fix page-crossing MMIO

MMIO accesses that are split across a page boundary are currently
broken: the code does not expect to be aborted by the exit to
userspace for the first MMIO fragment.

This patch fixes the problem by generalizing the current code for
handling 16-byte MMIOs to handle a number of "fragments", and changes
the MMIO code to create those fragments.

Signed-off-by: Avi Kivity
Signed-off-by: Marcelo Tosatti
---
 include/linux/kvm_host.h | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a2d00b1bbf54..186ffab0b9f0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,20 @@
 #define KVM_MMIO_SIZE 8
 #endif
 
+/*
+ * If we support unaligned MMIO, at most one fragment will be split into two:
+ */
+#ifdef KVM_UNALIGNED_MMIO
+# define KVM_EXTRA_MMIO_FRAGMENTS 1
+#else
+# define KVM_EXTRA_MMIO_FRAGMENTS 0
+#endif
+
+#define KVM_USER_MMIO_SIZE 8
+
+#define KVM_MAX_MMIO_FRAGMENTS \
+	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
+
 /*
  * vcpu->requests bit members
  */
@@ -117,6 +131,16 @@ enum {
 	EXITING_GUEST_MODE
 };
 
+/*
+ * Sometimes a large or cross-page mmio needs to be broken up into separate
+ * exits for userspace servicing.
+ */
+struct kvm_mmio_fragment {
+	gpa_t gpa;
+	void *data;
+	unsigned len;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -144,10 +168,9 @@ struct kvm_vcpu {
 	int mmio_needed;
 	int mmio_read_completed;
 	int mmio_is_write;
-	int mmio_size;
-	int mmio_index;
-	unsigned char mmio_data[KVM_MMIO_SIZE];
-	gpa_t mmio_phys_addr;
+	int mmio_cur_fragment;
+	int mmio_nr_fragments;
+	struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
 #endif
 
 #ifdef CONFIG_KVM_ASYNC_PF
--
cgit v1.2.3

From 07975ad3b30579ca27d880491ad992326b930c63 Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Thu, 29 Mar 2012 21:14:12 +0200
Subject: KVM: Introduce direct MSI message injection for in-kernel irqchips

Currently, MSI messages can only be injected into in-kernel irqchips
by defining a corresponding IRQ route for each message. Not only is
this unhandy if the MSI messages are generated "on the fly" by user
space; IRQ routes are also a limited resource that user space has to
manage carefully.

By providing a direct injection path, we can both avoid using up
limited resources and simplify the necessary steps for user land.

Signed-off-by: Jan Kiszka
Signed-off-by: Avi Kivity
---
 include/linux/kvm_host.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 186ffab0b9f0..6f343307d72b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -802,6 +802,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned flags);
 void kvm_free_irq_routing(struct kvm *kvm);
 
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
 #else
 
 static inline void kvm_free_irq_routing(struct kvm *kvm) {}
--
cgit v1.2.3
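kvm_send_userspace_msi() backs a new vm ioctl. A hedged sketch of the user-space side, assuming the KVM_SIGNAL_MSI ioctl and struct kvm_msi layout this series introduces; vm_fd and the MSI address/data values below are placeholders:

/*
 * Sketch: inject one MSI directly, without setting up an IRQ route.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int inject_msi(int vm_fd)
{
	struct kvm_msi msi;

	memset(&msi, 0, sizeof(msi));
	msi.address_lo = 0xfee00000;	/* example x86 MSI address */
	msi.address_hi = 0;
	msi.data = 0x4041;		/* example vector/delivery bits */

	/* > 0 delivered, 0 not delivered (e.g. masked), < 0 error */
	return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
}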
From 41628d334361670d825fb03c04568f5ef9f084dc Mon Sep 17 00:00:00 2001
From: Konstantin Weitz
Date: Wed, 25 Apr 2012 15:30:38 +0200
Subject: KVM: s390: Implement the directed yield (diag 9c) hypervisor call for KVM

This patch implements the directed yield hypercall found on other
System z hypervisors. It delegates execution time to the virtual cpu
specified in the instruction's parameter. This is useful to avoid long
spinlock waits in the guest.

Christian Borntraeger: moved common code into virt/kvm/

Signed-off-by: Konstantin Weitz
Signed-off-by: Christian Borntraeger
Signed-off-by: Marcelo Tosatti
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6f343307d72b..cae342d29d1b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -461,6 +461,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
--
cgit v1.2.3

From c142786c6291189b5c85f53d91743e1eefbd8fe0 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Mon, 14 May 2012 15:44:06 +0300
Subject: KVM: MMU: Don't use RCU for lockless shadow walking

Using RCU for lockless shadow walking can increase the amount of
memory in use by the system, since RCU grace periods are
unpredictable. We also have an unconditional write to a shared
variable (reader_counter), which isn't good for scaling.

Replace that with a scheme similar to x86's get_user_pages_fast():
disable interrupts during the lockless shadow walk to force the freer
(kvm_mmu_commit_zap_page()) to wait for the TLB flush IPI to find the
processor with interrupts enabled.

We also add a new vcpu->mode, READING_SHADOW_PAGE_TABLES, to prevent
kvm_flush_remote_tlbs() from avoiding the IPI.

Signed-off-by: Avi Kivity
Signed-off-by: Marcelo Tosatti
---
 include/linux/kvm_host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux/kvm_host.h')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cae342d29d1b..c4464356b35b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -128,7 +128,8 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 
 enum {
 	OUTSIDE_GUEST_MODE,
 	IN_GUEST_MODE,
-	EXITING_GUEST_MODE
+	EXITING_GUEST_MODE,
+	READING_SHADOW_PAGE_TABLES,
 };
 
--
cgit v1.2.3
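On the MMU side, the bracketing this scheme implies looks roughly like the following sketch of walk_shadow_page_lockless_begin/end() in arch/x86/kvm/mmu.c (simplified; not part of the header diff above):

/*
 * Sketch: with interrupts off, this CPU cannot acknowledge the
 * TLB-flush IPI, so kvm_mmu_commit_zap_page() must wait before
 * freeing shadow pages we may still be reading; the new vcpu->mode
 * value keeps kvm_flush_remote_tlbs() from skipping the IPI.
 */
static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
{
	local_irq_disable();
	vcpu->mode = READING_SHADOW_PAGE_TABLES;
	/* order the mode write before the following spte reads */
	smp_mb();
}

static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
	/* order the spte reads before the mode write */
	smp_mb();
	vcpu->mode = OUTSIDE_GUEST_MODE;
	local_irq_enable();
}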