From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b34fe3f..ebd723676633 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -624,6 +624,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we have a need to resolve a gfn to an hva given a pointer to a memslot to avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..6d8a658ec174 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -528,6 +528,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -- cgit v1.2.3 From 0f8a4de3e088797576ac76200b634b802e5c7781 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 26 Aug 2014 14:00:37 +0200 Subject: KVM: Unconditionally export KVM_CAP_READONLY_MEM The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that userspace can, at run-time, determine if a feature is supported or not. This allows KVM to being supporting a new feature with a new kernel version without any need to update user space. Unfortunately, since the definition of KVM_CAP_READONLY_MEM was guarded by #ifdef __KVM_HAVE_READONLY_MEM, such discovery still required a user space update. Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cf3a2ff440e4..90d3edab839c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -738,9 +738,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 -#ifdef __KVM_HAVE_READONLY_MEM #define KVM_CAP_READONLY_MEM 81 -#endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 -- cgit v1.2.3 From 44b5ce73c99c389817be71b9161bceb197d40ecb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 26 Aug 2014 14:00:38 +0200 Subject: KVM: Unconditionally export KVM_CAP_USER_NMI The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that userspace can, at run-time, determine if a feature is supported or not. This allows KVM to being supporting a new feature with a new kernel version without any need to update user space. Unfortunately, since the definition of KVM_CAP_USER_NMI was guarded by #ifdef __KVM_HAVE_USER_NMI, such discovery still required a user space update. Therefore, unconditionally export KVM_CAP_USER_NMI and change the the typo in the comment for the IOCTL number definition as well. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 90d3edab839c..0695a1e3e332 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -654,9 +654,7 @@ struct kvm_ppc_smmu_info { #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 -#endif #ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 #endif @@ -1091,7 +1089,7 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) -/* Available with KVM_CAP_NMI */ +/* Available with KVM_CAP_USER_NMI */ #define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) -- cgit v1.2.3 From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..e1cb915a1096 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -1036,8 +1032,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + #include /* -- cgit v1.2.3 From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beggining was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e1cb915a1096..e098dce179df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); -- cgit v1.2.3 From 184564efae4d775225c8fe3b762a56956fb1f827 Mon Sep 17 00:00:00 2001 From: Zhang Haoyu Date: Thu, 11 Sep 2014 16:47:04 +0800 Subject: kvm: ioapic: conditionally delay irq delivery duringeoi broadcast Currently, we call ioapic_service() immediately when we find the irq is still active during eoi broadcast. But for real hardware, there's some delay between the EOI writing and irq delivery. If we do not emulate this behavior, and re-inject the interrupt immediately after the guest sends an EOI and re-enables interrupts, a guest might spend all its time in the ISR if it has a broken handler for a level-triggered interrupt. Such livelock actually happens with Windows guests when resuming from hibernation. As there's no way to recognize the broken handle from new raised ones, this patch delays an interrupt if 10.000 consecutive EOIs found that the interrupt was still high. The guest can then make a little forward progress, until a proper IRQ handler is set or until some detection routine in the guest (such as Linux's note_interrupt()) recognizes the situation. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: Zhang Haoyu Signed-off-by: Paolo Bonzini --- include/trace/events/kvm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 908925ace776..ab679c395042 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq, __entry->coalesced ? " (coalesced)" : "") ); +TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, + TP_PROTO(__u64 e), + TP_ARGS(e), + + TP_STRUCT__entry( + __field( __u64, e ) + ), + + TP_fast_assign( + __entry->e = e; + ), + + TP_printk("dst %x vec=%u (%s|%s|%s%s)", + (u8)(__entry->e >> 56), (u8)__entry->e, + __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), + (__entry->e & (1<<11)) ? "logical" : "physical", + (__entry->e & (1<<15)) ? "level" : "edge", + (__entry->e & (1<<16)) ? "|masked" : "") +); + TRACE_EVENT(kvm_msi_set_irq, TP_PROTO(__u64 address, __u64 data), TP_ARGS(address, data), -- cgit v1.2.3 From d60eacb07053142bfb9b41582074a89a790a9d46 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:33 +0100 Subject: KVM: device: add simple registration mechanism for kvm_device_ops kvm_ioctl_create_device currently has knowledge of all the device types and their associated ops. This is fairly inflexible when adding support for new in-kernel device emulations, so move what we currently have out into a table, which can support dynamic registration of ops by new drivers for virtual hardware. Cc: Alex Williamson Cc: Alex Graf Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Marc Zyngier Acked-by: Cornelia Huck Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e098dce179df..b6e954742951 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1064,6 +1064,7 @@ struct kvm_device_ops { void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0695a1e3e332..60768822b140 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -943,15 +943,25 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; -#define KVM_DEV_TYPE_FSL_MPIC_20 1 -#define KVM_DEV_TYPE_FSL_MPIC_42 2 -#define KVM_DEV_TYPE_XICS 3 -#define KVM_DEV_TYPE_VFIO 4 #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 -#define KVM_DEV_TYPE_ARM_VGIC_V2 5 -#define KVM_DEV_TYPE_FLIC 6 + +enum kvm_device_type { + KVM_DEV_TYPE_FSL_MPIC_20 = 1, +#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20 + KVM_DEV_TYPE_FSL_MPIC_42, +#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42 + KVM_DEV_TYPE_XICS, +#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS + KVM_DEV_TYPE_VFIO, +#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO + KVM_DEV_TYPE_ARM_VGIC_V2, +#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 + KVM_DEV_TYPE_FLIC, +#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC + KVM_DEV_TYPE_MAX, +}; /* * ioctls for VM fds -- cgit v1.2.3 From c06a841bf36340e9e917ce60d11a6425ac85d0bd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:34 +0100 Subject: KVM: ARM: vgic: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the ARM VGIC, instead of relying on the static table. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b6e954742951..601d321f96e7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; -extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_flic_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3 From 84877d93336de21a6251db00b841468a83c65906 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 2 Sep 2014 10:27:35 +0100 Subject: KVM: s390: register flic ops dynamically Using the new kvm_register_device_ops() interface makes us get rid of an #ifdef in common code. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Cornelia Huck Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 601d321f96e7..7ef088bec715 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; -extern struct kvm_device_ops kvm_flic_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3 From 80ce1639727e9d38729c34f162378508c307ca25 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:36 +0100 Subject: KVM: VFIO: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the VFIO kvm device, instead of relying on the static table. This is achieved by a module_init call to register the ops with KVM. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Alex Williamson Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7ef088bec715..d44a2d640551 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1068,7 +1068,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3 From 227844f53864077ccaefe01d0960fcccc03445ce Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:27:18 +0200 Subject: arm/arm64: KVM: Rename irq_state to irq_pending The irq_state field on the distributor struct is ambiguous in its meaning; the comment says it's the level of the input put, but that doesn't make much sense for edge-triggered interrupts. The code actually uses this state variable to check if the interrupt is in the pending state on the distributor so clarify the comment and rename the actual variable and accessor methods. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 35b0c121bb65..388d442eecb5 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,8 +140,8 @@ struct vgic_dist { /* Interrupt enabled (one bit per IRQ) */ struct vgic_bitmap irq_enabled; - /* Interrupt 'pin' level */ - struct vgic_bitmap irq_state; + /* Interrupt state is pending on the distributor */ + struct vgic_bitmap irq_pending; /* Level-triggered interrupt in progress */ struct vgic_bitmap irq_active; -- cgit v1.2.3 From dbf20f9d8105cca531614c8bff9a74351e8e67e7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:55:13 +0200 Subject: arm/arm64: KVM: Rename irq_active to irq_queued We have a special bitmap on the distributor struct to keep track of when level-triggered interrupts are queued on the list registers. This was named irq_active, which is confusing, because the active state of an interrupt as per the GIC spec is a different thing, not specifically related to edge-triggered/level-triggered configurations but rather indicates an interrupt which has been ack'ed but not yet eoi'ed. Rename the bitmap and the corresponding accessor functions to irq_queued to clarify what this is actually used for. Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 388d442eecb5..7d8e61fa9928 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -143,8 +143,8 @@ struct vgic_dist { /* Interrupt state is pending on the distributor */ struct vgic_bitmap irq_pending; - /* Level-triggered interrupt in progress */ - struct vgic_bitmap irq_active; + /* Level-triggered interrupt queued on VCPU interface */ + struct vgic_bitmap irq_queued; /* Interrupt priority. Not used yet. */ struct vgic_bytemap irq_priority; -- cgit v1.2.3 From faa1b46c3e9f4d40359aee04ff275eea5f4cae3a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 21:54:51 +0200 Subject: arm/arm64: KVM: vgic: Improve handling of GICD_I{CS}PENDRn Writes to GICD_ISPENDRn and GICD_ICPENDRn are currently not handled correctly for level-triggered interrupts. The spec states that for level-triggered interrupts, writes to the GICD_ISPENDRn activate the output of a flip-flop which is in turn or'ed with the actual input interrupt signal. Correspondingly, writes to GICD_ICPENDRn simply deactivates the output of that flip-flop, but does not (of course) affect the external input signal. Reads from GICC_IAR will also deactivate the flip-flop output. This requires us to track the state of the level-input separately from the state in the flip-flop. We therefore introduce two new variables on the distributor struct to track these two states. Astute readers may notice that this is introducing more state than required (because an OR of the two states gives you the pending state), but the remaining vgic code uses the pending bitmap for optimized operations to figure out, at the end of the day, if an interrupt is pending or not on the distributor side. Refactoring the code to consider the two state variables all the places where we currently access the precomputed pending value, did not look pretty. Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7d8e61fa9928..f074539c6ac5 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,9 +140,23 @@ struct vgic_dist { /* Interrupt enabled (one bit per IRQ) */ struct vgic_bitmap irq_enabled; - /* Interrupt state is pending on the distributor */ + /* Level-triggered interrupt external input is asserted */ + struct vgic_bitmap irq_level; + + /* + * Interrupt state is pending on the distributor + */ struct vgic_bitmap irq_pending; + /* + * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered + * interrupts. Essentially holds the state of the flip-flop in + * Figure 4-10 on page 4-101 in ARM IHI 0048B.b. + * Once set, it is only cleared for level-triggered interrupts on + * guest ACKs (when we queue it) or writes to GICD_ICPENDRn. + */ + struct vgic_bitmap irq_soft_pend; + /* Level-triggered interrupt queued on VCPU interface */ struct vgic_bitmap irq_queued; -- cgit v1.2.3 From c1bfb577addd4867a82c4f235824a315d5afb94a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:01 +0100 Subject: arm/arm64: KVM: vgic: switch to dynamic allocation So far, all the VGIC data structures are statically defined by the *maximum* number of vcpus and interrupts it supports. It means that we always have to oversize it to cater for the worse case. Start by changing the data structures to be dynamically sizeable, and allocate them at runtime. The sizes are still very static though. Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 76 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f074539c6ac5..fd1b8f252da1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -54,19 +54,33 @@ * - a bunch of shared interrupts (SPI) */ struct vgic_bitmap { - union { - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); - } percpu[VGIC_MAX_CPUS]; - union { - u32 reg[VGIC_NR_SHARED_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); - } shared; + /* + * - One UL per VCPU for private interrupts (assumes UL is at + * least 32 bits) + * - As many UL as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, one UL per vcpu (the state for vcpu n is in + * private[n]). The shared interrupts are accessed via the + * "shared" pointer (IRQn state is at bit n-32 in the bitmap). + */ + unsigned long *private; + unsigned long *shared; }; struct vgic_bytemap { - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; - u32 shared[VGIC_NR_SHARED_IRQS / 4]; + /* + * - 8 u32 per VCPU for private interrupts + * - As many u32 as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, (the state for vcpu n is in private[n*8] to + * private[n*8 + 7]). The shared interrupts are accessed via + * the "shared" pointer (IRQn state is at byte (n-32)%4 of the + * shared[(n-32)/4] word). + */ + u32 *private; + u32 *shared; }; struct kvm_vcpu; @@ -127,6 +141,9 @@ struct vgic_dist { bool in_kernel; bool ready; + int nr_cpus; + int nr_irqs; + /* Virtual control interface mapping */ void __iomem *vctrl_base; @@ -166,15 +183,36 @@ struct vgic_dist { /* Level/edge triggered */ struct vgic_bitmap irq_cfg; - /* Source CPU per SGI and target CPU */ - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; + /* + * Source CPU per SGI and target CPU: + * + * Each byte represent a SGI observable on a VCPU, each bit of + * this byte indicating if the corresponding VCPU has + * generated this interrupt. This is a GICv2 feature only. + * + * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are + * the SGIs observable on VCPUn. + */ + u8 *irq_sgi_sources; - /* Target CPU for each IRQ */ - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; + /* + * Target CPU for each SPI: + * + * Array of available SPI, each byte indicating the target + * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32]. + */ + u8 *irq_spi_cpu; + + /* + * Reverse lookup of irq_spi_cpu for faster compute pending: + * + * Array of bitmaps, one per VCPU, describing if IRQn is + * routed to a particular VCPU. + */ + struct vgic_bitmap *irq_spi_target; /* Bitmap indicating which CPU has something pending */ - unsigned long irq_pending_on_cpu; + unsigned long *irq_pending_on_cpu; #endif }; @@ -204,11 +242,11 @@ struct vgic_v3_cpu_if { struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; + u8 *vgic_irq_lr_map; /* Pending interrupts on this VCPU */ DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); + unsigned long *pending_shared; /* Bitmap of used/free list registers */ DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); @@ -239,7 +277,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); +void kvm_vgic_destroy(struct kvm *kvm); int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, -- cgit v1.2.3 From fb65ab63b8cae510ea1e43e68b5da2f9980aa6d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:02 +0100 Subject: arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS Having a dynamic number of supported interrupts means that we cannot relly on VGIC_NR_SHARED_IRQS being fixed anymore. Instead, make it take the distributor structure as a parameter, so it can return the right value. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index fd1b8f252da1..b2f9936df319 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -29,7 +29,6 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) #define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) -- cgit v1.2.3 From fc675e355e705a046df7b635d3f3330c0ad94569 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:03 +0100 Subject: arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS We now have the information about the number of CPU interfaces in the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just rely on KVM_MAX_VCPUS where we don't have the choice. Yet. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b2f9936df319..3b73d7845124 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -29,13 +29,12 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 /* Sanity checks... */ -#if (VGIC_MAX_CPUS > 8) +#if (KVM_MAX_VCPUS > 8) #error Invalid number of CPU interfaces #endif -- cgit v1.2.3 From c3c918361adcceb816c92b21dd95d2b46fb96a8f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:04 +0100 Subject: arm/arm64: KVM: vgic: handle out-of-range MMIO accesses Now that we can (almost) dynamically size the number of interrupts, we're facing an interesting issue: We have to evaluate at runtime whether or not an access hits a valid register, based on the sizing of this particular instance of the distributor. Furthermore, the GIC spec says that accessing a reserved register is RAZ/WI. For this, add a new field to our range structure, indicating the number of bits a single interrupts uses. That allows us to find out whether or not the access is in range. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3b73d7845124..2767f939f47c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -32,6 +32,7 @@ #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 +#define VGIC_MAX_IRQS 1024 /* Sanity checks... */ #if (KVM_MAX_VCPUS > 8) @@ -42,7 +43,7 @@ #error "VGIC_NR_IRQS must be a multiple of 32" #endif -#if (VGIC_NR_IRQS > 1024) +#if (VGIC_NR_IRQS > VGIC_MAX_IRQS) #error "VGIC_NR_IRQS must be <= 1024" #endif -- cgit v1.2.3 From 5fb66da64064d0cb8dcce4cc8bf4cb1b921b13a0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:05 +0100 Subject: arm/arm64: KVM: vgic: kill VGIC_NR_IRQS Nuke VGIC_NR_IRQS entierly, now that the distributor instance contains the number of IRQ allocated to this GIC. Also add VGIC_NR_IRQS_LEGACY to preserve the current API. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2767f939f47c..aa20d4a7242f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -25,7 +25,7 @@ #include #include -#define VGIC_NR_IRQS 256 +#define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) @@ -39,11 +39,11 @@ #error Invalid number of CPU interfaces #endif -#if (VGIC_NR_IRQS & 31) +#if (VGIC_NR_IRQS_LEGACY & 31) #error "VGIC_NR_IRQS must be a multiple of 32" #endif -#if (VGIC_NR_IRQS > VGIC_MAX_IRQS) +#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS) #error "VGIC_NR_IRQS must be <= 1024" #endif -- cgit v1.2.3 From 4956f2bc1fdee4bc336532f3f34635a8534cedfd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:06 +0100 Subject: arm/arm64: KVM: vgic: delay vgic allocation until init time It is now quite easy to delay the allocation of the vgic tables until we actually require it to be up and running (when the first vcpu is kicking around, or someones tries to access the GIC registers). This allow us to allocate memory for the exact number of CPUs we have. As nobody configures the number of interrupts just yet, use a fallback to VGIC_NR_IRQS_LEGACY. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index aa20d4a7242f..2f2aac8448a4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -277,7 +277,6 @@ int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); void kvm_vgic_destroy(struct kvm *kvm); -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 234b239bea395316d7f78018c672f4a88b3cdf0d Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Wed, 17 Sep 2014 10:51:48 -0700 Subject: kvm: Faults which trigger IO release the mmap_sem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When KVM handles a tdp fault it uses FOLL_NOWAIT. If the guest memory has been swapped out or is behind a filemap, this will trigger async readahead and return immediately. The rationale is that KVM will kick back the guest with an "async page fault" and allow for some other guest process to take over. If async PFs are enabled the fault is retried asap from an async workqueue. If not, it's retried immediately in the same code path. In either case the retry will not relinquish the mmap semaphore and will block on the IO. This is a bad thing, as other mmap semaphore users now stall as a function of swap or filemap latency. This patch ensures both the regular and async PF path re-enter the fault allowing for the mmap semaphore to be relinquished in the case of IO wait. Reviewed-by: Radim Krčmář Signed-off-by: Andres Lagar-Cavilla Acked-by: Andrew Morton Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 +++++++++++ include/linux/mm.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d44a2d640551..45aaeb3360c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -198,6 +198,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +/* + * Carry out a gup that requires IO. Allow the mm to relinquish the mmap + * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL + * controls whether we retry the gup one more time to completion in that case. + * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp + * handler. + */ +int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, bool write_fault, + struct page **pagep); + enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc882ed2..0f4196a0bc20 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ +#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); -- cgit v1.2.3 From 8a9522d2fe6a1b643d3aef5ab7f097f73c601e7a Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Tue, 23 Sep 2014 12:34:54 -0700 Subject: kvm/x86/mmu: Pass gfn and level to rmapp callback. Callbacks don't have to do extra computation to learn what the caller (lvm_handle_hva_range()) knows very well. Useful for debugging/tracing/printk/future. Signed-off-by: Andres Lagar-Cavilla Signed-off-by: Paolo Bonzini --- include/trace/events/kvm.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index ab679c395042..6edf1f2028cd 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -225,24 +225,26 @@ TRACE_EVENT(kvm_fpu, ); TRACE_EVENT(kvm_age_page, - TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), - TP_ARGS(hva, slot, ref), + TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref), + TP_ARGS(gfn, level, slot, ref), TP_STRUCT__entry( __field( u64, hva ) __field( u64, gfn ) + __field( u8, level ) __field( u8, referenced ) ), TP_fast_assign( - __entry->hva = hva; - __entry->gfn = - slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); + __entry->gfn = gfn; + __entry->level = level; + __entry->hva = ((gfn - slot->base_gfn) << + PAGE_SHIFT) + slot->userspace_addr; __entry->referenced = ref; ), - TP_printk("hva %llx gfn %llx %s", - __entry->hva, __entry->gfn, + TP_printk("hva %llx gfn %llx level %u %s", + __entry->hva, __entry->gfn, __entry->level, __entry->referenced ? "YOUNG" : "OLD") ); -- cgit v1.2.3 From 57128468080a8b6ea452223036d3e417f748af55 Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Mon, 22 Sep 2014 14:54:42 -0700 Subject: kvm: Fix page ageing bugs 1. We were calling clear_flush_young_notify in unmap_one, but we are within an mmu notifier invalidate range scope. The spte exists no more (due to range_start) and the accessed bit info has already been propagated (due to kvm_pfn_set_accessed). Simply call clear_flush_young. 2. We clear_flush_young on a primary MMU PMD, but this may be mapped as a collection of PTEs by the secondary MMU (e.g. during log-dirty). This required expanding the interface of the clear_flush_young mmu notifier, so a lot of code has been trivially touched. 3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate the access bit by blowing the spte. This requires proper synchronizing with MMU notifier consumers, like every other removal of spte's does. Signed-off-by: Andres Lagar-Cavilla Acked-by: Rik van Riel Signed-off-by: Paolo Bonzini --- include/linux/mmu_notifier.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 27288692241e..88787bb4b3b9 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -57,10 +57,13 @@ struct mmu_notifier_ops { * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. + * Start-end is necessary in case the secondary MMU is mapping the page + * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); /* * test_young is called to check the young/accessed bitflag in @@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, @@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { if (mm_has_notifiers(mm)) - return __mmu_notifier_clear_flush_young(mm, address); + return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } @@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PAGE_SIZE); \ __young; \ }) @@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PMD_SIZE); \ __young; \ }) @@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { return 0; } -- cgit v1.2.3 From 445b8236959bfe624a5aa9bce89f44a3bec9b2b1 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:55 +0800 Subject: kvm: Rename make_all_cpus_request() to kvm_make_all_cpus_request() and make it non-static Different architectures need different requests, and in fact we will use this function in architecture-specific code later. This will be outside kvm_main.c, so make it non-static and rename it to kvm_make_all_cpus_request(). Reviewed-by: Paolo Bonzini Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45aaeb3360c9..12c591fc2571 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -586,6 +586,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -- cgit v1.2.3 From 4256f43f9fab91e1c17b5846a240cf4b66a768a8 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:54 +0800 Subject: kvm: x86: Add request bit to reload APIC access page address Currently, the APIC access page is pinned by KVM for the entire life of the guest. We want to make it migratable in order to make memory hot-unplug available for machines that run KVM. This patch prepares to handle this in generic code, through a new request bit (that will be set by the MMU notifier) and a new hook that is called whenever the request bit is processed. Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 12c591fc2571..d594f9f34429 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 #define KVM_REQ_ENABLE_IBS 23 #define KVM_REQ_DISABLE_IBS 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3