summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x_tlb.c4
-rw-r--r--arch/powerpc/kvm/Kconfig34
-rw-r--r--arch/powerpc/kvm/Makefile27
-rw-r--r--arch/powerpc/kvm/book3s.c1007
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c54
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c180
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c73
-rw-r--r--arch/powerpc/kvm/book3s_exports.c9
-rw-r--r--arch/powerpc/kvm/book3s_hv.c1269
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c155
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S166
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c370
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S1345
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S21
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c71
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1029
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S102
-rw-r--r--arch/powerpc/kvm/book3s_segment.S117
-rw-r--r--arch/powerpc/kvm/booke.c132
-rw-r--r--arch/powerpc/kvm/booke.h23
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S66
-rw-r--r--arch/powerpc/kvm/e500.c7
-rw-r--r--arch/powerpc/kvm/e500_emulate.c4
-rw-r--r--arch/powerpc/kvm/e500_tlb.c800
-rw-r--r--arch/powerpc/kvm/e500_tlb.h13
-rw-r--r--arch/powerpc/kvm/powerpc.c78
-rw-r--r--arch/powerpc/kvm/timing.c9
-rw-r--r--arch/powerpc/kvm/trace.h4
28 files changed, 5689 insertions, 1480 deletions
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5f3cff83e089..33aa715dab28 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -387,8 +387,10 @@ static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
}
}
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
{
+ int usermode = vcpu->arch.shared->msr & MSR_PR;
+
vcpu->arch.shadow_pid = !usermode;
}
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 105b6918b23e..78133deb4b64 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
- select KVM_MMIO
config KVM_BOOK3S_HANDLER
bool
@@ -28,16 +27,22 @@ config KVM_BOOK3S_HANDLER
config KVM_BOOK3S_32_HANDLER
bool
select KVM_BOOK3S_HANDLER
+ select KVM_MMIO
config KVM_BOOK3S_64_HANDLER
bool
select KVM_BOOK3S_HANDLER
+config KVM_BOOK3S_PR
+ bool
+ select KVM_MMIO
+
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT
select KVM
select KVM_BOOK3S_32_HANDLER
+ select KVM_BOOK3S_PR
---help---
Support running unmodified book3s_32 guest kernels
in virtual machines on book3s_32 host processors.
@@ -50,8 +55,8 @@ config KVM_BOOK3S_32
config KVM_BOOK3S_64
tristate "KVM support for PowerPC book3s_64 processors"
depends on EXPERIMENTAL && PPC_BOOK3S_64
- select KVM
select KVM_BOOK3S_64_HANDLER
+ select KVM
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
@@ -61,10 +66,34 @@ config KVM_BOOK3S_64
If unsure, say N.
+config KVM_BOOK3S_64_HV
+ bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+ depends on KVM_BOOK3S_64
+ ---help---
+ Support running unmodified book3s_64 guest kernels in
+ virtual machines on POWER7 and PPC970 processors that have
+ hypervisor mode available to the host.
+
+ If you say Y here, KVM will use the hardware virtualization
+ facilities of POWER7 (and later) processors, meaning that
+ guest operating systems will run at full hardware speed
+ using supervisor and user modes. However, this also means
+ that KVM is not usable under PowerVM (pHyp), is only usable
+ on POWER7 (or later) processors and PPC970-family processors,
+ and cannot emulate a different processor from the host processor.
+
+ If unsure, say N.
+
+config KVM_BOOK3S_64_PR
+ def_bool y
+ depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
+ select KVM_BOOK3S_PR
+
config KVM_440
bool "KVM support for PowerPC 440 processors"
depends on EXPERIMENTAL && 44x
select KVM
+ select KVM_MMIO
---help---
Support running unmodified 440 guest kernels in virtual machines on
440 host processors.
@@ -89,6 +118,7 @@ config KVM_E500
bool "KVM support for PowerPC E500 processors"
depends on EXPERIMENTAL && E500
select KVM
+ select KVM_MMIO
---help---
Support running unmodified E500 guest kernels in virtual machines on
E500 host processors.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4d6863823f69..08428e2c188d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -38,24 +38,42 @@ kvm-e500-objs := \
e500_emulate.o
kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
-kvm-book3s_64-objs := \
- $(common-objs-y) \
+kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+ ../../../virt/kvm/coalesced_mmio.o \
fpu.o \
book3s_paired_singles.o \
- book3s.o \
+ book3s_pr.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
book3s_64_mmu_host.o \
book3s_64_mmu.o \
book3s_32_mmu.o
-kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs)
+
+kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+ book3s_hv.o \
+ book3s_hv_interrupts.o \
+ book3s_64_mmu_hv.o
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+ book3s_hv_rm_mmu.o \
+ book3s_64_vio_hv.o \
+ book3s_hv_builtin.o
+
+kvm-book3s_64-module-objs := \
+ ../../../virt/kvm/kvm_main.o \
+ powerpc.o \
+ emulate.o \
+ book3s.o \
+ $(kvm-book3s_64-objs-y)
+
+kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
kvm-book3s_32-objs := \
$(common-objs-y) \
fpu.o \
book3s_paired_singles.o \
book3s.o \
+ book3s_pr.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
@@ -70,3 +88,4 @@ obj-$(CONFIG_KVM_E500) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
+obj-y += $(kvm-book3s_64-builtin-objs-y)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 0f95b5cce033..f68a34d16035 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -17,7 +17,6 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
-#include "trace.h"
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -28,25 +27,17 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
+#include <asm/page.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
+#include "trace.h"
+
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/* #define EXIT_DEBUG */
-/* #define DEBUG_EXT */
-
-static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
- ulong msr);
-
-/* Some compatibility defines */
-#ifdef CONFIG_PPC_BOOK3S_32
-#define MSR_USER32 MSR_USER
-#define MSR_USER64 MSR_USER
-#define HW_PAGE_SIZE PAGE_SIZE
-#endif
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exits", VCPU_STAT(sum_exits) },
@@ -77,100 +68,11 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
- memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
- sizeof(get_paca()->shadow_vcpu));
- to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
-#endif
-
-#ifdef CONFIG_PPC_BOOK3S_32
- current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
-#endif
-}
-
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
- memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
- sizeof(get_paca()->shadow_vcpu));
- to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
-#endif
-
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
-}
-
-static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
-{
- ulong smsr = vcpu->arch.shared->msr;
-
- /* Guest MSR values */
- smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
- /* Process MSR values */
- smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
- /* External providers the guest reserved */
- smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
- /* 64-bit Process MSR values */
-#ifdef CONFIG_PPC_BOOK3S_64
- smsr |= MSR_ISF | MSR_HV;
-#endif
- vcpu->arch.shadow_msr = smsr;
-}
-
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
-{
- ulong old_msr = vcpu->arch.shared->msr;
-
-#ifdef EXIT_DEBUG
- printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
-#endif
-
- msr &= to_book3s(vcpu)->msr_mask;
- vcpu->arch.shared->msr = msr;
- kvmppc_recalc_shadow_msr(vcpu);
-
- if (msr & MSR_POW) {
- if (!vcpu->arch.pending_exceptions) {
- kvm_vcpu_block(vcpu);
- vcpu->stat.halt_wakeup++;
-
- /* Unset POW bit after we woke up */
- msr &= ~MSR_POW;
- vcpu->arch.shared->msr = msr;
- }
- }
-
- if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
- (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
- kvmppc_mmu_flush_segments(vcpu);
- kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
-
- /* Preload magic page segment when in kernel mode */
- if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
- struct kvm_vcpu_arch *a = &vcpu->arch;
-
- if (msr & MSR_DR)
- kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
- else
- kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
- }
- }
-
- /* Preload FPU if it's enabled */
- if (vcpu->arch.shared->msr & MSR_FP)
- kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
-}
-
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
- kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
+ kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
vcpu->arch.mmu.reset_msr(vcpu);
}
@@ -204,11 +106,13 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec)
{
+ unsigned long old_pending = vcpu->arch.pending_exceptions;
+
clear_bit(kvmppc_book3s_vec2irqprio(vec),
&vcpu->arch.pending_exceptions);
- if (!vcpu->arch.pending_exceptions)
- vcpu->arch.shared->int_pending = 0;
+ kvmppc_update_int_pending(vcpu, vcpu->arch.pending_exceptions,
+ old_pending);
}
void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
@@ -225,8 +129,8 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
- to_book3s(vcpu)->prog_flags = flags;
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
}
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
@@ -266,21 +170,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
{
int deliver = 1;
int vec = 0;
- ulong flags = 0ULL;
- ulong crit_raw = vcpu->arch.shared->critical;
- ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
- bool crit;
-
- /* Truncate crit indicators in 32 bit mode */
- if (!(vcpu->arch.shared->msr & MSR_SF)) {
- crit_raw &= 0xffffffff;
- crit_r1 &= 0xffffffff;
- }
-
- /* Critical section when crit == r1 */
- crit = (crit_raw == crit_r1);
- /* ... and we're in supervisor mode */
- crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+ bool crit = kvmppc_critical_section(vcpu);
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
@@ -315,7 +205,6 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
break;
case BOOK3S_IRQPRIO_PROGRAM:
vec = BOOK3S_INTERRUPT_PROGRAM;
- flags = to_book3s(vcpu)->prog_flags;
break;
case BOOK3S_IRQPRIO_VSX:
vec = BOOK3S_INTERRUPT_VSX;
@@ -346,7 +235,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
#endif
if (deliver)
- kvmppc_inject_interrupt(vcpu, vec, flags);
+ kvmppc_inject_interrupt(vcpu, vec, 0);
return deliver;
}
@@ -392,64 +281,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
}
/* Tell the guest about our interrupt status */
- if (*pending)
- vcpu->arch.shared->int_pending = 1;
- else if (old_pending)
- vcpu->arch.shared->int_pending = 0;
-}
-
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
-{
- u32 host_pvr;
-
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
- vcpu->arch.pvr = pvr;
-#ifdef CONFIG_PPC_BOOK3S_64
- if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
- kvmppc_mmu_book3s_64_init(vcpu);
- to_book3s(vcpu)->hior = 0xfff00000;
- to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
- } else
-#endif
- {
- kvmppc_mmu_book3s_32_init(vcpu);
- to_book3s(vcpu)->hior = 0;
- to_book3s(vcpu)->msr_mask = 0xffffffffULL;
- }
-
- /* If we are in hypervisor level on 970, we can tell the CPU to
- * treat DCBZ as 32 bytes store */
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
- if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
- !strcmp(cur_cpu_spec->platform, "ppc970"))
- vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
-
- /* Cell performs badly if MSR_FEx are set. So let's hope nobody
- really needs them in a VM on Cell and force disable them. */
- if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
- to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* 32 bit Book3S always has 32 byte dcbz */
- vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
-#endif
-
- /* On some CPUs we can execute paired single operations natively */
- asm ( "mfpvr %0" : "=r"(host_pvr));
- switch (host_pvr) {
- case 0x00080200: /* lonestar 2.0 */
- case 0x00088202: /* lonestar 2.2 */
- case 0x70000100: /* gekko 1.0 */
- case 0x00080100: /* gekko 2.0 */
- case 0x00083203: /* gekko 2.3a */
- case 0x00083213: /* gekko 2.3b */
- case 0x00083204: /* gekko 2.4 */
- case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
- case 0x00087200: /* broadway */
- vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
- /* Enable HID2.PSE - in case we need it later */
- mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
- }
+ kvmppc_update_int_pending(vcpu, *pending, old_pending);
}
pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -471,44 +303,6 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
return gfn_to_pfn(vcpu->kvm, gfn);
}
-/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
- * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
- * emulate 32 bytes dcbz length.
- *
- * The Book3s_64 inventors also realized this case and implemented a special bit
- * in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
- *
- * My approach here is to patch the dcbz instruction on executing pages.
- */
-static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
-{
- struct page *hpage;
- u64 hpage_offset;
- u32 *page;
- int i;
-
- hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
- if (is_error_page(hpage)) {
- kvm_release_page_clean(hpage);
- return;
- }
-
- hpage_offset = pte->raddr & ~PAGE_MASK;
- hpage_offset &= ~0xFFFULL;
- hpage_offset /= 4;
-
- get_page(hpage);
- page = kmap_atomic(hpage, KM_USER0);
-
- /* patch dcbz into reserved instruction, so we trap */
- for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
- if ((page[i] & 0xff0007ff) == INS_DCBZ)
- page[i] &= 0xfffffff7;
-
- kunmap_atomic(page, KM_USER0);
- put_page(hpage);
-}
-
static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
struct kvmppc_pte *pte)
{
@@ -606,519 +400,6 @@ mmio:
return EMULATE_DO_MMIO;
}
-static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- ulong mp_pa = vcpu->arch.magic_page_pa;
-
- if (unlikely(mp_pa) &&
- unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
- return 1;
- }
-
- return kvm_is_visible_gfn(vcpu->kvm, gfn);
-}
-
-int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
- ulong eaddr, int vec)
-{
- bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
- int r = RESUME_GUEST;
- int relocated;
- int page_found = 0;
- struct kvmppc_pte pte;
- bool is_mmio = false;
- bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
- bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
- u64 vsid;
-
- relocated = data ? dr : ir;
-
- /* Resolve real address if translation turned on */
- if (relocated) {
- page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
- } else {
- pte.may_execute = true;
- pte.may_read = true;
- pte.may_write = true;
- pte.raddr = eaddr & KVM_PAM;
- pte.eaddr = eaddr;
- pte.vpage = eaddr >> 12;
- }
-
- switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
- case 0:
- pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
- break;
- case MSR_DR:
- case MSR_IR:
- vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
-
- if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
- pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
- else
- pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
- pte.vpage |= vsid;
-
- if (vsid == -1)
- page_found = -EINVAL;
- break;
- }
-
- if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
- (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
- /*
- * If we do the dcbz hack, we have to NX on every execution,
- * so we can patch the executing code. This renders our guest
- * NX-less.
- */
- pte.may_execute = !data;
- }
-
- if (page_found == -ENOENT) {
- /* Page not found in guest PTE entries */
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
- vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
- kvmppc_book3s_queue_irqprio(vcpu, vec);
- } else if (page_found == -EPERM) {
- /* Storage protection */
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr =
- to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
- vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
- vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
- kvmppc_book3s_queue_irqprio(vcpu, vec);
- } else if (page_found == -EINVAL) {
- /* Page not found in guest SLB */
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
- } else if (!is_mmio &&
- kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
- /* The guest's PTE is not mapped yet. Map on the host */
- kvmppc_mmu_map_page(vcpu, &pte);
- if (data)
- vcpu->stat.sp_storage++;
- else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
- (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
- kvmppc_patch_dcbz(vcpu, &pte);
- } else {
- /* MMIO */
- vcpu->stat.mmio_exits++;
- vcpu->arch.paddr_accessed = pte.raddr;
- r = kvmppc_emulate_mmio(run, vcpu);
- if ( r == RESUME_HOST_NV )
- r = RESUME_HOST;
- }
-
- return r;
-}
-
-static inline int get_fpr_index(int i)
-{
-#ifdef CONFIG_VSX
- i *= 2;
-#endif
- return i;
-}
-
-/* Give up external provider (FPU, Altivec, VSX) */
-void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
-{
- struct thread_struct *t = &current->thread;
- u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
- u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
- u64 *thread_fpr = (u64*)t->fpr;
- int i;
-
- if (!(vcpu->arch.guest_owned_ext & msr))
- return;
-
-#ifdef DEBUG_EXT
- printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
-#endif
-
- switch (msr) {
- case MSR_FP:
- giveup_fpu(current);
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
- vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
-
- vcpu->arch.fpscr = t->fpscr.val;
- break;
- case MSR_VEC:
-#ifdef CONFIG_ALTIVEC
- giveup_altivec(current);
- memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
- vcpu->arch.vscr = t->vscr;
-#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- __giveup_vsx(current);
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
-#endif
- break;
- default:
- BUG();
- }
-
- vcpu->arch.guest_owned_ext &= ~msr;
- current->thread.regs->msr &= ~msr;
- kvmppc_recalc_shadow_msr(vcpu);
-}
-
-static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
-{
- ulong srr0 = kvmppc_get_pc(vcpu);
- u32 last_inst = kvmppc_get_last_inst(vcpu);
- int ret;
-
- ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
- if (ret == -ENOENT) {
- ulong msr = vcpu->arch.shared->msr;
-
- msr = kvmppc_set_field(msr, 33, 33, 1);
- msr = kvmppc_set_field(msr, 34, 36, 0);
- vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
- return EMULATE_AGAIN;
- }
-
- return EMULATE_DONE;
-}
-
-static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
-{
-
- /* Need to do paired single emulation? */
- if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
- return EMULATE_DONE;
-
- /* Read out the instruction */
- if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
- /* Need to emulate */
- return EMULATE_FAIL;
-
- return EMULATE_AGAIN;
-}
-
-/* Handle external providers (FPU, Altivec, VSX) */
-static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
- ulong msr)
-{
- struct thread_struct *t = &current->thread;
- u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
- u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
- u64 *thread_fpr = (u64*)t->fpr;
- int i;
-
- /* When we have paired singles, we emulate in software */
- if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
- return RESUME_GUEST;
-
- if (!(vcpu->arch.shared->msr & msr)) {
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- return RESUME_GUEST;
- }
-
- /* We already own the ext */
- if (vcpu->arch.guest_owned_ext & msr) {
- return RESUME_GUEST;
- }
-
-#ifdef DEBUG_EXT
- printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
-#endif
-
- current->thread.regs->msr |= msr;
-
- switch (msr) {
- case MSR_FP:
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
- thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
-
- t->fpscr.val = vcpu->arch.fpscr;
- t->fpexc_mode = 0;
- kvmppc_load_up_fpu();
- break;
- case MSR_VEC:
-#ifdef CONFIG_ALTIVEC
- memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
- t->vscr = vcpu->arch.vscr;
- t->vrsave = -1;
- kvmppc_load_up_altivec();
-#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
- kvmppc_load_up_vsx();
-#endif
- break;
- default:
- BUG();
- }
-
- vcpu->arch.guest_owned_ext |= msr;
-
- kvmppc_recalc_shadow_msr(vcpu);
-
- return RESUME_GUEST;
-}
-
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
-{
- int r = RESUME_HOST;
-
- vcpu->stat.sum_exits++;
-
- run->exit_reason = KVM_EXIT_UNKNOWN;
- run->ready_for_interrupt_injection = 1;
-
- trace_kvm_book3s_exit(exit_nr, vcpu);
- kvm_resched(vcpu);
- switch (exit_nr) {
- case BOOK3S_INTERRUPT_INST_STORAGE:
- vcpu->stat.pf_instruc++;
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* We set segments as unused segments when invalidating them. So
- * treat the respective fault as segment fault. */
- if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
- == SR_INVALID) {
- kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
- r = RESUME_GUEST;
- break;
- }
-#endif
-
- /* only care about PTEG not found errors, but leave NX alone */
- if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
- r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
- vcpu->stat.sp_instruc++;
- } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
- (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
- /*
- * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
- * so we can't use the NX bit inside the guest. Let's cross our fingers,
- * that no guest that needs the dcbz hack does NX.
- */
- kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
- r = RESUME_GUEST;
- } else {
- vcpu->arch.shared->msr |=
- to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- }
- break;
- case BOOK3S_INTERRUPT_DATA_STORAGE:
- {
- ulong dar = kvmppc_get_fault_dar(vcpu);
- vcpu->stat.pf_storage++;
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* We set segments as unused segments when invalidating them. So
- * treat the respective fault as segment fault. */
- if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
- kvmppc_mmu_map_segment(vcpu, dar);
- r = RESUME_GUEST;
- break;
- }
-#endif
-
- /* The only case we need to handle is missing shadow PTEs */
- if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
- r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
- } else {
- vcpu->arch.shared->dar = dar;
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- }
- break;
- }
- case BOOK3S_INTERRUPT_DATA_SEGMENT:
- if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_DATA_SEGMENT);
- }
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_INST_SEGMENT:
- if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_INST_SEGMENT);
- }
- r = RESUME_GUEST;
- break;
- /* We're good on these - the host merely wanted to get our attention */
- case BOOK3S_INTERRUPT_DECREMENTER:
- vcpu->stat.dec_exits++;
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_EXTERNAL:
- vcpu->stat.ext_intr_exits++;
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_PERFMON:
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_PROGRAM:
- {
- enum emulation_result er;
- ulong flags;
-
-program_interrupt:
- flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
-
- if (vcpu->arch.shared->msr & MSR_PR) {
-#ifdef EXIT_DEBUG
- printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
-#endif
- if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
- (INS_DCBZ & 0xfffffff7)) {
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- }
- }
-
- vcpu->stat.emulated_inst_exits++;
- er = kvmppc_emulate_instruction(run, vcpu);
- switch (er) {
- case EMULATE_DONE:
- r = RESUME_GUEST_NV;
- break;
- case EMULATE_AGAIN:
- r = RESUME_GUEST;
- break;
- case EMULATE_FAIL:
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- case EMULATE_DO_MMIO:
- run->exit_reason = KVM_EXIT_MMIO;
- r = RESUME_HOST_NV;
- break;
- default:
- BUG();
- }
- break;
- }
- case BOOK3S_INTERRUPT_SYSCALL:
- if (vcpu->arch.osi_enabled &&
- (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
- (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
- /* MOL hypercalls */
- u64 *gprs = run->osi.gprs;
- int i;
-
- run->exit_reason = KVM_EXIT_OSI;
- for (i = 0; i < 32; i++)
- gprs[i] = kvmppc_get_gpr(vcpu, i);
- vcpu->arch.osi_needed = 1;
- r = RESUME_HOST_NV;
- } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
- (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
- /* KVM PV hypercalls */
- kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
- r = RESUME_GUEST;
- } else {
- /* Guest syscalls */
- vcpu->stat.syscall_exits++;
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- }
- break;
- case BOOK3S_INTERRUPT_FP_UNAVAIL:
- case BOOK3S_INTERRUPT_ALTIVEC:
- case BOOK3S_INTERRUPT_VSX:
- {
- int ext_msr = 0;
-
- switch (exit_nr) {
- case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
- case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
- case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
- }
-
- switch (kvmppc_check_ext(vcpu, exit_nr)) {
- case EMULATE_DONE:
- /* everything ok - let's enable the ext */
- r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
- break;
- case EMULATE_FAIL:
- /* we need to emulate this instruction */
- goto program_interrupt;
- break;
- default:
- /* nothing to worry about - go again */
- break;
- }
- break;
- }
- case BOOK3S_INTERRUPT_ALIGNMENT:
- if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
- vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
- kvmppc_get_last_inst(vcpu));
- vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
- kvmppc_get_last_inst(vcpu));
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- }
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_MACHINE_CHECK:
- case BOOK3S_INTERRUPT_TRACE:
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- break;
- default:
- /* Ugh - bork here! What did we get? */
- printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
- exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
- r = RESUME_HOST;
- BUG();
- break;
- }
-
-
- if (!(r & RESUME_HOST)) {
- /* To avoid clobbering exit_reason, only check for signals if
- * we aren't already exiting to userspace for some other
- * reason. */
- if (signal_pending(current)) {
-#ifdef EXIT_DEBUG
- printk(KERN_EMERG "KVM: Going back to host\n");
-#endif
- vcpu->stat.signal_exits++;
- run->exit_reason = KVM_EXIT_INTR;
- r = -EINTR;
- } else {
- /* In case an interrupt came in that was triggered
- * from userspace (like DEC), we need to check what
- * to inject now! */
- kvmppc_core_deliver_interrupts(vcpu);
- }
- }
-
- trace_kvm_book3s_reenter(r, vcpu);
-
- return r;
-}
-
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
return 0;
@@ -1179,69 +460,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
-{
- struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
- int i;
-
- sregs->pvr = vcpu->arch.pvr;
-
- sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
- for (i = 0; i < 64; i++) {
- sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
- sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
- }
- } else {
- for (i = 0; i < 16; i++)
- sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
-
- for (i = 0; i < 8; i++) {
- sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
- sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
- }
- }
-
- return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
-{
- struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
- int i;
-
- kvmppc_set_pvr(vcpu, sregs->pvr);
-
- vcpu3s->sdr1 = sregs->u.s.sdr1;
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
- for (i = 0; i < 64; i++) {
- vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
- sregs->u.s.ppc64.slb[i].slbe);
- }
- } else {
- for (i = 0; i < 16; i++) {
- vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
- }
- for (i = 0; i < 8; i++) {
- kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
- (u32)sregs->u.s.ppc32.ibat[i]);
- kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
- (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
- kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
- (u32)sregs->u.s.ppc32.dbat[i]);
- kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
- (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
- }
- }
-
- /* Flush the MMU after messing with the segments */
- kvmppc_mmu_pte_flush(vcpu, 0, 0);
-
- return 0;
-}
-
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -ENOTSUPP;
@@ -1296,202 +514,3 @@ out:
mutex_unlock(&kvm->slots_lock);
return r;
}
-
-int kvmppc_core_check_processor_compat(void)
-{
- return 0;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
-{
- struct kvmppc_vcpu_book3s *vcpu_book3s;
- struct kvm_vcpu *vcpu;
- int err = -ENOMEM;
- unsigned long p;
-
- vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
- if (!vcpu_book3s)
- goto out;
-
- vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
- kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
- if (!vcpu_book3s->shadow_vcpu)
- goto free_vcpu;
-
- vcpu = &vcpu_book3s->vcpu;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_shadow_vcpu;
-
- p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
- /* the real shared page fills the last 4k of our page */
- vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
- if (!p)
- goto uninit_vcpu;
-
- vcpu->arch.host_retip = kvm_return_point;
- vcpu->arch.host_msr = mfmsr();
-#ifdef CONFIG_PPC_BOOK3S_64
- /* default to book3s_64 (970fx) */
- vcpu->arch.pvr = 0x3C0301;
-#else
- /* default to book3s_32 (750) */
- vcpu->arch.pvr = 0x84202;
-#endif
- kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
- vcpu_book3s->slb_nr = 64;
-
- /* remember where some real-mode handlers are */
- vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
- vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
- vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
-#ifdef CONFIG_PPC_BOOK3S_64
- vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
-#else
- vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
-#endif
-
- vcpu->arch.shadow_msr = MSR_USER64;
-
- err = kvmppc_mmu_init(vcpu);
- if (err < 0)
- goto uninit_vcpu;
-
- return vcpu;
-
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-free_shadow_vcpu:
- kfree(vcpu_book3s->shadow_vcpu);
-free_vcpu:
- vfree(vcpu_book3s);
-out:
- return ERR_PTR(err);
-}
-
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
-
- free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
- kvm_vcpu_uninit(vcpu);
- kfree(vcpu_book3s->shadow_vcpu);
- vfree(vcpu_book3s);
-}
-
-extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-{
- int ret;
- double fpr[32][TS_FPRWIDTH];
- unsigned int fpscr;
- int fpexc_mode;
-#ifdef CONFIG_ALTIVEC
- vector128 vr[32];
- vector128 vscr;
- unsigned long uninitialized_var(vrsave);
- int used_vr;
-#endif
-#ifdef CONFIG_VSX
- int used_vsr;
-#endif
- ulong ext_msr;
-
- /* No need to go into the guest when all we do is going out */
- if (signal_pending(current)) {
- kvm_run->exit_reason = KVM_EXIT_INTR;
- return -EINTR;
- }
-
- /* Save FPU state in stack */
- if (current->thread.regs->msr & MSR_FP)
- giveup_fpu(current);
- memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
- fpscr = current->thread.fpscr.val;
- fpexc_mode = current->thread.fpexc_mode;
-
-#ifdef CONFIG_ALTIVEC
- /* Save Altivec state in stack */
- used_vr = current->thread.used_vr;
- if (used_vr) {
- if (current->thread.regs->msr & MSR_VEC)
- giveup_altivec(current);
- memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
- vscr = current->thread.vscr;
- vrsave = current->thread.vrsave;
- }
-#endif
-
-#ifdef CONFIG_VSX
- /* Save VSX state in stack */
- used_vsr = current->thread.used_vsr;
- if (used_vsr && (current->thread.regs->msr & MSR_VSX))
- __giveup_vsx(current);
-#endif
-
- /* Remember the MSR with disabled extensions */
- ext_msr = current->thread.regs->msr;
-
- /* XXX we get called with irq disabled - change that! */
- local_irq_enable();
-
- /* Preload FPU if it's enabled */
- if (vcpu->arch.shared->msr & MSR_FP)
- kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
-
- ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
-
- local_irq_disable();
-
- current->thread.regs->msr = ext_msr;
-
- /* Make sure we save the guest FPU/Altivec/VSX state */
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
-
- /* Restore FPU state from stack */
- memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
- current->thread.fpscr.val = fpscr;
- current->thread.fpexc_mode = fpexc_mode;
-
-#ifdef CONFIG_ALTIVEC
- /* Restore Altivec state from stack */
- if (used_vr && current->thread.used_vr) {
- memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
- current->thread.vscr = vscr;
- current->thread.vrsave = vrsave;
- }
- current->thread.used_vr = used_vr;
-#endif
-
-#ifdef CONFIG_VSX
- current->thread.used_vsr = used_vsr;
-#endif
-
- return ret;
-}
-
-static int kvmppc_book3s_init(void)
-{
- int r;
-
- r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
- THIS_MODULE);
-
- if (r)
- return r;
-
- r = kvmppc_mmu_hpte_sysinit();
-
- return r;
-}
-
-static void kvmppc_book3s_exit(void)
-{
- kvmppc_mmu_hpte_sysexit();
- kvm_exit();
-}
-
-module_init(kvmppc_book3s_init);
-module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index d7889ef3211e..c6d3e194b6b4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -41,36 +41,36 @@ static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
}
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
- struct kvmppc_vcpu_book3s *vcpu_book3s,
+ struct kvm_vcpu *vcpu,
gva_t eaddr)
{
int i;
u64 esid = GET_ESID(eaddr);
u64 esid_1t = GET_ESID_1T(eaddr);
- for (i = 0; i < vcpu_book3s->slb_nr; i++) {
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
u64 cmp_esid = esid;
- if (!vcpu_book3s->slb[i].valid)
+ if (!vcpu->arch.slb[i].valid)
continue;
- if (vcpu_book3s->slb[i].tb)
+ if (vcpu->arch.slb[i].tb)
cmp_esid = esid_1t;
- if (vcpu_book3s->slb[i].esid == cmp_esid)
- return &vcpu_book3s->slb[i];
+ if (vcpu->arch.slb[i].esid == cmp_esid)
+ return &vcpu->arch.slb[i];
}
dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n",
eaddr, esid, esid_1t);
- for (i = 0; i < vcpu_book3s->slb_nr; i++) {
- if (vcpu_book3s->slb[i].vsid)
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ if (vcpu->arch.slb[i].vsid)
dprintk(" %d: %c%c%c %llx %llx\n", i,
- vcpu_book3s->slb[i].valid ? 'v' : ' ',
- vcpu_book3s->slb[i].large ? 'l' : ' ',
- vcpu_book3s->slb[i].tb ? 't' : ' ',
- vcpu_book3s->slb[i].esid,
- vcpu_book3s->slb[i].vsid);
+ vcpu->arch.slb[i].valid ? 'v' : ' ',
+ vcpu->arch.slb[i].large ? 'l' : ' ',
+ vcpu->arch.slb[i].tb ? 't' : ' ',
+ vcpu->arch.slb[i].esid,
+ vcpu->arch.slb[i].vsid);
}
return NULL;
@@ -81,7 +81,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
{
struct kvmppc_slb *slb;
- slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), eaddr);
+ slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
if (!slb)
return 0;
@@ -180,7 +180,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
return 0;
}
- slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr);
+ slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
if (!slbe)
goto no_seg_found;
@@ -320,10 +320,10 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
esid_1t = GET_ESID_1T(rb);
slb_nr = rb & 0xfff;
- if (slb_nr > vcpu_book3s->slb_nr)
+ if (slb_nr > vcpu->arch.slb_nr)
return;
- slbe = &vcpu_book3s->slb[slb_nr];
+ slbe = &vcpu->arch.slb[slb_nr];
slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
@@ -344,38 +344,35 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
- if (slb_nr > vcpu_book3s->slb_nr)
+ if (slb_nr > vcpu->arch.slb_nr)
return 0;
- slbe = &vcpu_book3s->slb[slb_nr];
+ slbe = &vcpu->arch.slb[slb_nr];
return slbe->orige;
}
static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
- if (slb_nr > vcpu_book3s->slb_nr)
+ if (slb_nr > vcpu->arch.slb_nr)
return 0;
- slbe = &vcpu_book3s->slb[slb_nr];
+ slbe = &vcpu->arch.slb[slb_nr];
return slbe->origv;
}
static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
dprintk("KVM MMU: slbie(0x%llx)\n", ea);
- slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, ea);
+ slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
if (!slbe)
return;
@@ -389,13 +386,12 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
int i;
dprintk("KVM MMU: slbia()\n");
- for (i = 1; i < vcpu_book3s->slb_nr; i++)
- vcpu_book3s->slb[i].valid = false;
+ for (i = 1; i < vcpu->arch.slb_nr; i++)
+ vcpu->arch.slb[i].valid = false;
if (vcpu->arch.shared->msr & MSR_IR) {
kvmppc_mmu_flush_segments(vcpu);
@@ -464,7 +460,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
ulong mp_ea = vcpu->arch.magic_page_ea;
if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
- slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea);
+ slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
if (slb)
gvsid = slb->vsid;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
new file mode 100644
index 000000000000..bc3a2ea94217
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -0,0 +1,180 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cputable.h>
+
+/* For now use fixed-size 16MB page table */
+#define HPT_ORDER 24
+#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
+#define HPT_HASH_MASK (HPT_NPTEG - 1)
+
+/* Pages in the VRMA are 16MB pages */
+#define VRMA_PAGE_ORDER 24
+#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
+
+/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
+#define MAX_LPID_970 63
+#define NR_LPIDS (LPID_RSVD + 1)
+unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
+
+long kvmppc_alloc_hpt(struct kvm *kvm)
+{
+ unsigned long hpt;
+ unsigned long lpid;
+
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
+ HPT_ORDER - PAGE_SHIFT);
+ if (!hpt) {
+ pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
+ return -ENOMEM;
+ }
+ kvm->arch.hpt_virt = hpt;
+
+ do {
+ lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
+ if (lpid >= NR_LPIDS) {
+ pr_err("kvm_alloc_hpt: No LPIDs free\n");
+ free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+ return -ENOMEM;
+ }
+ } while (test_and_set_bit(lpid, lpid_inuse));
+
+ kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
+ kvm->arch.lpid = lpid;
+
+ pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
+ return 0;
+}
+
+void kvmppc_free_hpt(struct kvm *kvm)
+{
+ clear_bit(kvm->arch.lpid, lpid_inuse);
+ free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+}
+
+void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
+{
+ unsigned long i;
+ unsigned long npages = kvm->arch.ram_npages;
+ unsigned long pfn;
+ unsigned long *hpte;
+ unsigned long hash;
+ struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
+
+ if (!pginfo)
+ return;
+
+ /* VRMA can't be > 1TB */
+ if (npages > 1ul << (40 - kvm->arch.ram_porder))
+ npages = 1ul << (40 - kvm->arch.ram_porder);
+ /* Can't use more than 1 HPTE per HPTEG */
+ if (npages > HPT_NPTEG)
+ npages = HPT_NPTEG;
+
+ for (i = 0; i < npages; ++i) {
+ pfn = pginfo[i].pfn;
+ if (!pfn)
+ break;
+ /* can't use hpt_hash since va > 64 bits */
+ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
+ /*
+ * We assume that the hash table is empty and no
+ * vcpus are using it at this stage. Since we create
+ * at most one HPTE per HPTEG, we just assume entry 7
+ * is available and use it.
+ */
+ hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7));
+ hpte += 7 * 2;
+ /* HPTE low word - RPN, protection, etc. */
+ hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
+ HPTE_R_M | PP_RWXX;
+ wmb();
+ hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+ (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
+ HPTE_V_LARGE | HPTE_V_VALID;
+ }
+}
+
+int kvmppc_mmu_hv_init(void)
+{
+ unsigned long host_lpid, rsvd_lpid;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EINVAL;
+
+ memset(lpid_inuse, 0, sizeof(lpid_inuse));
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ host_lpid = mfspr(SPRN_LPID); /* POWER7 */
+ rsvd_lpid = LPID_RSVD;
+ } else {
+ host_lpid = 0; /* PPC970 */
+ rsvd_lpid = MAX_LPID_970;
+ }
+
+ set_bit(host_lpid, lpid_inuse);
+ /* rsvd_lpid is reserved for use in partition switching */
+ set_bit(rsvd_lpid, lpid_inuse);
+
+ return 0;
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+}
+
+static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
+{
+ kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
+}
+
+static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, bool data)
+{
+ return -ENOENT;
+}
+
+void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ vcpu->arch.slb_nr = 32; /* POWER7 */
+ else
+ vcpu->arch.slb_nr = 64;
+
+ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
+ mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
+
+ vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
+}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
new file mode 100644
index 000000000000..ea0f8c537c28
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -0,0 +1,73 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_host.h>
+#include <asm/udbg.h>
+
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_spapr_tce_table *stt;
+
+ /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
+ /* liobn, ioba, tce); */
+
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == liobn) {
+ unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
+ struct page *page;
+ u64 *tbl;
+
+ /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
+ /* liobn, stt, stt->window_size); */
+ if (ioba >= stt->window_size)
+ return H_PARAMETER;
+
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+ /* FIXME: Need to validate the TCE itself */
+ /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
+ tbl[idx % TCES_PER_PAGE] = tce;
+ return H_SUCCESS;
+ }
+ }
+
+ /* Didn't find the liobn, punt it to userspace */
+ return H_TOO_HARD;
+}
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 1dd5a1ddfd0d..88c8f26add02 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -20,8 +20,11 @@
#include <linux/module.h>
#include <asm/kvm_book3s.h>
-EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
-EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
+#else
+EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
+EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
EXPORT_SYMBOL_GPL(kvmppc_rmcall);
EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
#ifdef CONFIG_ALTIVEC
@@ -30,3 +33,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
#ifdef CONFIG_VSX
EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
#endif
+#endif
+
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
new file mode 100644
index 000000000000..cc0d7f1b19ab
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -0,0 +1,1269 @@
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Paul Mackerras <paulus@au1.ibm.com>
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * Description: KVM functions specific to running on Book 3S
+ * processors in hypervisor mode (specifically POWER7 and later).
+ *
+ * This file is derived from arch/powerpc/kvm/book3s.c,
+ * by Alexander Graf <agraf@suse.de>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/page-flags.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/lppaca.h>
+#include <asm/processor.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+
+/*
+ * For now, limit memory to 64GB and require it to be large pages.
+ * This value is chosen because it makes the ram_pginfo array be
+ * 64kB in size, which is about as large as we want to be trying
+ * to allocate with kmalloc.
+ */
+#define MAX_MEM_ORDER 36
+
+#define LARGE_PAGE_ORDER 24 /* 16MB pages */
+
+/* #define EXIT_DEBUG */
+/* #define EXIT_DEBUG_SIMPLE */
+/* #define EXIT_DEBUG_INT */
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ local_paca->kvm_hstate.kvm_vcpu = vcpu;
+ local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
+
+void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
+{
+ u64 now;
+ unsigned long dec_nsec;
+
+ now = get_tb();
+ if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
+ kvmppc_core_queue_dec(vcpu);
+ if (vcpu->arch.pending_exceptions)
+ return;
+ if (vcpu->arch.dec_expires != ~(u64)0) {
+ dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
+ tb_ticks_per_sec;
+ hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
+ HRTIMER_MODE_REL);
+ }
+
+ kvmppc_vcpu_blocked(vcpu);
+
+ kvm_vcpu_block(vcpu);
+ vcpu->stat.halt_wakeup++;
+
+ if (vcpu->arch.dec_expires != ~(u64)0)
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
+ kvmppc_vcpu_unblocked(vcpu);
+}
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+ vcpu->arch.shregs.msr = msr;
+}
+
+void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+{
+ vcpu->arch.pvr = pvr;
+}
+
+void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
+ pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
+ vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
+ for (r = 0; r < 16; ++r)
+ pr_err("r%2d = %.16lx r%d = %.16lx\n",
+ r, kvmppc_get_gpr(vcpu, r),
+ r+16, kvmppc_get_gpr(vcpu, r+16));
+ pr_err("ctr = %.16lx lr = %.16lx\n",
+ vcpu->arch.ctr, vcpu->arch.lr);
+ pr_err("srr0 = %.16llx srr1 = %.16llx\n",
+ vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
+ pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
+ vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
+ pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
+ vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
+ pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
+ vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
+ pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
+ pr_err("fault dar = %.16lx dsisr = %.8x\n",
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
+ for (r = 0; r < vcpu->arch.slb_max; ++r)
+ pr_err(" ESID = %.16llx VSID = %.16llx\n",
+ vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
+ pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
+ vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
+ vcpu->arch.last_inst);
+}
+
+struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+{
+ int r;
+ struct kvm_vcpu *v, *ret = NULL;
+
+ mutex_lock(&kvm->lock);
+ kvm_for_each_vcpu(r, v, kvm) {
+ if (v->vcpu_id == id) {
+ ret = v;
+ break;
+ }
+ }
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
+{
+ vpa->shared_proc = 1;
+ vpa->yield_count = 1;
+}
+
+static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
+ unsigned long flags,
+ unsigned long vcpuid, unsigned long vpa)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long pg_index, ra, len;
+ unsigned long pg_offset;
+ void *va;
+ struct kvm_vcpu *tvcpu;
+
+ tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
+ if (!tvcpu)
+ return H_PARAMETER;
+
+ flags >>= 63 - 18;
+ flags &= 7;
+ if (flags == 0 || flags == 4)
+ return H_PARAMETER;
+ if (flags < 4) {
+ if (vpa & 0x7f)
+ return H_PARAMETER;
+ /* registering new area; convert logical addr to real */
+ pg_index = vpa >> kvm->arch.ram_porder;
+ pg_offset = vpa & (kvm->arch.ram_psize - 1);
+ if (pg_index >= kvm->arch.ram_npages)
+ return H_PARAMETER;
+ if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
+ return H_PARAMETER;
+ ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
+ ra |= pg_offset;
+ va = __va(ra);
+ if (flags <= 1)
+ len = *(unsigned short *)(va + 4);
+ else
+ len = *(unsigned int *)(va + 4);
+ if (pg_offset + len > kvm->arch.ram_psize)
+ return H_PARAMETER;
+ switch (flags) {
+ case 1: /* register VPA */
+ if (len < 640)
+ return H_PARAMETER;
+ tvcpu->arch.vpa = va;
+ init_vpa(vcpu, va);
+ break;
+ case 2: /* register DTL */
+ if (len < 48)
+ return H_PARAMETER;
+ if (!tvcpu->arch.vpa)
+ return H_RESOURCE;
+ len -= len % 48;
+ tvcpu->arch.dtl = va;
+ tvcpu->arch.dtl_end = va + len;
+ break;
+ case 3: /* register SLB shadow buffer */
+ if (len < 8)
+ return H_PARAMETER;
+ if (!tvcpu->arch.vpa)
+ return H_RESOURCE;
+ tvcpu->arch.slb_shadow = va;
+ len = (len - 16) / 16;
+ tvcpu->arch.slb_shadow = va;
+ break;
+ }
+ } else {
+ switch (flags) {
+ case 5: /* unregister VPA */
+ if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
+ return H_RESOURCE;
+ tvcpu->arch.vpa = NULL;
+ break;
+ case 6: /* unregister DTL */
+ tvcpu->arch.dtl = NULL;
+ break;
+ case 7: /* unregister SLB shadow buffer */
+ tvcpu->arch.slb_shadow = NULL;
+ break;
+ }
+ }
+ return H_SUCCESS;
+}
+
+int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
+{
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+ unsigned long target, ret = H_SUCCESS;
+ struct kvm_vcpu *tvcpu;
+
+ switch (req) {
+ case H_CEDE:
+ vcpu->arch.shregs.msr |= MSR_EE;
+ vcpu->arch.ceded = 1;
+ smp_mb();
+ if (!vcpu->arch.prodded)
+ kvmppc_vcpu_block(vcpu);
+ else
+ vcpu->arch.prodded = 0;
+ smp_mb();
+ vcpu->arch.ceded = 0;
+ break;
+ case H_PROD:
+ target = kvmppc_get_gpr(vcpu, 4);
+ tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ if (!tvcpu) {
+ ret = H_PARAMETER;
+ break;
+ }
+ tvcpu->arch.prodded = 1;
+ smp_mb();
+ if (vcpu->arch.ceded) {
+ if (waitqueue_active(&vcpu->wq)) {
+ wake_up_interruptible(&vcpu->wq);
+ vcpu->stat.halt_wakeup++;
+ }
+ }
+ break;
+ case H_CONFER:
+ break;
+ case H_REGISTER_VPA:
+ ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ default:
+ return RESUME_HOST;
+ }
+ kvmppc_set_gpr(vcpu, 3, ret);
+ vcpu->arch.hcall_needed = 0;
+ return RESUME_GUEST;
+}
+
+static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ struct task_struct *tsk)
+{
+ int r = RESUME_HOST;
+
+ vcpu->stat.sum_exits++;
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->ready_for_interrupt_injection = 1;
+ switch (vcpu->arch.trap) {
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PERFMON:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PROGRAM:
+ {
+ ulong flags;
+ /*
+ * Normally program interrupts are delivered directly
+ * to the guest by the hardware, but we can get here
+ * as a result of a hypervisor emulation interrupt
+ * (e40) getting turned into a 700 by BML RTAS.
+ */
+ flags = vcpu->arch.shregs.msr & 0x1f0000ull;
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ /* hcall - punt to userspace */
+ int i;
+
+ if (vcpu->arch.shregs.msr & MSR_PR) {
+ /* sc 1 from userspace - reflect to guest syscall */
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
+ r = RESUME_GUEST;
+ break;
+ }
+ run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
+ for (i = 0; i < 9; ++i)
+ run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ r = RESUME_HOST;
+ break;
+ }
+ /*
+ * We get these next two if the guest does a bad real-mode access,
+ * as we have enabled VRMA (virtualized real mode area) mode in the
+ * LPCR. We just generate an appropriate DSI/ISI to the guest.
+ */
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+ vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
+ vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
+ 0x08000000);
+ r = RESUME_GUEST;
+ break;
+ /*
+ * This occurs if the guest executes an illegal instruction.
+ * We just generate a program interrupt to the guest, since
+ * we don't emulate any guest instructions at this stage.
+ */
+ case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+ kvmppc_core_queue_program(vcpu, 0x80000);
+ r = RESUME_GUEST;
+ break;
+ default:
+ kvmppc_dump_regs(vcpu);
+ printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ r = RESUME_HOST;
+ BUG();
+ break;
+ }
+
+
+ if (!(r & RESUME_HOST)) {
+ /* To avoid clobbering exit_reason, only check for signals if
+ * we aren't already exiting to userspace for some other
+ * reason. */
+ if (signal_pending(tsk)) {
+ vcpu->stat.signal_exits++;
+ run->exit_reason = KVM_EXIT_INTR;
+ r = -EINTR;
+ } else {
+ kvmppc_core_deliver_interrupts(vcpu);
+ }
+ }
+
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ int i;
+
+ sregs->pvr = vcpu->arch.pvr;
+
+ memset(sregs, 0, sizeof(struct kvm_sregs));
+ for (i = 0; i < vcpu->arch.slb_max; i++) {
+ sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
+ sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
+ }
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ int i, j;
+
+ kvmppc_set_pvr(vcpu, sregs->pvr);
+
+ j = 0;
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
+ vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
+ vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
+ ++j;
+ }
+ }
+ vcpu->arch.slb_max = j;
+
+ return 0;
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ return 0;
+ return -EIO;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvm_vcpu *vcpu;
+ int err = -EINVAL;
+ int core;
+ struct kvmppc_vcore *vcore;
+
+ core = id / threads_per_core;
+ if (core >= KVM_MAX_VCORES)
+ goto out;
+
+ err = -ENOMEM;
+ vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+ if (!vcpu)
+ goto out;
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ vcpu->arch.shared = &vcpu->arch.shregs;
+ vcpu->arch.last_cpu = -1;
+ vcpu->arch.mmcr[0] = MMCR0_FC;
+ vcpu->arch.ctrl = CTRL_RUNLATCH;
+ /* default to host PVR, since we can't spoof it */
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+ kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+
+ kvmppc_mmu_book3s_hv_init(vcpu);
+
+ /*
+ * Some vcpus may start out in stopped state. If we initialize
+ * them to busy-in-host state they will stop other vcpus in the
+ * vcore from running. Instead we initialize them to blocked
+ * state, effectively considering them to be stopped until we
+ * see the first run ioctl for them.
+ */
+ vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+
+ init_waitqueue_head(&vcpu->arch.cpu_run);
+
+ mutex_lock(&kvm->lock);
+ vcore = kvm->arch.vcores[core];
+ if (!vcore) {
+ vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+ if (vcore) {
+ INIT_LIST_HEAD(&vcore->runnable_threads);
+ spin_lock_init(&vcore->lock);
+ }
+ kvm->arch.vcores[core] = vcore;
+ }
+ mutex_unlock(&kvm->lock);
+
+ if (!vcore)
+ goto free_vcpu;
+
+ spin_lock(&vcore->lock);
+ ++vcore->num_threads;
+ ++vcore->n_blocked;
+ spin_unlock(&vcore->lock);
+ vcpu->arch.vcore = vcore;
+
+ return vcpu;
+
+free_vcpu:
+ kfree(vcpu);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ kvm_vcpu_uninit(vcpu);
+ kfree(vcpu);
+}
+
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ spin_lock(&vc->lock);
+ vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+ ++vc->n_blocked;
+ if (vc->n_runnable > 0 &&
+ vc->n_runnable + vc->n_blocked == vc->num_threads) {
+ vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+ arch.run_list);
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ spin_unlock(&vc->lock);
+}
+
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ spin_lock(&vc->lock);
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+ --vc->n_blocked;
+ spin_unlock(&vc->lock);
+}
+
+extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void xics_wake_cpu(int cpu);
+
+static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
+ struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *v;
+
+ if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+ return;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+ --vc->n_runnable;
+ /* decrement the physical thread id of each following vcpu */
+ v = vcpu;
+ list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
+ --v->arch.ptid;
+ list_del(&vcpu->arch.run_list);
+}
+
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ struct paca_struct *tpaca;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ cpu = vc->pcpu + vcpu->arch.ptid;
+ tpaca = &paca[cpu];
+ tpaca->kvm_hstate.kvm_vcpu = vcpu;
+ tpaca->kvm_hstate.kvm_vcore = vc;
+ smp_wmb();
+#ifdef CONFIG_PPC_ICP_NATIVE
+ if (vcpu->arch.ptid) {
+ tpaca->cpu_start = 0x80;
+ tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
+ wmb();
+ xics_wake_cpu(cpu);
+ ++vc->n_woken;
+ }
+#endif
+}
+
+static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+{
+ int i;
+
+ HMT_low();
+ i = 0;
+ while (vc->nap_count < vc->n_woken) {
+ if (++i >= 1000000) {
+ pr_err("kvmppc_wait_for_nap timeout %d %d\n",
+ vc->nap_count, vc->n_woken);
+ break;
+ }
+ cpu_relax();
+ }
+ HMT_medium();
+}
+
+/*
+ * Check that we are on thread 0 and that any other threads in
+ * this core are off-line.
+ */
+static int on_primary_thread(void)
+{
+ int cpu = smp_processor_id();
+ int thr = cpu_thread_in_core(cpu);
+
+ if (thr)
+ return 0;
+ while (++thr < threads_per_core)
+ if (cpu_online(cpu + thr))
+ return 0;
+ return 1;
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held.
+ */
+static int kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+ struct kvm_vcpu *vcpu, *vnext;
+ long ret;
+ u64 now;
+
+ /* don't start if any threads have a signal pending */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ if (signal_pending(vcpu->arch.run_task))
+ return 0;
+
+ /*
+ * Make sure we are running on thread 0, and that
+ * secondary threads are offline.
+ * XXX we should also block attempts to bring any
+ * secondary threads online.
+ */
+ if (threads_per_core > 1 && !on_primary_thread()) {
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ vcpu->arch.ret = -EBUSY;
+ goto out;
+ }
+
+ vc->n_woken = 0;
+ vc->nap_count = 0;
+ vc->entry_exit_count = 0;
+ vc->vcore_running = 1;
+ vc->in_guest = 0;
+ vc->pcpu = smp_processor_id();
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ kvmppc_start_thread(vcpu);
+ vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+ arch.run_list);
+
+ spin_unlock(&vc->lock);
+
+ preempt_disable();
+ kvm_guest_enter();
+ __kvmppc_vcore_entry(NULL, vcpu);
+
+ /* wait for secondary threads to finish writing their state to memory */
+ spin_lock(&vc->lock);
+ if (vc->nap_count < vc->n_woken)
+ kvmppc_wait_for_nap(vc);
+ /* prevent other vcpu threads from doing kvmppc_start_thread() now */
+ vc->vcore_running = 2;
+ spin_unlock(&vc->lock);
+
+ /* make sure updates to secondary vcpu structs are visible now */
+ smp_mb();
+ kvm_guest_exit();
+
+ preempt_enable();
+ kvm_resched(vcpu);
+
+ now = get_tb();
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ /* cancel pending dec exception if dec is positive */
+ if (now < vcpu->arch.dec_expires &&
+ kvmppc_core_pending_dec(vcpu))
+ kvmppc_core_dequeue_dec(vcpu);
+ if (!vcpu->arch.trap) {
+ if (signal_pending(vcpu->arch.run_task)) {
+ vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ }
+ continue; /* didn't get to run */
+ }
+ ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+ vcpu->arch.run_task);
+ vcpu->arch.ret = ret;
+ vcpu->arch.trap = 0;
+ }
+
+ spin_lock(&vc->lock);
+ out:
+ vc->vcore_running = 0;
+ list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+ arch.run_list) {
+ if (vcpu->arch.ret != RESUME_GUEST) {
+ kvmppc_remove_runnable(vc, vcpu);
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ }
+
+ return 1;
+}
+
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int ptid;
+ int wait_state;
+ struct kvmppc_vcore *vc;
+ DEFINE_WAIT(wait);
+
+ /* No need to go into the guest when all we do is going out */
+ if (signal_pending(current)) {
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+ /* On PPC970, check that we have an RMA region */
+ if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
+ return -EPERM;
+
+ kvm_run->exit_reason = 0;
+ vcpu->arch.ret = RESUME_GUEST;
+ vcpu->arch.trap = 0;
+
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+ flush_vsx_to_thread(current);
+
+ /*
+ * Synchronize with other threads in this virtual core
+ */
+ vc = vcpu->arch.vcore;
+ spin_lock(&vc->lock);
+ /* This happens the first time this is called for a vcpu */
+ if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
+ --vc->n_blocked;
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+ ptid = vc->n_runnable;
+ vcpu->arch.run_task = current;
+ vcpu->arch.kvm_run = kvm_run;
+ vcpu->arch.ptid = ptid;
+ list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+ ++vc->n_runnable;
+
+ wait_state = TASK_INTERRUPTIBLE;
+ while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+ if (signal_pending(current)) {
+ if (!vc->vcore_running) {
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ break;
+ }
+ /* have to wait for vcore to stop executing guest */
+ wait_state = TASK_UNINTERRUPTIBLE;
+ smp_send_reschedule(vc->pcpu);
+ }
+
+ if (!vc->vcore_running &&
+ vc->n_runnable + vc->n_blocked == vc->num_threads) {
+ /* we can run now */
+ if (kvmppc_run_core(vc))
+ continue;
+ }
+
+ if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
+ kvmppc_start_thread(vcpu);
+
+ /* wait for other threads to come in, or wait for vcore */
+ prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+ spin_unlock(&vc->lock);
+ schedule();
+ finish_wait(&vcpu->arch.cpu_run, &wait);
+ spin_lock(&vc->lock);
+ }
+
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+ kvmppc_remove_runnable(vc, vcpu);
+ spin_unlock(&vc->lock);
+
+ return vcpu->arch.ret;
+}
+
+int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ do {
+ r = kvmppc_run_vcpu(run, vcpu);
+
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ r = kvmppc_pseries_do_hcall(vcpu);
+ kvmppc_core_deliver_interrupts(vcpu);
+ }
+ } while (r == RESUME_GUEST);
+ return r;
+}
+
+static long kvmppc_stt_npages(unsigned long window_size)
+{
+ return ALIGN((window_size >> SPAPR_TCE_SHIFT)
+ * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
+
+static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+{
+ struct kvm *kvm = stt->kvm;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ list_del(&stt->list);
+ for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ __free_page(stt->pages[i]);
+ kfree(stt);
+ mutex_unlock(&kvm->lock);
+
+ kvm_put_kvm(kvm);
+}
+
+static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+ struct page *page;
+
+ if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ return VM_FAULT_SIGBUS;
+
+ page = stt->pages[vmf->pgoff];
+ get_page(page);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
+ .fault = kvm_spapr_tce_fault,
+};
+
+static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &kvm_spapr_tce_vm_ops;
+ return 0;
+}
+
+static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
+{
+ struct kvmppc_spapr_tce_table *stt = filp->private_data;
+
+ release_spapr_tce_table(stt);
+ return 0;
+}
+
+static struct file_operations kvm_spapr_tce_fops = {
+ .mmap = kvm_spapr_tce_mmap,
+ .release = kvm_spapr_tce_release,
+};
+
+long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ struct kvm_create_spapr_tce *args)
+{
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ long npages;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Check this LIOBN hasn't been previously allocated */
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == args->liobn)
+ return -EBUSY;
+ }
+
+ npages = kvmppc_stt_npages(args->window_size);
+
+ stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
+ GFP_KERNEL);
+ if (!stt)
+ goto fail;
+
+ stt->liobn = args->liobn;
+ stt->window_size = args->window_size;
+ stt->kvm = kvm;
+
+ for (i = 0; i < npages; i++) {
+ stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!stt->pages[i])
+ goto fail;
+ }
+
+ kvm_get_kvm(kvm);
+
+ mutex_lock(&kvm->lock);
+ list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+
+ mutex_unlock(&kvm->lock);
+
+ return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ stt, O_RDWR);
+
+fail:
+ if (stt) {
+ for (i = 0; i < npages; i++)
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
+
+ kfree(stt);
+ }
+ return ret;
+}
+
+/* Work out RMLS (real mode limit selector) field value for a given RMA size.
+ Assumes POWER7 or PPC970. */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+ switch (rma_size) {
+ case 32ul << 20: /* 32 MB */
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ return 8; /* only supported on POWER7 */
+ return -1;
+ case 64ul << 20: /* 64 MB */
+ return 3;
+ case 128ul << 20: /* 128 MB */
+ return 7;
+ case 256ul << 20: /* 256 MB */
+ return 4;
+ case 1ul << 30: /* 1 GB */
+ return 2;
+ case 16ul << 30: /* 16 GB */
+ return 1;
+ case 256ul << 30: /* 256 GB */
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kvmppc_rma_info *ri = vma->vm_file->private_data;
+ struct page *page;
+
+ if (vmf->pgoff >= ri->npages)
+ return VM_FAULT_SIGBUS;
+
+ page = pfn_to_page(ri->base_pfn + vmf->pgoff);
+ get_page(page);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct kvm_rma_vm_ops = {
+ .fault = kvm_rma_fault,
+};
+
+static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_flags |= VM_RESERVED;
+ vma->vm_ops = &kvm_rma_vm_ops;
+ return 0;
+}
+
+static int kvm_rma_release(struct inode *inode, struct file *filp)
+{
+ struct kvmppc_rma_info *ri = filp->private_data;
+
+ kvm_release_rma(ri);
+ return 0;
+}
+
+static struct file_operations kvm_rma_fops = {
+ .mmap = kvm_rma_mmap,
+ .release = kvm_rma_release,
+};
+
+long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
+{
+ struct kvmppc_rma_info *ri;
+ long fd;
+
+ ri = kvm_alloc_rma();
+ if (!ri)
+ return -ENOMEM;
+
+ fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
+ if (fd < 0)
+ kvm_release_rma(ri);
+
+ ret->rma_size = ri->npages << PAGE_SHIFT;
+ return fd;
+}
+
+static struct page *hva_to_page(unsigned long addr)
+{
+ struct page *page[1];
+ int npages;
+
+ might_sleep();
+
+ npages = get_user_pages_fast(addr, 1, 1, page);
+
+ if (unlikely(npages != 1))
+ return 0;
+
+ return page[0];
+}
+
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+ unsigned long psize, porder;
+ unsigned long i, npages, totalpages;
+ unsigned long pg_ix;
+ struct kvmppc_pginfo *pginfo;
+ unsigned long hva;
+ struct kvmppc_rma_info *ri = NULL;
+ struct page *page;
+
+ /* For now, only allow 16MB pages */
+ porder = LARGE_PAGE_ORDER;
+ psize = 1ul << porder;
+ if ((mem->memory_size & (psize - 1)) ||
+ (mem->guest_phys_addr & (psize - 1))) {
+ pr_err("bad memory_size=%llx @ %llx\n",
+ mem->memory_size, mem->guest_phys_addr);
+ return -EINVAL;
+ }
+
+ npages = mem->memory_size >> porder;
+ totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
+
+ /* More memory than we have space to track? */
+ if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
+ return -EINVAL;
+
+ /* Do we already have an RMA registered? */
+ if (mem->guest_phys_addr == 0 && kvm->arch.rma)
+ return -EINVAL;
+
+ if (totalpages > kvm->arch.ram_npages)
+ kvm->arch.ram_npages = totalpages;
+
+ /* Is this one of our preallocated RMAs? */
+ if (mem->guest_phys_addr == 0) {
+ struct vm_area_struct *vma;
+
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, mem->userspace_addr);
+ if (vma && vma->vm_file &&
+ vma->vm_file->f_op == &kvm_rma_fops &&
+ mem->userspace_addr == vma->vm_start)
+ ri = vma->vm_file->private_data;
+ up_read(&current->mm->mmap_sem);
+ if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
+ pr_err("CPU requires an RMO\n");
+ return -EINVAL;
+ }
+ }
+
+ if (ri) {
+ unsigned long rma_size;
+ unsigned long lpcr;
+ long rmls;
+
+ rma_size = ri->npages << PAGE_SHIFT;
+ if (rma_size > mem->memory_size)
+ rma_size = mem->memory_size;
+ rmls = lpcr_rmls(rma_size);
+ if (rmls < 0) {
+ pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
+ return -EINVAL;
+ }
+ atomic_inc(&ri->use_count);
+ kvm->arch.rma = ri;
+ kvm->arch.n_rma_pages = rma_size >> porder;
+
+ /* Update LPCR and RMOR */
+ lpcr = kvm->arch.lpcr;
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ /* PPC970; insert RMLS value (split field) in HID4 */
+ lpcr &= ~((1ul << HID4_RMLS0_SH) |
+ (3ul << HID4_RMLS2_SH));
+ lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
+ ((rmls & 3) << HID4_RMLS2_SH);
+ /* RMOR is also in HID4 */
+ lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
+ << HID4_RMOR_SH;
+ } else {
+ /* POWER7 */
+ lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
+ lpcr |= rmls << LPCR_RMLS_SH;
+ kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+ }
+ kvm->arch.lpcr = lpcr;
+ pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
+ ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
+ }
+
+ pg_ix = mem->guest_phys_addr >> porder;
+ pginfo = kvm->arch.ram_pginfo + pg_ix;
+ for (i = 0; i < npages; ++i, ++pg_ix) {
+ if (ri && pg_ix < kvm->arch.n_rma_pages) {
+ pginfo[i].pfn = ri->base_pfn +
+ (pg_ix << (porder - PAGE_SHIFT));
+ continue;
+ }
+ hva = mem->userspace_addr + (i << porder);
+ page = hva_to_page(hva);
+ if (!page) {
+ pr_err("oops, no pfn for hva %lx\n", hva);
+ goto err;
+ }
+ /* Check it's a 16MB page */
+ if (!PageHead(page) ||
+ compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
+ pr_err("page at %lx isn't 16MB (o=%d)\n",
+ hva, compound_order(page));
+ goto err;
+ }
+ pginfo[i].pfn = page_to_pfn(page);
+ }
+
+ return 0;
+
+ err:
+ return -EINVAL;
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+ if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
+ !kvm->arch.rma)
+ kvmppc_map_vrma(kvm, mem);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ long r;
+ unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
+ long err = -ENOMEM;
+ unsigned long lpcr;
+
+ /* Allocate hashed page table */
+ r = kvmppc_alloc_hpt(kvm);
+ if (r)
+ return r;
+
+ INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+
+ kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
+ GFP_KERNEL);
+ if (!kvm->arch.ram_pginfo) {
+ pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
+ npages * sizeof(struct kvmppc_pginfo));
+ goto out_free;
+ }
+
+ kvm->arch.ram_npages = 0;
+ kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
+ kvm->arch.ram_porder = LARGE_PAGE_ORDER;
+ kvm->arch.rma = NULL;
+ kvm->arch.n_rma_pages = 0;
+
+ kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ /* PPC970; HID4 is effectively the LPCR */
+ unsigned long lpid = kvm->arch.lpid;
+ kvm->arch.host_lpid = 0;
+ kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
+ lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
+ lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
+ ((lpid & 0xf) << HID4_LPID5_SH);
+ } else {
+ /* POWER7; init LPCR for virtual RMA mode */
+ kvm->arch.host_lpid = mfspr(SPRN_LPID);
+ kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+ lpcr &= LPCR_PECE | LPCR_LPES;
+ lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+ LPCR_VPM0 | LPCR_VRMA_L;
+ }
+ kvm->arch.lpcr = lpcr;
+
+ return 0;
+
+ out_free:
+ kvmppc_free_hpt(kvm);
+ return err;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+ struct kvmppc_pginfo *pginfo;
+ unsigned long i;
+
+ if (kvm->arch.ram_pginfo) {
+ pginfo = kvm->arch.ram_pginfo;
+ kvm->arch.ram_pginfo = NULL;
+ for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
+ if (pginfo[i].pfn)
+ put_page(pfn_to_page(pginfo[i].pfn));
+ kfree(pginfo);
+ }
+ if (kvm->arch.rma) {
+ kvm_release_rma(kvm->arch.rma);
+ kvm->arch.rma = NULL;
+ }
+
+ kvmppc_free_hpt(kvm);
+ WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+}
+
+/* These are stubs for now */
+void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+{
+}
+
+/* We don't need to emulate any privileged instructions or dcbz */
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ return EMULATE_FAIL;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ return EMULATE_FAIL;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ return EMULATE_FAIL;
+}
+
+static int kvmppc_book3s_hv_init(void)
+{
+ int r;
+
+ r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+
+ if (r)
+ return r;
+
+ r = kvmppc_mmu_hv_init();
+
+ return r;
+}
+
+static void kvmppc_book3s_hv_exit(void)
+{
+ kvm_exit();
+}
+
+module_init(kvmppc_book3s_hv_init);
+module_exit(kvmppc_book3s_hv_exit);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
new file mode 100644
index 000000000000..d43120355eec
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/init.h>
+
+#include <asm/cputable.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/*
+ * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * Each RMA has to be physically contiguous and of a size that the
+ * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
+ * and other larger sizes. Since we are unlikely to be allocate that
+ * much physically contiguous memory after the system is up and running,
+ * we preallocate a set of RMAs in early boot for KVM to use.
+ */
+static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
+static unsigned long kvm_rma_count;
+
+static int __init early_parse_rma_size(char *p)
+{
+ if (!p)
+ return 1;
+
+ kvm_rma_size = memparse(p, &p);
+
+ return 0;
+}
+early_param("kvm_rma_size", early_parse_rma_size);
+
+static int __init early_parse_rma_count(char *p)
+{
+ if (!p)
+ return 1;
+
+ kvm_rma_count = simple_strtoul(p, NULL, 0);
+
+ return 0;
+}
+early_param("kvm_rma_count", early_parse_rma_count);
+
+static struct kvmppc_rma_info *rma_info;
+static LIST_HEAD(free_rmas);
+static DEFINE_SPINLOCK(rma_lock);
+
+/* Work out RMLS (real mode limit selector) field value for a given RMA size.
+ Assumes POWER7 or PPC970. */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+ switch (rma_size) {
+ case 32ul << 20: /* 32 MB */
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ return 8; /* only supported on POWER7 */
+ return -1;
+ case 64ul << 20: /* 64 MB */
+ return 3;
+ case 128ul << 20: /* 128 MB */
+ return 7;
+ case 256ul << 20: /* 256 MB */
+ return 4;
+ case 1ul << 30: /* 1 GB */
+ return 2;
+ case 16ul << 30: /* 16 GB */
+ return 1;
+ case 256ul << 30: /* 256 GB */
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+/*
+ * Called at boot time while the bootmem allocator is active,
+ * to allocate contiguous physical memory for the real memory
+ * areas for guests.
+ */
+void kvm_rma_init(void)
+{
+ unsigned long i;
+ unsigned long j, npages;
+ void *rma;
+ struct page *pg;
+
+ /* Only do this on PPC970 in HV mode */
+ if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+ !cpu_has_feature(CPU_FTR_ARCH_201))
+ return;
+
+ if (!kvm_rma_size || !kvm_rma_count)
+ return;
+
+ /* Check that the requested size is one supported in hardware */
+ if (lpcr_rmls(kvm_rma_size) < 0) {
+ pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+ return;
+ }
+
+ npages = kvm_rma_size >> PAGE_SHIFT;
+ rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info));
+ for (i = 0; i < kvm_rma_count; ++i) {
+ rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size);
+ pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma,
+ kvm_rma_size >> 20);
+ rma_info[i].base_virt = rma;
+ rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT;
+ rma_info[i].npages = npages;
+ list_add_tail(&rma_info[i].list, &free_rmas);
+ atomic_set(&rma_info[i].use_count, 0);
+
+ pg = pfn_to_page(rma_info[i].base_pfn);
+ for (j = 0; j < npages; ++j) {
+ atomic_inc(&pg->_count);
+ ++pg;
+ }
+ }
+}
+
+struct kvmppc_rma_info *kvm_alloc_rma(void)
+{
+ struct kvmppc_rma_info *ri;
+
+ ri = NULL;
+ spin_lock(&rma_lock);
+ if (!list_empty(&free_rmas)) {
+ ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list);
+ list_del(&ri->list);
+ atomic_inc(&ri->use_count);
+ }
+ spin_unlock(&rma_lock);
+ return ri;
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_rma);
+
+void kvm_release_rma(struct kvmppc_rma_info *ri)
+{
+ if (atomic_dec_and_test(&ri->use_count)) {
+ spin_lock(&rma_lock);
+ list_add_tail(&ri->list, &free_rmas);
+ spin_unlock(&rma_lock);
+
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_release_rma);
+
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
new file mode 100644
index 000000000000..3f7b674dd4bf
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -0,0 +1,166 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_interrupts.S, which is:
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+#include <asm/ppc-opcode.h>
+
+/*****************************************************************************
+ * *
+ * Guest entry / exit code that is in kernel module memory (vmalloc) *
+ * *
+ ****************************************************************************/
+
+/* Registers:
+ * r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcore_entry)
+
+ /* Write correct stack frame */
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+
+ /* Save host state to the stack */
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+
+ /* Save non-volatile registers (r14 - r31) */
+ SAVE_NVGPRS(r1)
+
+ /* Save host DSCR */
+BEGIN_FTR_SECTION
+ mfspr r3, SPRN_DSCR
+ std r3, HSTATE_DSCR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save host DABR */
+ mfspr r3, SPRN_DABR
+ std r3, HSTATE_DABR(r13)
+
+ /* Hard-disable interrupts */
+ mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
+ rldicl r10,r10,48,1
+ rotldi r10,r10,16
+ mtmsrd r10,1
+
+ /* Save host PMU registers and load guest PMU registers */
+ /* R4 is live here (vcpu pointer) but not r3 or r5 */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r7, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
+ isync
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r5, LPPACA_PMCINUSE(r3)
+ cmpwi r5, 0
+ beq 31f /* skip if not */
+ mfspr r5, SPRN_MMCR1
+ mfspr r6, SPRN_MMCRA
+ std r7, HSTATE_MMCR(r13)
+ std r5, HSTATE_MMCR + 8(r13)
+ std r6, HSTATE_MMCR + 16(r13)
+ mfspr r3, SPRN_PMC1
+ mfspr r5, SPRN_PMC2
+ mfspr r6, SPRN_PMC3
+ mfspr r7, SPRN_PMC4
+ mfspr r8, SPRN_PMC5
+ mfspr r9, SPRN_PMC6
+BEGIN_FTR_SECTION
+ mfspr r10, SPRN_PMC7
+ mfspr r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ stw r3, HSTATE_PMC(r13)
+ stw r5, HSTATE_PMC + 4(r13)
+ stw r6, HSTATE_PMC + 8(r13)
+ stw r7, HSTATE_PMC + 12(r13)
+ stw r8, HSTATE_PMC + 16(r13)
+ stw r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+ stw r10, HSTATE_PMC + 24(r13)
+ stw r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+31:
+
+ /*
+ * Put whatever is in the decrementer into the
+ * hypervisor decrementer.
+ */
+ mfspr r8,SPRN_DEC
+ mftb r7
+ mtspr SPRN_HDEC,r8
+ extsw r8,r8
+ add r8,r8,r7
+ std r8,HSTATE_DECEXP(r13)
+
+ /*
+ * On PPC970, if the guest vcpu has an external interrupt pending,
+ * send ourselves an IPI so as to interrupt the guest once it
+ * enables interrupts. (It must have interrupts disabled,
+ * otherwise we would already have delivered the interrupt.)
+ */
+BEGIN_FTR_SECTION
+ ld r0, VCPU_PENDING_EXC(r4)
+ li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
+ oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+ and. r0, r0, r7
+ beq 32f
+ mr r31, r4
+ lhz r3, PACAPACAINDEX(r13)
+ bl smp_send_reschedule
+ nop
+ mr r4, r31
+32:
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+
+ /* Jump to partition switch code */
+ bl .kvmppc_hv_entry_trampoline
+ nop
+
+/*
+ * We return here in virtual mode after the guest exits
+ * with something that we can't handle in real mode.
+ * Interrupts are enabled again at this point.
+ */
+
+.global kvmppc_handler_highmem
+kvmppc_handler_highmem:
+
+ /*
+ * Register usage at this point:
+ *
+ * R1 = host R1
+ * R2 = host R2
+ * R12 = exit handler id
+ * R13 = PACA
+ */
+
+ /* Restore non-volatile host registers (r14 - r31) */
+ REST_NVGPRS(r1)
+
+ addi r1, r1, SWITCH_FRAME_SIZE
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
new file mode 100644
index 000000000000..fcfe6b055558
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -0,0 +1,370 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/hugetlb.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+/* For now use fixed-size 16MB page table */
+#define HPT_ORDER 24
+#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
+#define HPT_HASH_MASK (HPT_NPTEG - 1)
+
+#define HPTE_V_HVLOCK 0x40UL
+
+static inline long lock_hpte(unsigned long *hpte, unsigned long bits)
+{
+ unsigned long tmp, old;
+
+ asm volatile(" ldarx %0,0,%2\n"
+ " and. %1,%0,%3\n"
+ " bne 2f\n"
+ " ori %0,%0,%4\n"
+ " stdcx. %0,0,%2\n"
+ " beq+ 2f\n"
+ " li %1,%3\n"
+ "2: isync"
+ : "=&r" (tmp), "=&r" (old)
+ : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
+ : "cc", "memory");
+ return old == 0;
+}
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+{
+ unsigned long porder;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long i, lpn, pa;
+ unsigned long *hpte;
+
+ /* only handle 4k, 64k and 16M pages for now */
+ porder = 12;
+ if (pteh & HPTE_V_LARGE) {
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ (ptel & 0xf000) == 0x1000) {
+ /* 64k page */
+ porder = 16;
+ } else if ((ptel & 0xff000) == 0) {
+ /* 16M page */
+ porder = 24;
+ /* lowest AVA bit must be 0 for 16M pages */
+ if (pteh & 0x80)
+ return H_PARAMETER;
+ } else
+ return H_PARAMETER;
+ }
+ lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
+ if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
+ return H_PARAMETER;
+ pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
+ if (!pa)
+ return H_PARAMETER;
+ /* Check WIMG */
+ if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
+ (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
+ return H_PARAMETER;
+ pteh &= ~0x60UL;
+ ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
+ ptel |= pa;
+ if (pte_index >= (HPT_NPTEG << 3))
+ return H_PARAMETER;
+ if (likely((flags & H_EXACT) == 0)) {
+ pte_index &= ~7UL;
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ for (i = 0; ; ++i) {
+ if (i == 8)
+ return H_PTEG_FULL;
+ if ((*hpte & HPTE_V_VALID) == 0 &&
+ lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
+ break;
+ hpte += 2;
+ }
+ } else {
+ i = 0;
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
+ return H_PTEG_FULL;
+ }
+ hpte[1] = ptel;
+ eieio();
+ hpte[0] = pteh;
+ asm volatile("ptesync" : : : "memory");
+ atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt);
+ vcpu->arch.gpr[4] = pte_index + i;
+ return H_SUCCESS;
+}
+
+static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
+ unsigned long pte_index)
+{
+ unsigned long rb, va_low;
+
+ rb = (v & ~0x7fUL) << 16; /* AVA field */
+ va_low = pte_index >> 3;
+ if (v & HPTE_V_SECONDARY)
+ va_low = ~va_low;
+ /* xor vsid from AVA */
+ if (!(v & HPTE_V_1TB_SEG))
+ va_low ^= v >> 12;
+ else
+ va_low ^= v >> 24;
+ va_low &= 0x7ff;
+ if (v & HPTE_V_LARGE) {
+ rb |= 1; /* L field */
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ (r & 0xff000)) {
+ /* non-16MB large page, must be 64k */
+ /* (masks depend on page size) */
+ rb |= 0x1000; /* page encoding in LP field */
+ rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
+ rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
+ }
+ } else {
+ /* 4kB page */
+ rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
+ }
+ rb |= (v >> 54) & 0x300; /* B field */
+ return rb;
+}
+
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
+
+static inline int try_lock_tlbie(unsigned int *lock)
+{
+ unsigned int tmp, old;
+ unsigned int token = LOCK_TOKEN;
+
+ asm volatile("1:lwarx %1,0,%2\n"
+ " cmpwi cr0,%1,0\n"
+ " bne 2f\n"
+ " stwcx. %3,0,%2\n"
+ " bne- 1b\n"
+ " isync\n"
+ "2:"
+ : "=&r" (tmp), "=&r" (old)
+ : "r" (lock), "r" (token)
+ : "cc", "memory");
+ return old == 0;
+}
+
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long va)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hpte;
+ unsigned long v, r, rb;
+
+ if (pte_index >= (HPT_NPTEG << 3))
+ return H_PARAMETER;
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ while (!lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if ((hpte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
+ ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
+ hpte[0] &= ~HPTE_V_HVLOCK;
+ return H_NOT_FOUND;
+ }
+ if (atomic_read(&kvm->online_vcpus) == 1)
+ flags |= H_LOCAL;
+ vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
+ vcpu->arch.gpr[5] = r = hpte[1];
+ rb = compute_tlbie_rb(v, r, pte_index);
+ hpte[0] = 0;
+ if (!(flags & H_LOCAL)) {
+ while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile("tlbiel %0" : : "r" (rb));
+ asm volatile("ptesync" : : : "memory");
+ }
+ return H_SUCCESS;
+}
+
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *args = &vcpu->arch.gpr[4];
+ unsigned long *hp, tlbrb[4];
+ long int i, found;
+ long int n_inval = 0;
+ unsigned long flags, req, pte_index;
+ long int local = 0;
+ long int ret = H_SUCCESS;
+
+ if (atomic_read(&kvm->online_vcpus) == 1)
+ local = 1;
+ for (i = 0; i < 4; ++i) {
+ pte_index = args[i * 2];
+ flags = pte_index >> 56;
+ pte_index &= ((1ul << 56) - 1);
+ req = flags >> 6;
+ flags &= 3;
+ if (req == 3)
+ break;
+ if (req != 1 || flags == 3 ||
+ pte_index >= (HPT_NPTEG << 3)) {
+ /* parameter error */
+ args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
+ ret = H_PARAMETER;
+ break;
+ }
+ hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ while (!lock_hpte(hp, HPTE_V_HVLOCK))
+ cpu_relax();
+ found = 0;
+ if (hp[0] & HPTE_V_VALID) {
+ switch (flags & 3) {
+ case 0: /* absolute */
+ found = 1;
+ break;
+ case 1: /* andcond */
+ if (!(hp[0] & args[i * 2 + 1]))
+ found = 1;
+ break;
+ case 2: /* AVPN */
+ if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ hp[0] &= ~HPTE_V_HVLOCK;
+ args[i * 2] = ((0x90 | flags) << 56) + pte_index;
+ continue;
+ }
+ /* insert R and C bits from PTE */
+ flags |= (hp[1] >> 5) & 0x0c;
+ args[i * 2] = ((0x80 | flags) << 56) + pte_index;
+ tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+ hp[0] = 0;
+ }
+ if (n_inval == 0)
+ return ret;
+
+ if (!local) {
+ while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < n_inval; ++i)
+ asm volatile(PPC_TLBIE(%1,%0)
+ : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < n_inval; ++i)
+ asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
+ asm volatile("ptesync" : : : "memory");
+ }
+ return ret;
+}
+
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long va)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hpte;
+ unsigned long v, r, rb;
+
+ if (pte_index >= (HPT_NPTEG << 3))
+ return H_PARAMETER;
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ while (!lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if ((hpte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
+ hpte[0] &= ~HPTE_V_HVLOCK;
+ return H_NOT_FOUND;
+ }
+ if (atomic_read(&kvm->online_vcpus) == 1)
+ flags |= H_LOCAL;
+ v = hpte[0];
+ r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
+ HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+ r |= (flags << 55) & HPTE_R_PP0;
+ r |= (flags << 48) & HPTE_R_KEY_HI;
+ r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+ rb = compute_tlbie_rb(v, r, pte_index);
+ hpte[0] = v & ~HPTE_V_VALID;
+ if (!(flags & H_LOCAL)) {
+ while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile("tlbiel %0" : : "r" (rb));
+ asm volatile("ptesync" : : : "memory");
+ }
+ hpte[1] = r;
+ eieio();
+ hpte[0] = v & ~HPTE_V_HVLOCK;
+ asm volatile("ptesync" : : : "memory");
+ return H_SUCCESS;
+}
+
+static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
+{
+ long int i;
+ unsigned long offset, rpn;
+
+ offset = realaddr & (kvm->arch.ram_psize - 1);
+ rpn = (realaddr - offset) >> PAGE_SHIFT;
+ for (i = 0; i < kvm->arch.ram_npages; ++i)
+ if (rpn == kvm->arch.ram_pginfo[i].pfn)
+ return (i << PAGE_SHIFT) + offset;
+ return HPTE_R_RPN; /* all 1s in the RPN field */
+}
+
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hpte, r;
+ int i, n = 1;
+
+ if (pte_index >= (HPT_NPTEG << 3))
+ return H_PARAMETER;
+ if (flags & H_READ_4) {
+ pte_index &= ~3;
+ n = 4;
+ }
+ for (i = 0; i < n; ++i, ++pte_index) {
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ r = hpte[1];
+ if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID))
+ r = reverse_xlate(kvm, r & HPTE_R_RPN) |
+ (r & ~HPTE_R_RPN);
+ vcpu->arch.gpr[4 + i * 2] = hpte[0];
+ vcpu->arch.gpr[5 + i * 2] = r;
+ }
+ return H_SUCCESS;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
new file mode 100644
index 000000000000..6dd33581a228
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -0,0 +1,1345 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_rmhandlers.S and other files, which are:
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+
+/*****************************************************************************
+ * *
+ * Real Mode handlers that need to be in the linear mapping *
+ * *
+ ****************************************************************************/
+
+ .globl kvmppc_skip_interrupt
+kvmppc_skip_interrupt:
+ mfspr r13,SPRN_SRR0
+ addi r13,r13,4
+ mtspr SPRN_SRR0,r13
+ GET_SCRATCH0(r13)
+ rfid
+ b .
+
+ .globl kvmppc_skip_Hinterrupt
+kvmppc_skip_Hinterrupt:
+ mfspr r13,SPRN_HSRR0
+ addi r13,r13,4
+ mtspr SPRN_HSRR0,r13
+ GET_SCRATCH0(r13)
+ hrfid
+ b .
+
+/*
+ * Call kvmppc_handler_trampoline_enter in real mode.
+ * Must be called with interrupts hard-disabled.
+ *
+ * Input Registers:
+ *
+ * LR = return address to continue at after eventually re-enabling MMU
+ */
+_GLOBAL(kvmppc_hv_entry_trampoline)
+ mfmsr r10
+ LOAD_REG_ADDR(r5, kvmppc_hv_entry)
+ li r0,MSR_RI
+ andc r0,r10,r0
+ li r6,MSR_IR | MSR_DR
+ andc r6,r10,r6
+ mtmsrd r0,1 /* clear RI in MSR */
+ mtsrr0 r5
+ mtsrr1 r6
+ RFI
+
+#define ULONG_SIZE 8
+#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
+
+/******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
+
+#define XICS_XIRR 4
+#define XICS_QIRR 0xc
+
+/*
+ * We come in here when wakened from nap mode on a secondary hw thread.
+ * Relocation is off and most register values are lost.
+ * r13 points to the PACA.
+ */
+ .globl kvm_start_guest
+kvm_start_guest:
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
+ /* get vcpu pointer */
+ ld r4, HSTATE_KVM_VCPU(r13)
+
+ /* We got here with an IPI; clear it */
+ ld r5, HSTATE_XICS_PHYS(r13)
+ li r0, 0xff
+ li r6, XICS_QIRR
+ li r7, XICS_XIRR
+ lwzcix r8, r5, r7 /* ack the interrupt */
+ sync
+ stbcix r0, r5, r6 /* clear it */
+ stwcix r8, r5, r7 /* EOI it */
+
+.global kvmppc_hv_entry
+kvmppc_hv_entry:
+
+ /* Required state:
+ *
+ * R4 = vcpu pointer
+ * MSR = ~IR|DR
+ * R13 = PACA
+ * R1 = host R1
+ * all other volatile GPRS = free
+ */
+ mflr r0
+ std r0, HSTATE_VMHANDLER(r13)
+
+ ld r14, VCPU_GPR(r14)(r4)
+ ld r15, VCPU_GPR(r15)(r4)
+ ld r16, VCPU_GPR(r16)(r4)
+ ld r17, VCPU_GPR(r17)(r4)
+ ld r18, VCPU_GPR(r18)(r4)
+ ld r19, VCPU_GPR(r19)(r4)
+ ld r20, VCPU_GPR(r20)(r4)
+ ld r21, VCPU_GPR(r21)(r4)
+ ld r22, VCPU_GPR(r22)(r4)
+ ld r23, VCPU_GPR(r23)(r4)
+ ld r24, VCPU_GPR(r24)(r4)
+ ld r25, VCPU_GPR(r25)(r4)
+ ld r26, VCPU_GPR(r26)(r4)
+ ld r27, VCPU_GPR(r27)(r4)
+ ld r28, VCPU_GPR(r28)(r4)
+ ld r29, VCPU_GPR(r29)(r4)
+ ld r30, VCPU_GPR(r30)(r4)
+ ld r31, VCPU_GPR(r31)(r4)
+
+ /* Load guest PMU registers */
+ /* R4 is live here (vcpu pointer) */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+ lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
+ lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
+ lwz r6, VCPU_PMC + 8(r4)
+ lwz r7, VCPU_PMC + 12(r4)
+ lwz r8, VCPU_PMC + 16(r4)
+ lwz r9, VCPU_PMC + 20(r4)
+BEGIN_FTR_SECTION
+ lwz r10, VCPU_PMC + 24(r4)
+ lwz r11, VCPU_PMC + 28(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r5
+ mtspr SPRN_PMC3, r6
+ mtspr SPRN_PMC4, r7
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, VCPU_MMCR(r4)
+ ld r5, VCPU_MMCR + 8(r4)
+ ld r6, VCPU_MMCR + 16(r4)
+ mtspr SPRN_MMCR1, r5
+ mtspr SPRN_MMCRA, r6
+ mtspr SPRN_MMCR0, r3
+ isync
+
+ /* Load up FP, VMX and VSX registers */
+ bl kvmppc_load_fp
+
+BEGIN_FTR_SECTION
+ /* Switch DSCR to guest value */
+ ld r5, VCPU_DSCR(r4)
+ mtspr SPRN_DSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /*
+ * Set the decrementer to the guest decrementer.
+ */
+ ld r8,VCPU_DEC_EXPIRES(r4)
+ mftb r7
+ subf r3,r7,r8
+ mtspr SPRN_DEC,r3
+ stw r3,VCPU_DEC(r4)
+
+ ld r5, VCPU_SPRG0(r4)
+ ld r6, VCPU_SPRG1(r4)
+ ld r7, VCPU_SPRG2(r4)
+ ld r8, VCPU_SPRG3(r4)
+ mtspr SPRN_SPRG0, r5
+ mtspr SPRN_SPRG1, r6
+ mtspr SPRN_SPRG2, r7
+ mtspr SPRN_SPRG3, r8
+
+ /* Save R1 in the PACA */
+ std r1, HSTATE_HOST_R1(r13)
+
+ /* Increment yield count if they have a VPA */
+ ld r3, VCPU_VPA(r4)
+ cmpdi r3, 0
+ beq 25f
+ lwz r5, LPPACA_YIELDCOUNT(r3)
+ addi r5, r5, 1
+ stw r5, LPPACA_YIELDCOUNT(r3)
+25:
+ /* Load up DAR and DSISR */
+ ld r5, VCPU_DAR(r4)
+ lwz r6, VCPU_DSISR(r4)
+ mtspr SPRN_DAR, r5
+ mtspr SPRN_DSISR, r6
+
+ /* Set partition DABR */
+ li r5,3
+ ld r6,VCPU_DABR(r4)
+ mtspr SPRN_DABRX,r5
+ mtspr SPRN_DABR,r6
+
+BEGIN_FTR_SECTION
+ /* Restore AMR and UAMOR, set AMOR to all 1s */
+ ld r5,VCPU_AMR(r4)
+ ld r6,VCPU_UAMOR(r4)
+ li r7,-1
+ mtspr SPRN_AMR,r5
+ mtspr SPRN_UAMOR,r6
+ mtspr SPRN_AMOR,r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Clear out SLB */
+ li r6,0
+ slbmte r6,r6
+ slbia
+ ptesync
+
+BEGIN_FTR_SECTION
+ b 30f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ /*
+ * POWER7 host -> guest partition switch code.
+ * We don't have to lock against concurrent tlbies,
+ * but we do have to coordinate across hardware threads.
+ */
+ /* Increment entry count iff exit count is zero. */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ addi r9,r5,VCORE_ENTRY_EXIT
+21: lwarx r3,0,r9
+ cmpwi r3,0x100 /* any threads starting to exit? */
+ bge secondary_too_late /* if so we're too late to the party */
+ addi r3,r3,1
+ stwcx. r3,0,r9
+ bne 21b
+
+ /* Primary thread switches to guest partition. */
+ ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
+ lwz r6,VCPU_PTID(r4)
+ cmpwi r6,0
+ bne 20f
+ ld r6,KVM_SDR1(r9)
+ lwz r7,KVM_LPID(r9)
+ li r0,LPID_RSVD /* switch to reserved LPID */
+ mtspr SPRN_LPID,r0
+ ptesync
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_LPID,r7
+ isync
+ li r0,1
+ stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
+ b 10f
+
+ /* Secondary threads wait for primary to have done partition switch */
+20: lbz r0,VCORE_IN_GUEST(r5)
+ cmpwi r0,0
+ beq 20b
+
+ /* Set LPCR. Set the MER bit if there is a pending external irq. */
+10: ld r8,KVM_LPCR(r9)
+ ld r0,VCPU_PENDING_EXC(r4)
+ li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
+ oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+ and. r0,r0,r7
+ beq 11f
+ ori r8,r8,LPCR_MER
+11: mtspr SPRN_LPCR,r8
+ ld r8,KVM_RMOR(r9)
+ mtspr SPRN_RMOR,r8
+ isync
+
+ /* Check if HDEC expires soon */
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,10
+ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ mr r9,r4
+ blt hdec_soon
+
+ /*
+ * Invalidate the TLB if we could possibly have stale TLB
+ * entries for this partition on this core due to the use
+ * of tlbiel.
+ * XXX maybe only need this on primary thread?
+ */
+ ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
+ lwz r5,VCPU_VCPUID(r4)
+ lhz r6,PACAPACAINDEX(r13)
+ rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
+ lhz r8,VCPU_LAST_CPU(r4)
+ sldi r7,r6,1 /* see if this is the same vcpu */
+ add r7,r7,r9 /* as last ran on this pcpu */
+ lhz r0,KVM_LAST_VCPU(r7)
+ cmpw r6,r8 /* on the same cpu core as last time? */
+ bne 3f
+ cmpw r0,r5 /* same vcpu as this core last ran? */
+ beq 1f
+3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
+ sth r5,KVM_LAST_VCPU(r7)
+ li r6,128
+ mtctr r6
+ li r7,0x800 /* IS field = 0b10 */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1:
+
+ /* Save purr/spurr */
+ mfspr r5,SPRN_PURR
+ mfspr r6,SPRN_SPURR
+ std r5,HSTATE_PURR(r13)
+ std r6,HSTATE_SPURR(r13)
+ ld r7,VCPU_PURR(r4)
+ ld r8,VCPU_SPURR(r4)
+ mtspr SPRN_PURR,r7
+ mtspr SPRN_SPURR,r8
+ b 31f
+
+ /*
+ * PPC970 host -> guest partition switch code.
+ * We have to lock against concurrent tlbies,
+ * using native_tlbie_lock to lock against host tlbies
+ * and kvm->arch.tlbie_lock to lock against guest tlbies.
+ * We also have to invalidate the TLB since its
+ * entries aren't tagged with the LPID.
+ */
+30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
+
+ /* first take native_tlbie_lock */
+ .section ".toc","aw"
+toc_tlbie_lock:
+ .tc native_tlbie_lock[TC],native_tlbie_lock
+ .previous
+ ld r3,toc_tlbie_lock@toc(2)
+ lwz r8,PACA_LOCK_TOKEN(r13)
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */
+ li r0,0x18f
+ rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
+ or r0,r7,r0
+ ptesync
+ sync
+ mtspr SPRN_HID4,r0 /* switch to reserved LPID */
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop native_tlbie_lock */
+
+ /* invalidate the whole TLB */
+ li r0,256
+ mtctr r0
+ li r6,0
+25: tlbiel r6
+ addi r6,r6,0x1000
+ bdnz 25b
+ ptesync
+
+ /* Take the guest's tlbie_lock */
+ addi r3,r9,KVM_TLBIE_LOCK
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+ ld r6,KVM_SDR1(r9)
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+
+ /* Set up HID4 with the guest's LPID etc. */
+ sync
+ mtspr SPRN_HID4,r7
+ isync
+
+ /* drop the guest's tlbie_lock */
+ li r0,0
+ stw r0,0(r3)
+
+ /* Check if HDEC expires soon */
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,10
+ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ mr r9,r4
+ blt hdec_soon
+
+ /* Enable HDEC interrupts */
+ mfspr r0,SPRN_HID0
+ li r3,1
+ rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
+ sync
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+
+ /* Load up guest SLB entries */
+31: lwz r5,VCPU_SLB_MAX(r4)
+ cmpwi r5,0
+ beq 9f
+ mtctr r5
+ addi r6,r4,VCPU_SLB
+1: ld r8,VCPU_SLB_E(r6)
+ ld r9,VCPU_SLB_V(r6)
+ slbmte r9,r8
+ addi r6,r6,VCPU_SLB_SIZE
+ bdnz 1b
+9:
+
+ /* Restore state of CTRL run bit; assume 1 on entry */
+ lwz r5,VCPU_CTRL(r4)
+ andi. r5,r5,1
+ bne 4f
+ mfspr r6,SPRN_CTRLF
+ clrrdi r6,r6,1
+ mtspr SPRN_CTRLT,r6
+4:
+ ld r6, VCPU_CTR(r4)
+ lwz r7, VCPU_XER(r4)
+
+ mtctr r6
+ mtxer r7
+
+ /* Move SRR0 and SRR1 into the respective regs */
+ ld r6, VCPU_SRR0(r4)
+ ld r7, VCPU_SRR1(r4)
+ mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r7
+
+ ld r10, VCPU_PC(r4)
+
+ ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */
+ rldicl r11, r11, 63 - MSR_HV_LG, 1
+ rotldi r11, r11, 1 + MSR_HV_LG
+ ori r11, r11, MSR_ME
+
+fast_guest_return:
+ mtspr SPRN_HSRR0,r10
+ mtspr SPRN_HSRR1,r11
+
+ /* Activate guest mode, so faults get handled by KVM */
+ li r9, KVM_GUEST_MODE_GUEST
+ stb r9, HSTATE_IN_GUEST(r13)
+
+ /* Enter guest */
+
+ ld r5, VCPU_LR(r4)
+ lwz r6, VCPU_CR(r4)
+ mtlr r5
+ mtcr r6
+
+ ld r0, VCPU_GPR(r0)(r4)
+ ld r1, VCPU_GPR(r1)(r4)
+ ld r2, VCPU_GPR(r2)(r4)
+ ld r3, VCPU_GPR(r3)(r4)
+ ld r5, VCPU_GPR(r5)(r4)
+ ld r6, VCPU_GPR(r6)(r4)
+ ld r7, VCPU_GPR(r7)(r4)
+ ld r8, VCPU_GPR(r8)(r4)
+ ld r9, VCPU_GPR(r9)(r4)
+ ld r10, VCPU_GPR(r10)(r4)
+ ld r11, VCPU_GPR(r11)(r4)
+ ld r12, VCPU_GPR(r12)(r4)
+ ld r13, VCPU_GPR(r13)(r4)
+
+ ld r4, VCPU_GPR(r4)(r4)
+
+ hrfid
+ b .
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+/*
+ * We come here from the first-level interrupt handlers.
+ */
+ .globl kvmppc_interrupt
+kvmppc_interrupt:
+ /*
+ * Register contents:
+ * R12 = interrupt vector
+ * R13 = PACA
+ * guest CR, R12 saved in shadow VCPU SCRATCH1/0
+ * guest R13 saved in SPRN_SCRATCH0
+ */
+ /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
+ std r9, HSTATE_HOST_R2(r13)
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Save registers */
+
+ std r0, VCPU_GPR(r0)(r9)
+ std r1, VCPU_GPR(r1)(r9)
+ std r2, VCPU_GPR(r2)(r9)
+ std r3, VCPU_GPR(r3)(r9)
+ std r4, VCPU_GPR(r4)(r9)
+ std r5, VCPU_GPR(r5)(r9)
+ std r6, VCPU_GPR(r6)(r9)
+ std r7, VCPU_GPR(r7)(r9)
+ std r8, VCPU_GPR(r8)(r9)
+ ld r0, HSTATE_HOST_R2(r13)
+ std r0, VCPU_GPR(r9)(r9)
+ std r10, VCPU_GPR(r10)(r9)
+ std r11, VCPU_GPR(r11)(r9)
+ ld r3, HSTATE_SCRATCH0(r13)
+ lwz r4, HSTATE_SCRATCH1(r13)
+ std r3, VCPU_GPR(r12)(r9)
+ stw r4, VCPU_CR(r9)
+
+ /* Restore R1/R2 so we can handle faults */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+
+ mfspr r10, SPRN_SRR0
+ mfspr r11, SPRN_SRR1
+ std r10, VCPU_SRR0(r9)
+ std r11, VCPU_SRR1(r9)
+ andi. r0, r12, 2 /* need to read HSRR0/1? */
+ beq 1f
+ mfspr r10, SPRN_HSRR0
+ mfspr r11, SPRN_HSRR1
+ clrrdi r12, r12, 2
+1: std r10, VCPU_PC(r9)
+ std r11, VCPU_MSR(r9)
+
+ GET_SCRATCH0(r3)
+ mflr r4
+ std r3, VCPU_GPR(r13)(r9)
+ std r4, VCPU_LR(r9)
+
+ /* Unset guest mode */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+
+ stw r12,VCPU_TRAP(r9)
+
+ /* See if this is a leftover HDEC interrupt */
+ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ bne 2f
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,0
+ bge ignore_hdec
+2:
+ /* See if this is something we can handle in real mode */
+ cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
+ beq hcall_try_real_mode
+hcall_real_cont:
+
+ /* Check for mediated interrupts (could be done earlier really ...) */
+BEGIN_FTR_SECTION
+ cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
+ bne+ 1f
+ ld r5,VCPU_KVM(r9)
+ ld r5,KVM_LPCR(r5)
+ andi. r0,r11,MSR_EE
+ beq 1f
+ andi. r0,r5,LPCR_MER
+ bne bounce_ext_interrupt
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save DEC */
+ mfspr r5,SPRN_DEC
+ mftb r6
+ extsw r5,r5
+ add r5,r5,r6
+ std r5,VCPU_DEC_EXPIRES(r9)
+
+ /* Save HEIR (HV emulation assist reg) in last_inst
+ if this is an HEI (HV emulation interrupt, e40) */
+ li r3,-1
+BEGIN_FTR_SECTION
+ cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ bne 11f
+ mfspr r3,SPRN_HEIR
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+11: stw r3,VCPU_LAST_INST(r9)
+
+ /* Save more register state */
+ mfxer r5
+ mfdar r6
+ mfdsisr r7
+ mfctr r8
+
+ stw r5, VCPU_XER(r9)
+ std r6, VCPU_DAR(r9)
+ stw r7, VCPU_DSISR(r9)
+ std r8, VCPU_CTR(r9)
+ /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */
+BEGIN_FTR_SECTION
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq 6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+7: std r6, VCPU_FAULT_DAR(r9)
+ stw r7, VCPU_FAULT_DSISR(r9)
+
+ /* Save guest CTRL register, set runlatch to 1 */
+ mfspr r6,SPRN_CTRLF
+ stw r6,VCPU_CTRL(r9)
+ andi. r0,r6,1
+ bne 4f
+ ori r6,r6,1
+ mtspr SPRN_CTRLT,r6
+4:
+ /* Read the guest SLB and save it away */
+ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
+ mtctr r0
+ li r6,0
+ addi r7,r9,VCPU_SLB
+ li r5,0
+1: slbmfee r8,r6
+ andis. r0,r8,SLB_ESID_V@h
+ beq 2f
+ add r8,r8,r6 /* put index in */
+ slbmfev r3,r6
+ std r8,VCPU_SLB_E(r7)
+ std r3,VCPU_SLB_V(r7)
+ addi r7,r7,VCPU_SLB_SIZE
+ addi r5,r5,1
+2: addi r6,r6,1
+ bdnz 1b
+ stw r5,VCPU_SLB_MAX(r9)
+
+ /*
+ * Save the guest PURR/SPURR
+ */
+BEGIN_FTR_SECTION
+ mfspr r5,SPRN_PURR
+ mfspr r6,SPRN_SPURR
+ ld r7,VCPU_PURR(r9)
+ ld r8,VCPU_SPURR(r9)
+ std r5,VCPU_PURR(r9)
+ std r6,VCPU_SPURR(r9)
+ subf r5,r7,r5
+ subf r6,r8,r6
+
+ /*
+ * Restore host PURR/SPURR and add guest times
+ * so that the time in the guest gets accounted.
+ */
+ ld r3,HSTATE_PURR(r13)
+ ld r4,HSTATE_SPURR(r13)
+ add r3,r3,r5
+ add r4,r4,r6
+ mtspr SPRN_PURR,r3
+ mtspr SPRN_SPURR,r4
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
+
+ /* Clear out SLB */
+ li r5,0
+ slbmte r5,r5
+ slbia
+ ptesync
+
+hdec_soon:
+BEGIN_FTR_SECTION
+ b 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ /*
+ * POWER7 guest -> host partition switch code.
+ * We don't have to lock against tlbies but we do
+ * have to coordinate the hardware threads.
+ */
+ /* Increment the threads-exiting-guest count in the 0xff00
+ bits of vcore->entry_exit_count */
+ lwsync
+ ld r5,HSTATE_KVM_VCORE(r13)
+ addi r6,r5,VCORE_ENTRY_EXIT
+41: lwarx r3,0,r6
+ addi r0,r3,0x100
+ stwcx. r0,0,r6
+ bne 41b
+
+ /*
+ * At this point we have an interrupt that we have to pass
+ * up to the kernel or qemu; we can't handle it in real mode.
+ * Thus we have to do a partition switch, so we have to
+ * collect the other threads, if we are the first thread
+ * to take an interrupt. To do this, we set the HDEC to 0,
+ * which causes an HDEC interrupt in all threads within 2ns
+ * because the HDEC register is shared between all 4 threads.
+ * However, we don't need to bother if this is an HDEC
+ * interrupt, since the other threads will already be on their
+ * way here in that case.
+ */
+ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ beq 40f
+ cmpwi r3,0x100 /* Are we the first here? */
+ bge 40f
+ cmpwi r3,1
+ ble 40f
+ li r0,0
+ mtspr SPRN_HDEC,r0
+40:
+
+ /* Secondary threads wait for primary to do partition switch */
+ ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ lwz r3,VCPU_PTID(r9)
+ cmpwi r3,0
+ beq 15f
+ HMT_LOW
+13: lbz r3,VCORE_IN_GUEST(r5)
+ cmpwi r3,0
+ bne 13b
+ HMT_MEDIUM
+ b 16f
+
+ /* Primary thread waits for all the secondaries to exit guest */
+15: lwz r3,VCORE_ENTRY_EXIT(r5)
+ srwi r0,r3,8
+ clrldi r3,r3,56
+ cmpw r3,r0
+ bne 15b
+ isync
+
+ /* Primary thread switches back to host partition */
+ ld r6,KVM_HOST_SDR1(r4)
+ lwz r7,KVM_HOST_LPID(r4)
+ li r8,LPID_RSVD /* switch to reserved LPID */
+ mtspr SPRN_LPID,r8
+ ptesync
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_LPID,r7
+ isync
+ li r0,0
+ stb r0,VCORE_IN_GUEST(r5)
+ lis r8,0x7fff /* MAX_INT@h */
+ mtspr SPRN_HDEC,r8
+
+16: ld r8,KVM_HOST_LPCR(r4)
+ mtspr SPRN_LPCR,r8
+ isync
+ b 33f
+
+ /*
+ * PPC970 guest -> host partition switch code.
+ * We have to lock against concurrent tlbies, and
+ * we have to flush the whole TLB.
+ */
+32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
+
+ /* Take the guest's tlbie_lock */
+ lwz r8,PACA_LOCK_TOKEN(r13)
+ addi r3,r4,KVM_TLBIE_LOCK
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */
+ li r0,0x18f
+ rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
+ or r0,r7,r0
+ ptesync
+ sync
+ mtspr SPRN_HID4,r0 /* switch to reserved LPID */
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop guest tlbie_lock */
+
+ /* invalidate the whole TLB */
+ li r0,256
+ mtctr r0
+ li r6,0
+25: tlbiel r6
+ addi r6,r6,0x1000
+ bdnz 25b
+ ptesync
+
+ /* take native_tlbie_lock */
+ ld r3,toc_tlbie_lock@toc(2)
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r6,KVM_HOST_SDR1(r4)
+ mtspr SPRN_SDR1,r6 /* switch to host page table */
+
+ /* Set up host HID4 value */
+ sync
+ mtspr SPRN_HID4,r7
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop native_tlbie_lock */
+
+ lis r8,0x7fff /* MAX_INT@h */
+ mtspr SPRN_HDEC,r8
+
+ /* Disable HDEC interrupts */
+ mfspr r0,SPRN_HID0
+ li r3,0
+ rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
+ sync
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+
+ /* load host SLB entries */
+33: ld r8,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ ld r5,SLBSHADOW_SAVEAREA(r8)
+ ld r6,SLBSHADOW_SAVEAREA+8(r8)
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+1: addi r8,r8,16
+ .endr
+
+ /* Save and reset AMR and UAMOR before turning on the MMU */
+BEGIN_FTR_SECTION
+ mfspr r5,SPRN_AMR
+ mfspr r6,SPRN_UAMOR
+ std r5,VCPU_AMR(r9)
+ std r6,VCPU_UAMOR(r9)
+ li r6,0
+ mtspr SPRN_AMR,r6
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Restore host DABR and DABRX */
+ ld r5,HSTATE_DABR(r13)
+ li r6,7
+ mtspr SPRN_DABR,r5
+ mtspr SPRN_DABRX,r6
+
+ /* Switch DSCR back to host value */
+BEGIN_FTR_SECTION
+ mfspr r8, SPRN_DSCR
+ ld r7, HSTATE_DSCR(r13)
+ std r8, VCPU_DSCR(r7)
+ mtspr SPRN_DSCR, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save non-volatile GPRs */
+ std r14, VCPU_GPR(r14)(r9)
+ std r15, VCPU_GPR(r15)(r9)
+ std r16, VCPU_GPR(r16)(r9)
+ std r17, VCPU_GPR(r17)(r9)
+ std r18, VCPU_GPR(r18)(r9)
+ std r19, VCPU_GPR(r19)(r9)
+ std r20, VCPU_GPR(r20)(r9)
+ std r21, VCPU_GPR(r21)(r9)
+ std r22, VCPU_GPR(r22)(r9)
+ std r23, VCPU_GPR(r23)(r9)
+ std r24, VCPU_GPR(r24)(r9)
+ std r25, VCPU_GPR(r25)(r9)
+ std r26, VCPU_GPR(r26)(r9)
+ std r27, VCPU_GPR(r27)(r9)
+ std r28, VCPU_GPR(r28)(r9)
+ std r29, VCPU_GPR(r29)(r9)
+ std r30, VCPU_GPR(r30)(r9)
+ std r31, VCPU_GPR(r31)(r9)
+
+ /* Save SPRGs */
+ mfspr r3, SPRN_SPRG0
+ mfspr r4, SPRN_SPRG1
+ mfspr r5, SPRN_SPRG2
+ mfspr r6, SPRN_SPRG3
+ std r3, VCPU_SPRG0(r9)
+ std r4, VCPU_SPRG1(r9)
+ std r5, VCPU_SPRG2(r9)
+ std r6, VCPU_SPRG3(r9)
+
+ /* Increment yield count if they have a VPA */
+ ld r8, VCPU_VPA(r9) /* do they have a VPA? */
+ cmpdi r8, 0
+ beq 25f
+ lwz r3, LPPACA_YIELDCOUNT(r8)
+ addi r3, r3, 1
+ stw r3, LPPACA_YIELDCOUNT(r8)
+25:
+ /* Save PMU registers if requested */
+ /* r8 and cr0.eq are live here */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r4, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+ beq 21f /* if no VPA, save PMU stuff anyway */
+ lbz r7, LPPACA_PMCINUSE(r8)
+ cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
+ bne 21f
+ std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
+ b 22f
+21: mfspr r5, SPRN_MMCR1
+ mfspr r6, SPRN_MMCRA
+ std r4, VCPU_MMCR(r9)
+ std r5, VCPU_MMCR + 8(r9)
+ std r6, VCPU_MMCR + 16(r9)
+ mfspr r3, SPRN_PMC1
+ mfspr r4, SPRN_PMC2
+ mfspr r5, SPRN_PMC3
+ mfspr r6, SPRN_PMC4
+ mfspr r7, SPRN_PMC5
+ mfspr r8, SPRN_PMC6
+BEGIN_FTR_SECTION
+ mfspr r10, SPRN_PMC7
+ mfspr r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ stw r3, VCPU_PMC(r9)
+ stw r4, VCPU_PMC + 4(r9)
+ stw r5, VCPU_PMC + 8(r9)
+ stw r6, VCPU_PMC + 12(r9)
+ stw r7, VCPU_PMC + 16(r9)
+ stw r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+ stw r10, VCPU_PMC + 24(r9)
+ stw r11, VCPU_PMC + 28(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+22:
+ /* save FP state */
+ mr r3, r9
+ bl .kvmppc_save_fp
+
+ /* Secondary threads go off to take a nap on POWER7 */
+BEGIN_FTR_SECTION
+ lwz r0,VCPU_PTID(r3)
+ cmpwi r0,0
+ bne secondary_nap
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /*
+ * Reload DEC. HDEC interrupts were disabled when
+ * we reloaded the host's LPCR value.
+ */
+ ld r3, HSTATE_DECEXP(r13)
+ mftb r4
+ subf r4, r4, r3
+ mtspr SPRN_DEC, r4
+
+ /* Reload the host's PMU registers */
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r4, LPPACA_PMCINUSE(r3)
+ cmpwi r4, 0
+ beq 23f /* skip if not */
+ lwz r3, HSTATE_PMC(r13)
+ lwz r4, HSTATE_PMC + 4(r13)
+ lwz r5, HSTATE_PMC + 8(r13)
+ lwz r6, HSTATE_PMC + 12(r13)
+ lwz r8, HSTATE_PMC + 16(r13)
+ lwz r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+ lwz r10, HSTATE_PMC + 24(r13)
+ lwz r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r4
+ mtspr SPRN_PMC3, r5
+ mtspr SPRN_PMC4, r6
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, HSTATE_MMCR(r13)
+ ld r4, HSTATE_MMCR + 8(r13)
+ ld r5, HSTATE_MMCR + 16(r13)
+ mtspr SPRN_MMCR1, r4
+ mtspr SPRN_MMCRA, r5
+ mtspr SPRN_MMCR0, r3
+ isync
+23:
+ /*
+ * For external and machine check interrupts, we need
+ * to call the Linux handler to process the interrupt.
+ * We do that by jumping to the interrupt vector address
+ * which we have in r12. The [h]rfid at the end of the
+ * handler will return to the book3s_hv_interrupts.S code.
+ * For other interrupts we do the rfid to get back
+ * to the book3s_interrupts.S code here.
+ */
+ ld r8, HSTATE_VMHANDLER(r13)
+ ld r7, HSTATE_HOST_MSR(r13)
+
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ beq 11f
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+
+ /* RFI into the highmem handler, or branch to interrupt handler */
+12: mfmsr r6
+ mtctr r12
+ li r0, MSR_RI
+ andc r6, r6, r0
+ mtmsrd r6, 1 /* Clear RI in MSR */
+ mtsrr0 r8
+ mtsrr1 r7
+ beqctr
+ RFI
+
+11:
+BEGIN_FTR_SECTION
+ b 12b
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_HSRR0, r8
+ mtspr SPRN_HSRR1, r7
+ ba 0x500
+
+6: mfspr r6,SPRN_HDAR
+ mfspr r7,SPRN_HDSISR
+ b 7b
+
+/*
+ * Try to handle an hcall in real mode.
+ * Returns to the guest if we handle it, or continues on up to
+ * the kernel if we can't (i.e. if we don't have a handler for
+ * it, or if the handler returns H_TOO_HARD).
+ */
+ .globl hcall_try_real_mode
+hcall_try_real_mode:
+ ld r3,VCPU_GPR(r3)(r9)
+ andi. r0,r11,MSR_PR
+ bne hcall_real_cont
+ clrrdi r3,r3,2
+ cmpldi r3,hcall_real_table_end - hcall_real_table
+ bge hcall_real_cont
+ LOAD_REG_ADDR(r4, hcall_real_table)
+ lwzx r3,r3,r4
+ cmpwi r3,0
+ beq hcall_real_cont
+ add r3,r3,r4
+ mtctr r3
+ mr r3,r9 /* get vcpu pointer */
+ ld r4,VCPU_GPR(r4)(r9)
+ bctrl
+ cmpdi r3,H_TOO_HARD
+ beq hcall_real_fallback
+ ld r4,HSTATE_KVM_VCPU(r13)
+ std r3,VCPU_GPR(r3)(r4)
+ ld r10,VCPU_PC(r4)
+ ld r11,VCPU_MSR(r4)
+ b fast_guest_return
+
+ /* We've attempted a real mode hcall, but it's punted it back
+ * to userspace. We need to restore some clobbered volatiles
+ * before resuming the pass-it-to-qemu path */
+hcall_real_fallback:
+ li r12,BOOK3S_INTERRUPT_SYSCALL
+ ld r9, HSTATE_KVM_VCPU(r13)
+ ld r11, VCPU_MSR(r9)
+
+ b hcall_real_cont
+
+ .globl hcall_real_table
+hcall_real_table:
+ .long 0 /* 0 - unused */
+ .long .kvmppc_h_remove - hcall_real_table
+ .long .kvmppc_h_enter - hcall_real_table
+ .long .kvmppc_h_read - hcall_real_table
+ .long 0 /* 0x10 - H_CLEAR_MOD */
+ .long 0 /* 0x14 - H_CLEAR_REF */
+ .long .kvmppc_h_protect - hcall_real_table
+ .long 0 /* 0x1c - H_GET_TCE */
+ .long .kvmppc_h_put_tce - hcall_real_table
+ .long 0 /* 0x24 - H_SET_SPRG0 */
+ .long .kvmppc_h_set_dabr - hcall_real_table
+ .long 0 /* 0x2c */
+ .long 0 /* 0x30 */
+ .long 0 /* 0x34 */
+ .long 0 /* 0x38 */
+ .long 0 /* 0x3c */
+ .long 0 /* 0x40 */
+ .long 0 /* 0x44 */
+ .long 0 /* 0x48 */
+ .long 0 /* 0x4c */
+ .long 0 /* 0x50 */
+ .long 0 /* 0x54 */
+ .long 0 /* 0x58 */
+ .long 0 /* 0x5c */
+ .long 0 /* 0x60 */
+ .long 0 /* 0x64 */
+ .long 0 /* 0x68 */
+ .long 0 /* 0x6c */
+ .long 0 /* 0x70 */
+ .long 0 /* 0x74 */
+ .long 0 /* 0x78 */
+ .long 0 /* 0x7c */
+ .long 0 /* 0x80 */
+ .long 0 /* 0x84 */
+ .long 0 /* 0x88 */
+ .long 0 /* 0x8c */
+ .long 0 /* 0x90 */
+ .long 0 /* 0x94 */
+ .long 0 /* 0x98 */
+ .long 0 /* 0x9c */
+ .long 0 /* 0xa0 */
+ .long 0 /* 0xa4 */
+ .long 0 /* 0xa8 */
+ .long 0 /* 0xac */
+ .long 0 /* 0xb0 */
+ .long 0 /* 0xb4 */
+ .long 0 /* 0xb8 */
+ .long 0 /* 0xbc */
+ .long 0 /* 0xc0 */
+ .long 0 /* 0xc4 */
+ .long 0 /* 0xc8 */
+ .long 0 /* 0xcc */
+ .long 0 /* 0xd0 */
+ .long 0 /* 0xd4 */
+ .long 0 /* 0xd8 */
+ .long 0 /* 0xdc */
+ .long 0 /* 0xe0 */
+ .long 0 /* 0xe4 */
+ .long 0 /* 0xe8 */
+ .long 0 /* 0xec */
+ .long 0 /* 0xf0 */
+ .long 0 /* 0xf4 */
+ .long 0 /* 0xf8 */
+ .long 0 /* 0xfc */
+ .long 0 /* 0x100 */
+ .long 0 /* 0x104 */
+ .long 0 /* 0x108 */
+ .long 0 /* 0x10c */
+ .long 0 /* 0x110 */
+ .long 0 /* 0x114 */
+ .long 0 /* 0x118 */
+ .long 0 /* 0x11c */
+ .long 0 /* 0x120 */
+ .long .kvmppc_h_bulk_remove - hcall_real_table
+hcall_real_table_end:
+
+ignore_hdec:
+ mr r4,r9
+ b fast_guest_return
+
+bounce_ext_interrupt:
+ mr r4,r9
+ mtspr SPRN_SRR0,r10
+ mtspr SPRN_SRR1,r11
+ li r10,BOOK3S_INTERRUPT_EXTERNAL
+ LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME);
+ b fast_guest_return
+
+_GLOBAL(kvmppc_h_set_dabr)
+ std r4,VCPU_DABR(r3)
+ mtspr SPRN_DABR,r4
+ li r3,0
+ blr
+
+secondary_too_late:
+ ld r5,HSTATE_KVM_VCORE(r13)
+ HMT_LOW
+13: lbz r3,VCORE_IN_GUEST(r5)
+ cmpwi r3,0
+ bne 13b
+ HMT_MEDIUM
+ ld r11,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ ld r5,SLBSHADOW_SAVEAREA(r11)
+ ld r6,SLBSHADOW_SAVEAREA+8(r11)
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+1: addi r11,r11,16
+ .endr
+ b 50f
+
+secondary_nap:
+ /* Clear any pending IPI */
+50: ld r5, HSTATE_XICS_PHYS(r13)
+ li r0, 0xff
+ li r6, XICS_QIRR
+ stbcix r0, r5, r6
+
+ /* increment the nap count and then go to nap mode */
+ ld r4, HSTATE_KVM_VCORE(r13)
+ addi r4, r4, VCORE_NAP_COUNT
+ lwsync /* make previous updates visible */
+51: lwarx r3, 0, r4
+ addi r3, r3, 1
+ stwcx. r3, 0, r4
+ bne 51b
+ isync
+
+ mfspr r4, SPRN_LPCR
+ li r0, LPCR_PECE
+ andc r4, r4, r0
+ ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */
+ mtspr SPRN_LPCR, r4
+ li r0, 0
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+/*
+ * Save away FP, VMX and VSX registers.
+ * r3 = vcpu pointer
+ */
+_GLOBAL(kvmppc_save_fp)
+ mfmsr r9
+ ori r8,r9,MSR_FP
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ mtmsrd r8
+ isync
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ reg = 0
+ .rept 32
+ li r6,reg*16+VCPU_VSRS
+ stxvd2x reg,r6,r3
+ reg = reg + 1
+ .endr
+FTR_SECTION_ELSE
+#endif
+ reg = 0
+ .rept 32
+ stfd reg,reg*8+VCPU_FPRS(r3)
+ reg = reg + 1
+ .endr
+#ifdef CONFIG_VSX
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
+#endif
+ mffs fr0
+ stfd fr0,VCPU_FPSCR(r3)
+
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ reg = 0
+ .rept 32
+ li r6,reg*16+VCPU_VRS
+ stvx reg,r6,r3
+ reg = reg + 1
+ .endr
+ mfvscr vr0
+ li r6,VCPU_VSCR
+ stvx vr0,r6,r3
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ mfspr r6,SPRN_VRSAVE
+ stw r6,VCPU_VRSAVE(r3)
+ mtmsrd r9
+ isync
+ blr
+
+/*
+ * Load up FP, VMX and VSX registers
+ * r4 = vcpu pointer
+ */
+ .globl kvmppc_load_fp
+kvmppc_load_fp:
+ mfmsr r9
+ ori r8,r9,MSR_FP
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ mtmsrd r8
+ isync
+ lfd fr0,VCPU_FPSCR(r4)
+ MTFSF_L(fr0)
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ reg = 0
+ .rept 32
+ li r7,reg*16+VCPU_VSRS
+ lxvd2x reg,r7,r4
+ reg = reg + 1
+ .endr
+FTR_SECTION_ELSE
+#endif
+ reg = 0
+ .rept 32
+ lfd reg,reg*8+VCPU_FPRS(r4)
+ reg = reg + 1
+ .endr
+#ifdef CONFIG_VSX
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
+#endif
+
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ li r7,VCPU_VSCR
+ lvx vr0,r7,r4
+ mtvscr vr0
+ reg = 0
+ .rept 32
+ li r7,reg*16+VCPU_VRS
+ lvx reg,r7,r4
+ reg = reg + 1
+ .endr
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ lwz r7,VCPU_VRSAVE(r4)
+ mtspr SPRN_VRSAVE,r7
+ blr
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 2f0bc928b08a..c54b0e30cf3f 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -29,8 +29,7 @@
#define ULONG_SIZE 8
#define FUNC(name) GLUE(.,name)
-#define GET_SHADOW_VCPU(reg) \
- addi reg, r13, PACA_KVM_SVCPU
+#define GET_SHADOW_VCPU_R13
#define DISABLE_INTERRUPTS \
mfmsr r0; \
@@ -43,8 +42,8 @@
#define ULONG_SIZE 4
#define FUNC(name) name
-#define GET_SHADOW_VCPU(reg) \
- lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
+#define GET_SHADOW_VCPU_R13 \
+ lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2)
#define DISABLE_INTERRUPTS \
mfmsr r0; \
@@ -85,7 +84,7 @@
* r3: kvm_run pointer
* r4: vcpu pointer
*/
-_GLOBAL(__kvmppc_vcpu_entry)
+_GLOBAL(__kvmppc_vcpu_run)
kvm_start_entry:
/* Write correct stack frame */
@@ -107,17 +106,11 @@ kvm_start_entry:
/* Load non-volatile guest state from the vcpu */
VCPU_LOAD_NVGPRS(r4)
- GET_SHADOW_VCPU(r5)
-
- /* Save R1/R2 in the PACA */
- PPC_STL r1, SVCPU_HOST_R1(r5)
- PPC_STL r2, SVCPU_HOST_R2(r5)
+kvm_start_lightweight:
- /* XXX swap in/out on load? */
+ GET_SHADOW_VCPU_R13
PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
- PPC_STL r3, SVCPU_VMHANDLER(r5)
-
-kvm_start_lightweight:
+ PPC_STL r3, HSTATE_VMHANDLER(r13)
PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 79751d8dd131..41cb0017e757 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -21,7 +21,6 @@
#include <linux/kvm_host.h>
#include <linux/hash.h>
#include <linux/slab.h>
-#include "trace.h"
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -29,6 +28,8 @@
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
+#include "trace.h"
+
#define PTE_SIZE 12
static struct kmem_cache *hpte_cache;
@@ -58,30 +59,31 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
u64 index;
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
trace_kvm_book3s_mmu_map(pte);
- spin_lock(&vcpu->arch.mmu_lock);
+ spin_lock(&vcpu3s->mmu_lock);
/* Add to ePTE list */
index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
- hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
+ hlist_add_head_rcu(&pte->list_pte, &vcpu3s->hpte_hash_pte[index]);
/* Add to ePTE_long list */
index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr);
hlist_add_head_rcu(&pte->list_pte_long,
- &vcpu->arch.hpte_hash_pte_long[index]);
+ &vcpu3s->hpte_hash_pte_long[index]);
/* Add to vPTE list */
index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
- hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
+ hlist_add_head_rcu(&pte->list_vpte, &vcpu3s->hpte_hash_vpte[index]);
/* Add to vPTE_long list */
index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage);
hlist_add_head_rcu(&pte->list_vpte_long,
- &vcpu->arch.hpte_hash_vpte_long[index]);
+ &vcpu3s->hpte_hash_vpte_long[index]);
- spin_unlock(&vcpu->arch.mmu_lock);
+ spin_unlock(&vcpu3s->mmu_lock);
}
static void free_pte_rcu(struct rcu_head *head)
@@ -92,16 +94,18 @@ static void free_pte_rcu(struct rcu_head *head)
static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
trace_kvm_book3s_mmu_invalidate(pte);
/* Different for 32 and 64 bit */
kvmppc_mmu_invalidate_pte(vcpu, pte);
- spin_lock(&vcpu->arch.mmu_lock);
+ spin_lock(&vcpu3s->mmu_lock);
/* pte already invalidated in between? */
if (hlist_unhashed(&pte->list_pte)) {
- spin_unlock(&vcpu->arch.mmu_lock);
+ spin_unlock(&vcpu3s->mmu_lock);
return;
}
@@ -115,14 +119,15 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
else
kvm_release_pfn_clean(pte->pfn);
- spin_unlock(&vcpu->arch.mmu_lock);
+ spin_unlock(&vcpu3s->mmu_lock);
- vcpu->arch.hpte_cache_count--;
+ vcpu3s->hpte_cache_count--;
call_rcu(&pte->rcu_head, free_pte_rcu);
}
static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte;
struct hlist_node *node;
int i;
@@ -130,7 +135,7 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
rcu_read_lock();
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
- struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
+ struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
invalidate_pte(vcpu, pte);
@@ -141,12 +146,13 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
struct hlist_node *node;
struct hpte_cache *pte;
/* Find the list of entries in the map */
- list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
+ list = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
rcu_read_lock();
@@ -160,12 +166,13 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
struct hlist_node *node;
struct hpte_cache *pte;
/* Find the list of entries in the map */
- list = &vcpu->arch.hpte_hash_pte_long[
+ list = &vcpu3s->hpte_hash_pte_long[
kvmppc_mmu_hash_pte_long(guest_ea)];
rcu_read_lock();
@@ -203,12 +210,13 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
/* Flush with mask 0xfffffffff */
static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
struct hlist_node *node;
struct hpte_cache *pte;
u64 vp_mask = 0xfffffffffULL;
- list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
+ list = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
rcu_read_lock();
@@ -223,12 +231,13 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
/* Flush with mask 0xffffff000 */
static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
struct hlist_node *node;
struct hpte_cache *pte;
u64 vp_mask = 0xffffff000ULL;
- list = &vcpu->arch.hpte_hash_vpte_long[
+ list = &vcpu3s->hpte_hash_vpte_long[
kvmppc_mmu_hash_vpte_long(guest_vp)];
rcu_read_lock();
@@ -261,6 +270,7 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_node *node;
struct hpte_cache *pte;
int i;
@@ -270,7 +280,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
rcu_read_lock();
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
- struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
+ struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
if ((pte->pte.raddr >= pa_start) &&
@@ -283,12 +293,13 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte;
pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
- vcpu->arch.hpte_cache_count++;
+ vcpu3s->hpte_cache_count++;
- if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM)
+ if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
kvmppc_mmu_pte_flush_all(vcpu);
return pte;
@@ -309,17 +320,19 @@ static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len)
int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
/* init hpte lookup hashes */
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte,
- ARRAY_SIZE(vcpu->arch.hpte_hash_pte));
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long,
- ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long));
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte,
- ARRAY_SIZE(vcpu->arch.hpte_hash_vpte));
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long,
- ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long));
-
- spin_lock_init(&vcpu->arch.mmu_lock);
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte,
+ ARRAY_SIZE(vcpu3s->hpte_hash_pte));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte_long,
+ ARRAY_SIZE(vcpu3s->hpte_hash_pte_long));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+
+ spin_lock_init(&vcpu3s->mmu_lock);
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
new file mode 100644
index 000000000000..0c0d3f274437
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -0,0 +1,1029 @@
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ * Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ * Functions relating to running KVM on Book 3S processors where
+ * we don't have access to hypervisor mode, and we run the guest
+ * in problem state (user mode).
+ *
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+
+#include "trace.h"
+
+/* #define EXIT_DEBUG */
+/* #define DEBUG_EXT */
+
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+ ulong msr);
+
+/* Some compatibility defines */
+#ifdef CONFIG_PPC_BOOK3S_32
+#define MSR_USER32 MSR_USER
+#define MSR_USER64 MSR_USER
+#define HW_PAGE_SIZE PAGE_SIZE
+#endif
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
+ memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
+ sizeof(get_paca()->shadow_vcpu));
+ to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+#endif
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
+ memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
+ sizeof(get_paca()->shadow_vcpu));
+ to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
+#endif
+
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ kvmppc_giveup_ext(vcpu, MSR_VEC);
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
+}
+
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+ ulong smsr = vcpu->arch.shared->msr;
+
+ /* Guest MSR values */
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
+ /* Process MSR values */
+ smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
+ /* External providers the guest reserved */
+ smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
+ /* 64-bit Process MSR values */
+#ifdef CONFIG_PPC_BOOK3S_64
+ smsr |= MSR_ISF | MSR_HV;
+#endif
+ vcpu->arch.shadow_msr = smsr;
+}
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+ ulong old_msr = vcpu->arch.shared->msr;
+
+#ifdef EXIT_DEBUG
+ printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
+#endif
+
+ msr &= to_book3s(vcpu)->msr_mask;
+ vcpu->arch.shared->msr = msr;
+ kvmppc_recalc_shadow_msr(vcpu);
+
+ if (msr & MSR_POW) {
+ if (!vcpu->arch.pending_exceptions) {
+ kvm_vcpu_block(vcpu);
+ vcpu->stat.halt_wakeup++;
+
+ /* Unset POW bit after we woke up */
+ msr &= ~MSR_POW;
+ vcpu->arch.shared->msr = msr;
+ }
+ }
+
+ if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+ (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
+ kvmppc_mmu_flush_segments(vcpu);
+ kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+
+ /* Preload magic page segment when in kernel mode */
+ if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
+ struct kvm_vcpu_arch *a = &vcpu->arch;
+
+ if (msr & MSR_DR)
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
+ else
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
+ }
+ }
+
+ /* Preload FPU if it's enabled */
+ if (vcpu->arch.shared->msr & MSR_FP)
+ kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+}
+
+void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+{
+ u32 host_pvr;
+
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
+ vcpu->arch.pvr = pvr;
+#ifdef CONFIG_PPC_BOOK3S_64
+ if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
+ kvmppc_mmu_book3s_64_init(vcpu);
+ to_book3s(vcpu)->hior = 0xfff00000;
+ to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
+ } else
+#endif
+ {
+ kvmppc_mmu_book3s_32_init(vcpu);
+ to_book3s(vcpu)->hior = 0;
+ to_book3s(vcpu)->msr_mask = 0xffffffffULL;
+ }
+
+ /* If we are in hypervisor level on 970, we can tell the CPU to
+ * treat DCBZ as 32 bytes store */
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
+ if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
+ !strcmp(cur_cpu_spec->platform, "ppc970"))
+ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+
+ /* Cell performs badly if MSR_FEx are set. So let's hope nobody
+ really needs them in a VM on Cell and force disable them. */
+ if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
+ to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* 32 bit Book3S always has 32 byte dcbz */
+ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+#endif
+
+ /* On some CPUs we can execute paired single operations natively */
+ asm ( "mfpvr %0" : "=r"(host_pvr));
+ switch (host_pvr) {
+ case 0x00080200: /* lonestar 2.0 */
+ case 0x00088202: /* lonestar 2.2 */
+ case 0x70000100: /* gekko 1.0 */
+ case 0x00080100: /* gekko 2.0 */
+ case 0x00083203: /* gekko 2.3a */
+ case 0x00083213: /* gekko 2.3b */
+ case 0x00083204: /* gekko 2.4 */
+ case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
+ case 0x00087200: /* broadway */
+ vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
+ /* Enable HID2.PSE - in case we need it later */
+ mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
+ }
+}
+
+/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
+ * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
+ * emulate 32 bytes dcbz length.
+ *
+ * The Book3s_64 inventors also realized this case and implemented a special bit
+ * in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
+ *
+ * My approach here is to patch the dcbz instruction on executing pages.
+ */
+static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+ struct page *hpage;
+ u64 hpage_offset;
+ u32 *page;
+ int i;
+
+ hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+ if (is_error_page(hpage)) {
+ kvm_release_page_clean(hpage);
+ return;
+ }
+
+ hpage_offset = pte->raddr & ~PAGE_MASK;
+ hpage_offset &= ~0xFFFULL;
+ hpage_offset /= 4;
+
+ get_page(hpage);
+ page = kmap_atomic(hpage, KM_USER0);
+
+ /* patch dcbz into reserved instruction, so we trap */
+ for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+ if ((page[i] & 0xff0007ff) == INS_DCBZ)
+ page[i] &= 0xfffffff7;
+
+ kunmap_atomic(page, KM_USER0);
+ put_page(hpage);
+}
+
+static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ ulong mp_pa = vcpu->arch.magic_page_pa;
+
+ if (unlikely(mp_pa) &&
+ unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
+ return 1;
+ }
+
+ return kvm_is_visible_gfn(vcpu->kvm, gfn);
+}
+
+int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ ulong eaddr, int vec)
+{
+ bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+ int r = RESUME_GUEST;
+ int relocated;
+ int page_found = 0;
+ struct kvmppc_pte pte;
+ bool is_mmio = false;
+ bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
+ bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
+ u64 vsid;
+
+ relocated = data ? dr : ir;
+
+ /* Resolve real address if translation turned on */
+ if (relocated) {
+ page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+ } else {
+ pte.may_execute = true;
+ pte.may_read = true;
+ pte.may_write = true;
+ pte.raddr = eaddr & KVM_PAM;
+ pte.eaddr = eaddr;
+ pte.vpage = eaddr >> 12;
+ }
+
+ switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ case 0:
+ pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
+ break;
+ case MSR_DR:
+ case MSR_IR:
+ vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
+
+ if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
+ pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
+ else
+ pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
+ pte.vpage |= vsid;
+
+ if (vsid == -1)
+ page_found = -EINVAL;
+ break;
+ }
+
+ if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+ /*
+ * If we do the dcbz hack, we have to NX on every execution,
+ * so we can patch the executing code. This renders our guest
+ * NX-less.
+ */
+ pte.may_execute = !data;
+ }
+
+ if (page_found == -ENOENT) {
+ /* Page not found in guest PTE entries */
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ vcpu->arch.shared->msr |=
+ (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ kvmppc_book3s_queue_irqprio(vcpu, vec);
+ } else if (page_found == -EPERM) {
+ /* Storage protection */
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dsisr =
+ to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
+ vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
+ vcpu->arch.shared->msr |=
+ (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ kvmppc_book3s_queue_irqprio(vcpu, vec);
+ } else if (page_found == -EINVAL) {
+ /* Page not found in guest SLB */
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
+ } else if (!is_mmio &&
+ kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+ /* The guest's PTE is not mapped yet. Map on the host */
+ kvmppc_mmu_map_page(vcpu, &pte);
+ if (data)
+ vcpu->stat.sp_storage++;
+ else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+ kvmppc_patch_dcbz(vcpu, &pte);
+ } else {
+ /* MMIO */
+ vcpu->stat.mmio_exits++;
+ vcpu->arch.paddr_accessed = pte.raddr;
+ r = kvmppc_emulate_mmio(run, vcpu);
+ if ( r == RESUME_HOST_NV )
+ r = RESUME_HOST;
+ }
+
+ return r;
+}
+
+static inline int get_fpr_index(int i)
+{
+#ifdef CONFIG_VSX
+ i *= 2;
+#endif
+ return i;
+}
+
+/* Give up external provider (FPU, Altivec, VSX) */
+void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+ u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
+ u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
+ u64 *thread_fpr = (u64*)t->fpr;
+ int i;
+
+ if (!(vcpu->arch.guest_owned_ext & msr))
+ return;
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
+#endif
+
+ switch (msr) {
+ case MSR_FP:
+ giveup_fpu(current);
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+ vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
+
+ vcpu->arch.fpscr = t->fpscr.val;
+ break;
+ case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+ giveup_altivec(current);
+ memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
+ vcpu->arch.vscr = t->vscr;
+#endif
+ break;
+ case MSR_VSX:
+#ifdef CONFIG_VSX
+ __giveup_vsx(current);
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+ vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
+#endif
+ break;
+ default:
+ BUG();
+ }
+
+ vcpu->arch.guest_owned_ext &= ~msr;
+ current->thread.regs->msr &= ~msr;
+ kvmppc_recalc_shadow_msr(vcpu);
+}
+
+static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
+{
+ ulong srr0 = kvmppc_get_pc(vcpu);
+ u32 last_inst = kvmppc_get_last_inst(vcpu);
+ int ret;
+
+ ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
+ if (ret == -ENOENT) {
+ ulong msr = vcpu->arch.shared->msr;
+
+ msr = kvmppc_set_field(msr, 33, 33, 1);
+ msr = kvmppc_set_field(msr, 34, 36, 0);
+ vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
+ return EMULATE_AGAIN;
+ }
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+
+ /* Need to do paired single emulation? */
+ if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
+ return EMULATE_DONE;
+
+ /* Read out the instruction */
+ if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
+ /* Need to emulate */
+ return EMULATE_FAIL;
+
+ return EMULATE_AGAIN;
+}
+
+/* Handle external providers (FPU, Altivec, VSX) */
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+ ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+ u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
+ u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
+ u64 *thread_fpr = (u64*)t->fpr;
+ int i;
+
+ /* When we have paired singles, we emulate in software */
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
+ return RESUME_GUEST;
+
+ if (!(vcpu->arch.shared->msr & msr)) {
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ return RESUME_GUEST;
+ }
+
+ /* We already own the ext */
+ if (vcpu->arch.guest_owned_ext & msr) {
+ return RESUME_GUEST;
+ }
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
+#endif
+
+ current->thread.regs->msr |= msr;
+
+ switch (msr) {
+ case MSR_FP:
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+ thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
+
+ t->fpscr.val = vcpu->arch.fpscr;
+ t->fpexc_mode = 0;
+ kvmppc_load_up_fpu();
+ break;
+ case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+ memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
+ t->vscr = vcpu->arch.vscr;
+ t->vrsave = -1;
+ kvmppc_load_up_altivec();
+#endif
+ break;
+ case MSR_VSX:
+#ifdef CONFIG_VSX
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+ thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
+ kvmppc_load_up_vsx();
+#endif
+ break;
+ default:
+ BUG();
+ }
+
+ vcpu->arch.guest_owned_ext |= msr;
+
+ kvmppc_recalc_shadow_msr(vcpu);
+
+ return RESUME_GUEST;
+}
+
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ int r = RESUME_HOST;
+
+ vcpu->stat.sum_exits++;
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->ready_for_interrupt_injection = 1;
+
+ trace_kvm_book3s_exit(exit_nr, vcpu);
+ kvm_resched(vcpu);
+ switch (exit_nr) {
+ case BOOK3S_INTERRUPT_INST_STORAGE:
+ vcpu->stat.pf_instruc++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* We set segments as unused segments when invalidating them. So
+ * treat the respective fault as segment fault. */
+ if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
+ == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+ r = RESUME_GUEST;
+ break;
+ }
+#endif
+
+ /* only care about PTEG not found errors, but leave NX alone */
+ if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
+ r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+ vcpu->stat.sp_instruc++;
+ } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+ /*
+ * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
+ * so we can't use the NX bit inside the guest. Let's cross our fingers,
+ * that no guest that needs the dcbz hack does NX.
+ */
+ kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
+ r = RESUME_GUEST;
+ } else {
+ vcpu->arch.shared->msr |=
+ to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ case BOOK3S_INTERRUPT_DATA_STORAGE:
+ {
+ ulong dar = kvmppc_get_fault_dar(vcpu);
+ vcpu->stat.pf_storage++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* We set segments as unused segments when invalidating them. So
+ * treat the respective fault as segment fault. */
+ if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, dar);
+ r = RESUME_GUEST;
+ break;
+ }
+#endif
+
+ /* The only case we need to handle is missing shadow PTEs */
+ if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
+ r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+ } else {
+ vcpu->arch.shared->dar = dar;
+ vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_DATA_SEGMENT:
+ if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_DATA_SEGMENT);
+ }
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_INST_SEGMENT:
+ if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_INST_SEGMENT);
+ }
+ r = RESUME_GUEST;
+ break;
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_DECREMENTER:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PERFMON:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PROGRAM:
+ {
+ enum emulation_result er;
+ ulong flags;
+
+program_interrupt:
+ flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
+
+ if (vcpu->arch.shared->msr & MSR_PR) {
+#ifdef EXIT_DEBUG
+ printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+#endif
+ if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
+ (INS_DCBZ & 0xfffffff7)) {
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+ }
+
+ vcpu->stat.emulated_inst_exits++;
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_AGAIN:
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_FAIL:
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+ __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST_NV;
+ break;
+ default:
+ BUG();
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_SYSCALL:
+ if (vcpu->arch.osi_enabled &&
+ (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
+ (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+ /* MOL hypercalls */
+ u64 *gprs = run->osi.gprs;
+ int i;
+
+ run->exit_reason = KVM_EXIT_OSI;
+ for (i = 0; i < 32; i++)
+ gprs[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu->arch.osi_needed = 1;
+ r = RESUME_HOST_NV;
+ } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
+ (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+ /* KVM PV hypercalls */
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ r = RESUME_GUEST;
+ } else {
+ /* Guest syscalls */
+ vcpu->stat.syscall_exits++;
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ case BOOK3S_INTERRUPT_FP_UNAVAIL:
+ case BOOK3S_INTERRUPT_ALTIVEC:
+ case BOOK3S_INTERRUPT_VSX:
+ {
+ int ext_msr = 0;
+
+ switch (exit_nr) {
+ case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
+ case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
+ case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
+ }
+
+ switch (kvmppc_check_ext(vcpu, exit_nr)) {
+ case EMULATE_DONE:
+ /* everything ok - let's enable the ext */
+ r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
+ break;
+ case EMULATE_FAIL:
+ /* we need to emulate this instruction */
+ goto program_interrupt;
+ break;
+ default:
+ /* nothing to worry about - go again */
+ break;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_ALIGNMENT:
+ if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
+ vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
+ kvmppc_get_last_inst(vcpu));
+ vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
+ kvmppc_get_last_inst(vcpu));
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ }
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ case BOOK3S_INTERRUPT_TRACE:
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ break;
+ default:
+ /* Ugh - bork here! What did we get? */
+ printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
+ exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
+ r = RESUME_HOST;
+ BUG();
+ break;
+ }
+
+
+ if (!(r & RESUME_HOST)) {
+ /* To avoid clobbering exit_reason, only check for signals if
+ * we aren't already exiting to userspace for some other
+ * reason. */
+ if (signal_pending(current)) {
+#ifdef EXIT_DEBUG
+ printk(KERN_EMERG "KVM: Going back to host\n");
+#endif
+ vcpu->stat.signal_exits++;
+ run->exit_reason = KVM_EXIT_INTR;
+ r = -EINTR;
+ } else {
+ /* In case an interrupt came in that was triggered
+ * from userspace (like DEC), we need to check what
+ * to inject now! */
+ kvmppc_core_deliver_interrupts(vcpu);
+ }
+ }
+
+ trace_kvm_book3s_reenter(r, vcpu);
+
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ int i;
+
+ sregs->pvr = vcpu->arch.pvr;
+
+ sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+ for (i = 0; i < 64; i++) {
+ sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
+ sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
+ }
+ } else {
+ for (i = 0; i < 16; i++)
+ sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
+
+ for (i = 0; i < 8; i++) {
+ sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
+ sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
+ }
+ }
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ int i;
+
+ kvmppc_set_pvr(vcpu, sregs->pvr);
+
+ vcpu3s->sdr1 = sregs->u.s.sdr1;
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+ for (i = 0; i < 64; i++) {
+ vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
+ sregs->u.s.ppc64.slb[i].slbe);
+ }
+ } else {
+ for (i = 0; i < 16; i++) {
+ vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
+ }
+ for (i = 0; i < 8; i++) {
+ kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
+ (u32)sregs->u.s.ppc32.ibat[i]);
+ kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
+ (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
+ kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
+ (u32)sregs->u.s.ppc32.dbat[i]);
+ kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
+ (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
+ }
+ }
+
+ /* Flush the MMU after messing with the segments */
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+ return 0;
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s;
+ struct kvm_vcpu *vcpu;
+ int err = -ENOMEM;
+ unsigned long p;
+
+ vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+ if (!vcpu_book3s)
+ goto out;
+
+ vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
+ kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
+ if (!vcpu_book3s->shadow_vcpu)
+ goto free_vcpu;
+
+ vcpu = &vcpu_book3s->vcpu;
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_shadow_vcpu;
+
+ p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+ /* the real shared page fills the last 4k of our page */
+ vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
+ if (!p)
+ goto uninit_vcpu;
+
+ vcpu->arch.host_retip = kvm_return_point;
+ vcpu->arch.host_msr = mfmsr();
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* default to book3s_64 (970fx) */
+ vcpu->arch.pvr = 0x3C0301;
+#else
+ /* default to book3s_32 (750) */
+ vcpu->arch.pvr = 0x84202;
+#endif
+ kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+ vcpu->arch.slb_nr = 64;
+
+ /* remember where some real-mode handlers are */
+ vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
+ vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
+ vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+#ifdef CONFIG_PPC_BOOK3S_64
+ vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
+#else
+ vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
+#endif
+
+ vcpu->arch.shadow_msr = MSR_USER64;
+
+ err = kvmppc_mmu_init(vcpu);
+ if (err < 0)
+ goto uninit_vcpu;
+
+ return vcpu;
+
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+free_shadow_vcpu:
+ kfree(vcpu_book3s->shadow_vcpu);
+free_vcpu:
+ vfree(vcpu_book3s);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+
+ free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
+ kvm_vcpu_uninit(vcpu);
+ kfree(vcpu_book3s->shadow_vcpu);
+ vfree(vcpu_book3s);
+}
+
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int ret;
+ double fpr[32][TS_FPRWIDTH];
+ unsigned int fpscr;
+ int fpexc_mode;
+#ifdef CONFIG_ALTIVEC
+ vector128 vr[32];
+ vector128 vscr;
+ unsigned long uninitialized_var(vrsave);
+ int used_vr;
+#endif
+#ifdef CONFIG_VSX
+ int used_vsr;
+#endif
+ ulong ext_msr;
+
+ /* No need to go into the guest when all we do is going out */
+ if (signal_pending(current)) {
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+ /* Save FPU state in stack */
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
+ memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
+ fpscr = current->thread.fpscr.val;
+ fpexc_mode = current->thread.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+ /* Save Altivec state in stack */
+ used_vr = current->thread.used_vr;
+ if (used_vr) {
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+ memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
+ vscr = current->thread.vscr;
+ vrsave = current->thread.vrsave;
+ }
+#endif
+
+#ifdef CONFIG_VSX
+ /* Save VSX state in stack */
+ used_vsr = current->thread.used_vsr;
+ if (used_vsr && (current->thread.regs->msr & MSR_VSX))
+ __giveup_vsx(current);
+#endif
+
+ /* Remember the MSR with disabled extensions */
+ ext_msr = current->thread.regs->msr;
+
+ /* Preload FPU if it's enabled */
+ if (vcpu->arch.shared->msr & MSR_FP)
+ kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
+ kvm_guest_enter();
+
+ ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+
+ kvm_guest_exit();
+
+ local_irq_disable();
+
+ current->thread.regs->msr = ext_msr;
+
+ /* Make sure we save the guest FPU/Altivec/VSX state */
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ kvmppc_giveup_ext(vcpu, MSR_VEC);
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
+
+ /* Restore FPU state from stack */
+ memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
+ current->thread.fpscr.val = fpscr;
+ current->thread.fpexc_mode = fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+ /* Restore Altivec state from stack */
+ if (used_vr && current->thread.used_vr) {
+ memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
+ current->thread.vscr = vscr;
+ current->thread.vrsave = vrsave;
+ }
+ current->thread.used_vr = used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+ current->thread.used_vsr = used_vsr;
+#endif
+
+ return ret;
+}
+
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+ return 0;
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
+static int kvmppc_book3s_init(void)
+{
+ int r;
+
+ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
+ THIS_MODULE);
+
+ if (r)
+ return r;
+
+ r = kvmppc_mmu_hpte_sysinit();
+
+ return r;
+}
+
+static void kvmppc_book3s_exit(void)
+{
+ kvmppc_mmu_hpte_sysexit();
+ kvm_exit();
+}
+
+module_init(kvmppc_book3s_init);
+module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 1a1b34487e71..c1f877c4a884 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -36,41 +36,44 @@
#if defined(CONFIG_PPC_BOOK3S_64)
#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg)
-#define SHADOW_VCPU_OFF PACA_KVM_SVCPU
#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
#define FUNC(name) GLUE(.,name)
+kvmppc_skip_interrupt:
+ /*
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
+ */
+ mfspr r13, SPRN_SRR0
+ addi r13, r13, 4
+ mtspr SPRN_SRR0, r13
+ GET_SCRATCH0(r13)
+ rfid
+ b .
+
+kvmppc_skip_Hinterrupt:
+ /*
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
+ */
+ mfspr r13, SPRN_HSRR0
+ addi r13, r13, 4
+ mtspr SPRN_HSRR0, r13
+ GET_SCRATCH0(r13)
+ hrfid
+ b .
+
#elif defined(CONFIG_PPC_BOOK3S_32)
-#define LOAD_SHADOW_VCPU(reg) \
- mfspr reg, SPRN_SPRG_THREAD; \
- lwz reg, THREAD_KVM_SVCPU(reg); \
- /* PPC32 can have a NULL pointer - let's check for that */ \
- mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \
- mfcr r12; \
- cmpwi reg, 0; \
- bne 1f; \
- mfspr reg, SPRN_SPRG_SCRATCH0; \
- mtcr r12; \
- mfspr r12, SPRN_SPRG_SCRATCH1; \
- b kvmppc_resume_\intno; \
-1:; \
- mtcr r12; \
- mfspr r12, SPRN_SPRG_SCRATCH1; \
- tophys(reg, reg)
-
-#define SHADOW_VCPU_OFF 0
#define MSR_NOIRQ MSR_KERNEL
#define FUNC(name) name
-#endif
-
.macro INTERRUPT_TRAMPOLINE intno
.global kvmppc_trampoline_\intno
kvmppc_trampoline_\intno:
- SET_SCRATCH0(r13) /* Save r13 */
+ mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */
/*
* First thing to do is to find out if we're coming
@@ -78,19 +81,28 @@ kvmppc_trampoline_\intno:
*
* To distinguish, we check a magic byte in the PACA/current
*/
- LOAD_SHADOW_VCPU(r13)
- PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
+ mfspr r13, SPRN_SPRG_THREAD
+ lwz r13, THREAD_KVM_SVCPU(r13)
+ /* PPC32 can have a NULL pointer - let's check for that */
+ mtspr SPRN_SPRG_SCRATCH1, r12 /* Save r12 */
mfcr r12
- stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
- lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
+ cmpwi r13, 0
+ bne 1f
+2: mtcr r12
+ mfspr r12, SPRN_SPRG_SCRATCH1
+ mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
+ b kvmppc_resume_\intno /* Get back original handler */
+
+1: tophys(r13, r13)
+ stw r12, HSTATE_SCRATCH1(r13)
+ mfspr r12, SPRN_SPRG_SCRATCH1
+ stw r12, HSTATE_SCRATCH0(r13)
+ lbz r12, HSTATE_IN_GUEST(r13)
cmpwi r12, KVM_GUEST_MODE_NONE
bne ..kvmppc_handler_hasmagic_\intno
/* No KVM guest? Then jump back to the Linux handler! */
- lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
- mtcr r12
- PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
- GET_SCRATCH0(r13) /* r13 = original r13 */
- b kvmppc_resume_\intno /* Get back original handler */
+ lwz r12, HSTATE_SCRATCH1(r13)
+ b 2b
/* Now we know we're handling a KVM guest */
..kvmppc_handler_hasmagic_\intno:
@@ -112,9 +124,6 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL
-#ifdef CONFIG_PPC_BOOK3S_64
-INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL_HV
-#endif
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL
@@ -124,14 +133,6 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
-/* Those are only available on 64 bit machines */
-
-#ifdef CONFIG_PPC_BOOK3S_64
-INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT
-INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT
-INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
-#endif
-
/*
* Bring us back to the faulting code, but skip the
* faulting instruction.
@@ -143,8 +144,8 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
*
* R12 = free
* R13 = Shadow VCPU (PACA)
- * SVCPU.SCRATCH0 = guest R12
- * SVCPU.SCRATCH1 = guest CR
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH1 = guest CR
* SPRG_SCRATCH0 = guest R13
*
*/
@@ -156,13 +157,14 @@ kvmppc_handler_skip_ins:
mtsrr0 r12
/* Clean up all state */
- lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
+ lwz r12, HSTATE_SCRATCH1(r13)
mtcr r12
- PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
+ PPC_LL r12, HSTATE_SCRATCH0(r13)
GET_SCRATCH0(r13)
/* And get back into the code */
RFI
+#endif
/*
* This trampoline brings us back to a real mode handler
@@ -251,12 +253,4 @@ define_load_up(altivec)
define_load_up(vsx)
#endif
-.global kvmppc_trampoline_lowmem
-kvmppc_trampoline_lowmem:
- PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
-
-.global kvmppc_trampoline_enter
-kvmppc_trampoline_enter:
- PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
-
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 451264274b8c..aed32e517212 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -22,7 +22,7 @@
#if defined(CONFIG_PPC_BOOK3S_64)
#define GET_SHADOW_VCPU(reg) \
- addi reg, r13, PACA_KVM_SVCPU
+ mr reg, r13
#elif defined(CONFIG_PPC_BOOK3S_32)
@@ -71,6 +71,10 @@ kvmppc_handler_trampoline_enter:
/* r3 = shadow vcpu */
GET_SHADOW_VCPU(r3)
+ /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
+ PPC_STL r1, HSTATE_HOST_R1(r3)
+ PPC_STL r2, HSTATE_HOST_R2(r3)
+
/* Move SRR0 and SRR1 into the respective regs */
PPC_LL r9, SVCPU_PC(r3)
mtsrr0 r9
@@ -78,36 +82,36 @@ kvmppc_handler_trampoline_enter:
/* Activate guest mode, so faults get handled by KVM */
li r11, KVM_GUEST_MODE_GUEST
- stb r11, SVCPU_IN_GUEST(r3)
+ stb r11, HSTATE_IN_GUEST(r3)
/* Switch to guest segment. This is subarch specific. */
LOAD_GUEST_SEGMENTS
/* Enter guest */
- PPC_LL r4, (SVCPU_CTR)(r3)
- PPC_LL r5, (SVCPU_LR)(r3)
- lwz r6, (SVCPU_CR)(r3)
- lwz r7, (SVCPU_XER)(r3)
+ PPC_LL r4, SVCPU_CTR(r3)
+ PPC_LL r5, SVCPU_LR(r3)
+ lwz r6, SVCPU_CR(r3)
+ lwz r7, SVCPU_XER(r3)
mtctr r4
mtlr r5
mtcr r6
mtxer r7
- PPC_LL r0, (SVCPU_R0)(r3)
- PPC_LL r1, (SVCPU_R1)(r3)
- PPC_LL r2, (SVCPU_R2)(r3)
- PPC_LL r4, (SVCPU_R4)(r3)
- PPC_LL r5, (SVCPU_R5)(r3)
- PPC_LL r6, (SVCPU_R6)(r3)
- PPC_LL r7, (SVCPU_R7)(r3)
- PPC_LL r8, (SVCPU_R8)(r3)
- PPC_LL r9, (SVCPU_R9)(r3)
- PPC_LL r10, (SVCPU_R10)(r3)
- PPC_LL r11, (SVCPU_R11)(r3)
- PPC_LL r12, (SVCPU_R12)(r3)
- PPC_LL r13, (SVCPU_R13)(r3)
+ PPC_LL r0, SVCPU_R0(r3)
+ PPC_LL r1, SVCPU_R1(r3)
+ PPC_LL r2, SVCPU_R2(r3)
+ PPC_LL r4, SVCPU_R4(r3)
+ PPC_LL r5, SVCPU_R5(r3)
+ PPC_LL r6, SVCPU_R6(r3)
+ PPC_LL r7, SVCPU_R7(r3)
+ PPC_LL r8, SVCPU_R8(r3)
+ PPC_LL r9, SVCPU_R9(r3)
+ PPC_LL r10, SVCPU_R10(r3)
+ PPC_LL r11, SVCPU_R11(r3)
+ PPC_LL r12, SVCPU_R12(r3)
+ PPC_LL r13, SVCPU_R13(r3)
PPC_LL r3, (SVCPU_R3)(r3)
@@ -125,56 +129,63 @@ kvmppc_handler_trampoline_enter_end:
.global kvmppc_handler_trampoline_exit
kvmppc_handler_trampoline_exit:
+.global kvmppc_interrupt
+kvmppc_interrupt:
+
/* Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
* R12 = exit handler id
- * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
- * SVCPU.SCRATCH0 = guest R12
- * SVCPU.SCRATCH1 = guest CR
+ * R13 = shadow vcpu (32-bit) or PACA (64-bit)
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH1 = guest CR
*
*/
/* Save registers */
- PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13)
- PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13)
- PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13)
- PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13)
- PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13)
- PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13)
- PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13)
- PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13)
- PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13)
- PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13)
- PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13)
- PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13)
+ PPC_STL r0, SVCPU_R0(r13)
+ PPC_STL r1, SVCPU_R1(r13)
+ PPC_STL r2, SVCPU_R2(r13)
+ PPC_STL r3, SVCPU_R3(r13)
+ PPC_STL r4, SVCPU_R4(r13)
+ PPC_STL r5, SVCPU_R5(r13)
+ PPC_STL r6, SVCPU_R6(r13)
+ PPC_STL r7, SVCPU_R7(r13)
+ PPC_STL r8, SVCPU_R8(r13)
+ PPC_STL r9, SVCPU_R9(r13)
+ PPC_STL r10, SVCPU_R10(r13)
+ PPC_STL r11, SVCPU_R11(r13)
/* Restore R1/R2 so we can handle faults */
- PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13)
- PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13)
+ PPC_LL r1, HSTATE_HOST_R1(r13)
+ PPC_LL r2, HSTATE_HOST_R2(r13)
/* Save guest PC and MSR */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
andi. r0,r12,0x2
beq 1f
mfspr r3,SPRN_HSRR0
mfspr r4,SPRN_HSRR1
andi. r12,r12,0x3ffd
b 2f
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
1: mfsrr0 r3
mfsrr1 r4
2:
- PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13)
- PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
+ PPC_STL r3, SVCPU_PC(r13)
+ PPC_STL r4, SVCPU_SHADOW_SRR1(r13)
/* Get scratch'ed off registers */
GET_SCRATCH0(r9)
- PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
- lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
+ PPC_LL r8, HSTATE_SCRATCH0(r13)
+ lwz r7, HSTATE_SCRATCH1(r13)
- PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13)
- PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13)
- stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13)
+ PPC_STL r9, SVCPU_R13(r13)
+ PPC_STL r8, SVCPU_R12(r13)
+ stw r7, SVCPU_CR(r13)
/* Save more register state */
@@ -184,11 +195,11 @@ kvmppc_handler_trampoline_exit:
mfctr r8
mflr r9
- stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13)
- PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13)
- stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13)
- PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13)
- PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13)
+ stw r5, SVCPU_XER(r13)
+ PPC_STL r6, SVCPU_FAULT_DAR(r13)
+ stw r7, SVCPU_FAULT_DSISR(r13)
+ PPC_STL r8, SVCPU_CTR(r13)
+ PPC_STL r9, SVCPU_LR(r13)
/*
* In order for us to easily get the last instruction,
@@ -218,7 +229,7 @@ ld_last_inst:
/* Set guest mode to 'jump over instruction' so if lwz faults
* we'll just continue at the next IP. */
li r9, KVM_GUEST_MODE_SKIP
- stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
+ stb r9, HSTATE_IN_GUEST(r13)
/* 1) enable paging for data */
mfmsr r9
@@ -232,13 +243,13 @@ ld_last_inst:
sync
#endif
- stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13)
+ stw r0, SVCPU_LAST_INST(r13)
no_ld_last_inst:
/* Unset guest mode */
li r9, KVM_GUEST_MODE_NONE
- stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
+ stb r9, HSTATE_IN_GUEST(r13)
/* Switch back to host MMU */
LOAD_HOST_SEGMENTS
@@ -248,7 +259,7 @@ no_ld_last_inst:
* R1 = host R1
* R2 = host R2
* R12 = exit handler id
- * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
+ * R13 = shadow vcpu (32-bit) or PACA (64-bit)
* SVCPU.* = guest *
*
*/
@@ -258,7 +269,7 @@ no_ld_last_inst:
ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
mtsrr1 r7
/* Load highmem handler address */
- PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13)
+ PPC_LL r8, HSTATE_VMHANDLER(r13)
mtsrr0 r8
RFI
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8462b3a1c1c7..ee45fa01220e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
* Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
@@ -78,6 +79,60 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
}
}
+#ifdef CONFIG_SPE
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ enable_kernel_spe();
+ kvmppc_save_guest_spe(vcpu);
+ vcpu->arch.shadow_msr &= ~MSR_SPE;
+ preempt_enable();
+}
+
+static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ enable_kernel_spe();
+ kvmppc_load_guest_spe(vcpu);
+ vcpu->arch.shadow_msr |= MSR_SPE;
+ preempt_enable();
+}
+
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.shared->msr & MSR_SPE) {
+ if (!(vcpu->arch.shadow_msr & MSR_SPE))
+ kvmppc_vcpu_enable_spe(vcpu);
+ } else if (vcpu->arch.shadow_msr & MSR_SPE) {
+ kvmppc_vcpu_disable_spe(vcpu);
+ }
+}
+#else
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+}
+#endif
+
+/*
+ * Helper function for "full" MSR writes. No need to call this if only
+ * EE/CE/ME/DE/RI are changing.
+ */
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+ u32 old_msr = vcpu->arch.shared->msr;
+
+ vcpu->arch.shared->msr = new_msr;
+
+ kvmppc_mmu_msr_notify(vcpu, old_msr);
+
+ if (vcpu->arch.shared->msr & MSR_WE) {
+ kvm_vcpu_block(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+ };
+
+ kvmppc_vcpu_sync_spe(vcpu);
+}
+
static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
unsigned int priority)
{
@@ -257,6 +312,19 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.shared->int_pending = 0;
}
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ local_irq_disable();
+ kvm_guest_enter();
+ ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+ kvm_guest_exit();
+ local_irq_enable();
+
+ return ret;
+}
+
/**
* kvmppc_handle_exit
*
@@ -344,10 +412,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
- case BOOKE_INTERRUPT_SPE_UNAVAIL:
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
+#ifdef CONFIG_SPE
+ case BOOKE_INTERRUPT_SPE_UNAVAIL: {
+ if (vcpu->arch.shared->msr & MSR_SPE)
+ kvmppc_vcpu_enable_spe(vcpu);
+ else
+ kvmppc_booke_queue_irqprio(vcpu,
+ BOOKE_IRQPRIO_SPE_UNAVAIL);
r = RESUME_GUEST;
break;
+ }
case BOOKE_INTERRUPT_SPE_FP_DATA:
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
@@ -358,6 +432,28 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
r = RESUME_GUEST;
break;
+#else
+ case BOOKE_INTERRUPT_SPE_UNAVAIL:
+ /*
+ * Guest wants SPE, but host kernel doesn't support it. Send
+ * an "unimplemented operation" program check to the guest.
+ */
+ kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
+ r = RESUME_GUEST;
+ break;
+
+ /*
+ * These really should never happen without CONFIG_SPE,
+ * as we should never enable the real MSR[SPE] in the guest.
+ */
+ case BOOKE_INTERRUPT_SPE_FP_DATA:
+ case BOOKE_INTERRUPT_SPE_FP_ROUND:
+ printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
+ __func__, exit_nr, vcpu->arch.pc);
+ run->hw.hardware_exit_reason = exit_nr;
+ r = RESUME_HOST;
+ break;
+#endif
case BOOKE_INTERRUPT_DATA_STORAGE:
kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
@@ -392,6 +488,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
gpa_t gpaddr;
gfn_t gfn;
+#ifdef CONFIG_KVM_E500
+ if (!(vcpu->arch.shared->msr & MSR_PR) &&
+ (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
+ kvmppc_map_magic(vcpu);
+ kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+ r = RESUME_GUEST;
+
+ break;
+ }
+#endif
+
/* Check the guest TLB. */
gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
@@ -514,6 +621,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.pc = 0;
vcpu->arch.shared->msr = 0;
+ vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
vcpu->arch.shadow_pid = 1;
@@ -770,6 +878,26 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+ return 0;
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
int __init kvmppc_booke_init(void)
{
unsigned long ivor[16];
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 492bb7030358..8e1fe33d64e5 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -52,24 +52,19 @@
extern unsigned long kvmppc_booke_handlers;
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
- if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR))
- kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
-
- vcpu->arch.shared->msr = new_msr;
-
- if (vcpu->arch.shared->msr & MSR_WE) {
- kvm_vcpu_block(vcpu);
- kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
- };
-}
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+/* low-level asm code to transfer guest state */
+void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
+void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
+
+/* high-level function, manages flags, host state */
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
+
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index b58ccae95904..42f2fb1f66e9 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
@@ -24,8 +25,6 @@
#include <asm/page.h>
#include <asm/asm-offsets.h>
-#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS)
-
#define VCPU_GPR(n) (VCPU_GPRS + (n * 4))
/* The host stack layout: */
@@ -192,6 +191,12 @@ _GLOBAL(kvmppc_resume_host)
lwz r3, VCPU_HOST_PID(r4)
mtspr SPRN_PID, r3
+#ifdef CONFIG_FSL_BOOKE
+ /* we cheat and know that Linux doesn't use PID1 which is always 0 */
+ lis r3, 0
+ mtspr SPRN_PID1, r3
+#endif
+
/* Restore host IVPR before re-enabling interrupts. We cheat and know
* that Linux IVPR is always 0xc0000000. */
lis r3, 0xc000
@@ -241,6 +246,14 @@ _GLOBAL(kvmppc_resume_host)
heavyweight_exit:
/* Not returning to guest. */
+#ifdef CONFIG_SPE
+ /* save guest SPEFSCR and load host SPEFSCR */
+ mfspr r9, SPRN_SPEFSCR
+ stw r9, VCPU_SPEFSCR(r4)
+ lwz r9, VCPU_HOST_SPEFSCR(r4)
+ mtspr SPRN_SPEFSCR, r9
+#endif
+
/* We already saved guest volatile register state; now save the
* non-volatiles. */
stw r15, VCPU_GPR(r15)(r4)
@@ -342,6 +355,14 @@ _GLOBAL(__kvmppc_vcpu_run)
lwz r30, VCPU_GPR(r30)(r4)
lwz r31, VCPU_GPR(r31)(r4)
+#ifdef CONFIG_SPE
+ /* save host SPEFSCR and load guest SPEFSCR */
+ mfspr r3, SPRN_SPEFSCR
+ stw r3, VCPU_HOST_SPEFSCR(r4)
+ lwz r3, VCPU_SPEFSCR(r4)
+ mtspr SPRN_SPEFSCR, r3
+#endif
+
lightweight_exit:
stw r2, HOST_R2(r1)
@@ -350,6 +371,11 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
+#ifdef CONFIG_FSL_BOOKE
+ lwz r3, VCPU_SHADOW_PID1(r4)
+ mtspr SPRN_PID1, r3
+#endif
+
#ifdef CONFIG_44x
iccci 0, 0 /* XXX hack */
#endif
@@ -405,20 +431,17 @@ lightweight_exit:
/* Finish loading guest volatiles and jump to guest. */
lwz r3, VCPU_CTR(r4)
+ lwz r5, VCPU_CR(r4)
+ lwz r6, VCPU_PC(r4)
+ lwz r7, VCPU_SHADOW_MSR(r4)
mtctr r3
- lwz r3, VCPU_CR(r4)
- mtcr r3
+ mtcr r5
+ mtsrr0 r6
+ mtsrr1 r7
lwz r5, VCPU_GPR(r5)(r4)
lwz r6, VCPU_GPR(r6)(r4)
lwz r7, VCPU_GPR(r7)(r4)
lwz r8, VCPU_GPR(r8)(r4)
- lwz r3, VCPU_PC(r4)
- mtsrr0 r3
- lwz r3, VCPU_SHARED(r4)
- lwz r3, (VCPU_SHARED_MSR + 4)(r3)
- oris r3, r3, KVMPPC_MSR_MASK@h
- ori r3, r3, KVMPPC_MSR_MASK@l
- mtsrr1 r3
/* Clear any debug events which occurred since we disabled MSR[DE].
* XXX This gives us a 3-instruction window in which a breakpoint
@@ -430,3 +453,24 @@ lightweight_exit:
lwz r3, VCPU_GPR(r3)(r4)
lwz r4, VCPU_GPR(r4)(r4)
rfi
+
+#ifdef CONFIG_SPE
+_GLOBAL(kvmppc_save_guest_spe)
+ cmpi 0,r3,0
+ beqlr-
+ SAVE_32EVRS(0, r4, r3, VCPU_EVR)
+ evxor evr6, evr6, evr6
+ evmwumiaa evr6, evr6, evr6
+ li r4,VCPU_ACC
+ evstddx evr6, r4, r3 /* save acc */
+ blr
+
+_GLOBAL(kvmppc_load_guest_spe)
+ cmpi 0,r3,0
+ beqlr-
+ li r4,VCPU_ACC
+ evlddx evr6,r4,r3
+ evmra evr6,evr6 /* load acc */
+ REST_32EVRS(0, r4, r3, VCPU_EVR)
+ blr
+#endif
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 318dbc61ba44..797a7447c268 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, <yu.liu@freescale.com>
*
@@ -41,6 +41,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_e500_tlb_put(vcpu);
+
+#ifdef CONFIG_SPE
+ if (vcpu->arch.shadow_msr & MSR_SPE)
+ kvmppc_vcpu_disable_spe(vcpu);
+#endif
}
int kvmppc_core_check_processor_compat(void)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 69cd665a0caf..d48ae396f41e 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -81,8 +81,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
kvmppc_set_pid(vcpu, spr_val);
break;
case SPRN_PID1:
+ if (spr_val != 0)
+ return EMULATE_FAIL;
vcpu_e500->pid[1] = spr_val; break;
case SPRN_PID2:
+ if (spr_val != 0)
+ return EMULATE_FAIL;
vcpu_e500->pid[2] = spr_val; break;
case SPRN_MAS0:
vcpu_e500->mas0 = spr_val; break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index b18fe353397d..13c432ea2fa8 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -28,8 +28,196 @@
#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+struct id {
+ unsigned long val;
+ struct id **pentry;
+};
+
+#define NUM_TIDS 256
+
+/*
+ * This table provide mappings from:
+ * (guestAS,guestTID,guestPR) --> ID of physical cpu
+ * guestAS [0..1]
+ * guestTID [0..255]
+ * guestPR [0..1]
+ * ID [1..255]
+ * Each vcpu keeps one vcpu_id_table.
+ */
+struct vcpu_id_table {
+ struct id id[2][NUM_TIDS][2];
+};
+
+/*
+ * This table provide reversed mappings of vcpu_id_table:
+ * ID --> address of vcpu_id_table item.
+ * Each physical core has one pcpu_id_table.
+ */
+struct pcpu_id_table {
+ struct id *entry[NUM_TIDS];
+};
+
+static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
+
+/* This variable keeps last used shadow ID on local core.
+ * The valid range of shadow ID is [1..255] */
+static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
+
static unsigned int tlb1_entry_num;
+/*
+ * Allocate a free shadow id and setup a valid sid mapping in given entry.
+ * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_setup_one(struct id *entry)
+{
+ unsigned long sid;
+ int ret = -1;
+
+ sid = ++(__get_cpu_var(pcpu_last_used_sid));
+ if (sid < NUM_TIDS) {
+ __get_cpu_var(pcpu_sids).entry[sid] = entry;
+ entry->val = sid;
+ entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
+ ret = sid;
+ }
+
+ /*
+ * If sid == NUM_TIDS, we've run out of sids. We return -1, and
+ * the caller will invalidate everything and start over.
+ *
+ * sid > NUM_TIDS indicates a race, which we disable preemption to
+ * avoid.
+ */
+ WARN_ON(sid > NUM_TIDS);
+
+ return ret;
+}
+
+/*
+ * Check if given entry contain a valid shadow id mapping.
+ * An ID mapping is considered valid only if
+ * both vcpu and pcpu know this mapping.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_lookup(struct id *entry)
+{
+ if (entry && entry->val != 0 &&
+ __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
+ entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
+ return entry->val;
+ return -1;
+}
+
+/* Invalidate all id mappings on local core */
+static inline void local_sid_destroy_all(void)
+{
+ preempt_disable();
+ __get_cpu_var(pcpu_last_used_sid) = 0;
+ memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
+ preempt_enable();
+}
+
+static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
+ return vcpu_e500->idt;
+}
+
+static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->idt);
+}
+
+/* Invalidate all mappings on vcpu */
+static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/* Invalidate one ID mapping on vcpu */
+static inline void kvmppc_e500_id_table_reset_one(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ int as, int pid, int pr)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+
+ BUG_ON(as >= 2);
+ BUG_ON(pid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ idt->id[as][pid][pr].val = 0;
+ idt->id[as][pid][pr].pentry = NULL;
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
+ * This function first lookup if a valid mapping exists,
+ * if not, then creates a new one.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+ unsigned int as, unsigned int gid,
+ unsigned int pr, int avoid_recursion)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ int sid;
+
+ BUG_ON(as >= 2);
+ BUG_ON(gid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ sid = local_sid_lookup(&idt->id[as][gid][pr]);
+
+ while (sid <= 0) {
+ /* No mapping yet */
+ sid = local_sid_setup_one(&idt->id[as][gid][pr]);
+ if (sid <= 0) {
+ _tlbil_all();
+ local_sid_destroy_all();
+ }
+
+ /* Update shadow pid when mappings are changed */
+ if (!avoid_recursion)
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
+
+ return sid;
+}
+
+/* Map guest pid to shadow.
+ * We use PID to keep shadow of current guest non-zero PID,
+ * and use PID1 to keep shadow of guest zero PID.
+ * So that guest tlbe with TID=0 can be accessed at any time */
+void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ preempt_disable();
+ vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu),
+ get_cur_pid(&vcpu_e500->vcpu),
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu), 0,
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ preempt_enable();
+}
+
void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -41,25 +229,14 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
printk("Guest TLB%d:\n", tlbsel);
- for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
- tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+ for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
+ tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
if (tlbe->mas1 & MAS1_VALID)
printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
tlbsel, i, tlbe->mas1, tlbe->mas2,
tlbe->mas3, tlbe->mas7);
}
}
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- printk("Shadow TLB%d:\n", tlbsel);
- for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
- tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
- if (tlbe->mas1 & MAS1_VALID)
- printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n",
- tlbsel, i, tlbe->mas1, tlbe->mas2,
- tlbe->mas3, tlbe->mas7);
- }
- }
}
static inline unsigned int tlb0_get_next_victim(
@@ -67,16 +244,17 @@ static inline unsigned int tlb0_get_next_victim(
{
unsigned int victim;
- victim = vcpu_e500->guest_tlb_nv[0]++;
- if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
- vcpu_e500->guest_tlb_nv[0] = 0;
+ victim = vcpu_e500->gtlb_nv[0]++;
+ if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+ vcpu_e500->gtlb_nv[0] = 0;
return victim;
}
static inline unsigned int tlb1_max_shadow_size(void)
{
- return tlb1_entry_num - tlbcam_index;
+ /* reserve one entry for magic page */
+ return tlb1_entry_num - tlbcam_index - 1;
}
static inline int tlbe_is_writable(struct tlbe *tlbe)
@@ -112,72 +290,149 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
/*
* writing shadow tlb entry to host TLB
*/
-static inline void __write_host_tlbe(struct tlbe *stlbe)
+static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS0, mas0);
mtspr(SPRN_MAS1, stlbe->mas1);
mtspr(SPRN_MAS2, stlbe->mas2);
mtspr(SPRN_MAS3, stlbe->mas3);
mtspr(SPRN_MAS7, stlbe->mas7);
- __asm__ __volatile__ ("tlbwe\n" : : );
+ asm volatile("isync; tlbwe" : : : "memory");
+ local_irq_restore(flags);
}
static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
+ int tlbsel, int esel, struct tlbe *stlbe)
{
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-
- local_irq_disable();
if (tlbsel == 0) {
- __write_host_tlbe(stlbe);
+ __write_host_tlbe(stlbe,
+ MAS0_TLBSEL(0) |
+ MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
} else {
- unsigned register mas0;
-
- mas0 = mfspr(SPRN_MAS0);
-
- mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
- __write_host_tlbe(stlbe);
-
- mtspr(SPRN_MAS0, mas0);
+ __write_host_tlbe(stlbe,
+ MAS0_TLBSEL(1) |
+ MAS0_ESEL(to_htlb1_esel(esel)));
}
- local_irq_enable();
+ trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+ stlbe->mas3, stlbe->mas7);
+}
+
+void kvmppc_map_magic(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe magic;
+ ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+ unsigned int stid;
+ pfn_t pfn;
+
+ pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
+ get_page(pfn_to_page(pfn));
+
+ preempt_disable();
+ stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0);
+
+ magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
+ MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
+ magic.mas3 = (pfn << PAGE_SHIFT) |
+ MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
+ magic.mas7 = pfn >> (32 - PAGE_SHIFT);
+
+ __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
+ preempt_enable();
}
void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int i;
- unsigned register mas0;
-
- /* Load all valid TLB1 entries to reduce guest tlb miss fault */
- local_irq_disable();
- mas0 = mfspr(SPRN_MAS0);
- for (i = 0; i < tlb1_max_shadow_size(); i++) {
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
-
- if (get_tlb_v(stlbe)) {
- mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
- | MAS0_ESEL(to_htlb1_esel(i)));
- __write_host_tlbe(stlbe);
- }
- }
- mtspr(SPRN_MAS0, mas0);
- local_irq_enable();
+
+ /* Shadow PID may be expired on local core */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
}
void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
{
- _tlbil_all();
+}
+
+static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ unsigned int pr, tid, ts, pid;
+ u32 val, eaddr;
+ unsigned long flags;
+
+ ts = get_tlb_ts(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+
+ preempt_disable();
+
+ /* One guest ID may be mapped to two shadow IDs */
+ for (pr = 0; pr < 2; pr++) {
+ /*
+ * The shadow PID can have a valid mapping on at most one
+ * host CPU. In the common case, it will be valid on this
+ * CPU, in which case (for TLB0) we do a local invalidation
+ * of the specific address.
+ *
+ * If the shadow PID is not valid on the current host CPU, or
+ * if we're invalidating a TLB1 entry, we invalidate the
+ * entire shadow PID.
+ */
+ if (tlbsel == 1 ||
+ (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) {
+ kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
+ continue;
+ }
+
+ /*
+ * The guest is invalidating a TLB0 entry which is in a PID
+ * that has a valid shadow mapping on this host CPU. We
+ * search host TLB0 to invalidate it's shadow TLB entry,
+ * similar to __tlbil_va except that we need to look in AS1.
+ */
+ val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
+ eaddr = get_tlb_eaddr(gtlbe);
+
+ local_irq_save(flags);
+
+ mtspr(SPRN_MAS6, val);
+ asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
+ val = mfspr(SPRN_MAS1);
+ if (val & MAS1_VALID) {
+ mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ asm volatile("tlbwe");
+ }
+
+ local_irq_restore(flags);
+ }
+
+ preempt_enable();
}
/* Search the guest TLB for a matching entry. */
static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
gva_t eaddr, int tlbsel, unsigned int pid, int as)
{
+ int size = vcpu_e500->gtlb_size[tlbsel];
+ int set_base;
int i;
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
- struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+ if (tlbsel == 0) {
+ int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
+ set_base = (eaddr >> PAGE_SHIFT) & mask;
+ set_base *= KVM_E500_TLB0_WAY_NUM;
+ size = KVM_E500_TLB0_WAY_NUM;
+ } else {
+ set_base = 0;
+ }
+
+ for (i = 0; i < size; i++) {
+ struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
unsigned int tid;
if (eaddr < get_tlb_eaddr(tlbe))
@@ -196,66 +451,32 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
if (get_tlb_ts(tlbe) != as && as != -1)
continue;
- return i;
+ return set_base + i;
}
return -1;
}
-static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
-{
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
- struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
-
- if (page) {
- vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
-
- if (get_tlb_v(stlbe)) {
- if (tlbe_is_writable(stlbe))
- kvm_release_page_dirty(page);
- else
- kvm_release_page_clean(page);
- }
- }
-}
-
-static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
+static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
+ struct tlbe *gtlbe,
+ pfn_t pfn)
{
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+ priv->pfn = pfn;
+ priv->flags = E500_TLB_VALID;
- kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
- stlbe->mas1 = 0;
- trace_kvm_stlb_inval(index_of(tlbsel, esel));
+ if (tlbe_is_writable(gtlbe))
+ priv->flags |= E500_TLB_DIRTY;
}
-static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
- gva_t eaddr, gva_t eend, u32 tid)
+static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv)
{
- unsigned int pid = tid & 0xff;
- unsigned int i;
-
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) {
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
- unsigned int tid;
-
- if (!get_tlb_v(stlbe))
- continue;
-
- if (eend < get_tlb_eaddr(stlbe))
- continue;
+ if (priv->flags & E500_TLB_VALID) {
+ if (priv->flags & E500_TLB_DIRTY)
+ kvm_release_pfn_dirty(priv->pfn);
+ else
+ kvm_release_pfn_clean(priv->pfn);
- if (eaddr > get_tlb_end(stlbe))
- continue;
-
- tid = get_tlb_tid(stlbe);
- if (tid && (tid != pid))
- continue;
-
- kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
- write_host_tlbe(vcpu_e500, 1, i);
+ priv->flags = 0;
}
}
@@ -273,7 +494,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
- | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
| MAS1_TID(vcpu_e500->pid[pidsel])
| MAS1_TSIZE(tsized);
@@ -286,56 +507,154 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
vcpu_e500->mas7 = 0;
}
-static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
+static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct tlbe *gtlbe, int tsize,
+ struct tlbe_priv *priv,
+ u64 gvaddr, struct tlbe *stlbe)
{
- struct page *new_page;
- struct tlbe *stlbe;
- hpa_t hpaddr;
-
- stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-
- /* Get reference to new page. */
- new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
- if (is_error_page(new_page)) {
- printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n",
- (long)gfn);
- kvm_release_page_clean(new_page);
- return;
- }
- hpaddr = page_to_phys(new_page);
-
- /* Drop reference to old page. */
- kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+ pfn_t pfn = priv->pfn;
+ unsigned int stid;
- vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
+ stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
+ get_tlb_tid(gtlbe),
+ get_cur_pr(&vcpu_e500->vcpu), 0);
- /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
- stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
- | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+ /* Force TS=1 IPROT=0 for all guest mappings. */
+ stlbe->mas1 = MAS1_TSIZE(tsize)
+ | MAS1_TID(stid) | MAS1_TS | MAS1_VALID;
stlbe->mas2 = (gvaddr & MAS2_EPN)
| e500_shadow_mas2_attrib(gtlbe->mas2,
vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas3 = (hpaddr & MAS3_RPN)
+ stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
| e500_shadow_mas3_attrib(gtlbe->mas3,
vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
+ stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
+}
- trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
- stlbe->mas3, stlbe->mas7);
+
+static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
+ struct tlbe *stlbe)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long pfn, hva;
+ int pfnmap = 0;
+ int tsize = BOOK3E_PAGESZ_4K;
+ struct tlbe_priv *priv;
+
+ /*
+ * Translate guest physical to true physical, acquiring
+ * a page reference if it is normal, non-reserved memory.
+ *
+ * gfn_to_memslot() must succeed because otherwise we wouldn't
+ * have gotten this far. Eventually we should just pass the slot
+ * pointer through from the first lookup.
+ */
+ slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
+ hva = gfn_to_hva_memslot(slot, gfn);
+
+ if (tlbsel == 1) {
+ struct vm_area_struct *vma;
+ down_read(&current->mm->mmap_sem);
+
+ vma = find_vma(current->mm, hva);
+ if (vma && hva >= vma->vm_start &&
+ (vma->vm_flags & VM_PFNMAP)) {
+ /*
+ * This VMA is a physically contiguous region (e.g.
+ * /dev/mem) that bypasses normal Linux page
+ * management. Find the overlap between the
+ * vma and the memslot.
+ */
+
+ unsigned long start, end;
+ unsigned long slot_start, slot_end;
+
+ pfnmap = 1;
+
+ start = vma->vm_pgoff;
+ end = start +
+ ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
+
+ pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
+
+ slot_start = pfn - (gfn - slot->base_gfn);
+ slot_end = slot_start + slot->npages;
+
+ if (start < slot_start)
+ start = slot_start;
+ if (end > slot_end)
+ end = slot_end;
+
+ tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+ MAS1_TSIZE_SHIFT;
+
+ /*
+ * e500 doesn't implement the lowest tsize bit,
+ * or 1K pages.
+ */
+ tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+
+ /*
+ * Now find the largest tsize (up to what the guest
+ * requested) that will cover gfn, stay within the
+ * range, and for which gfn and pfn are mutually
+ * aligned.
+ */
+
+ for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
+ unsigned long gfn_start, gfn_end, tsize_pages;
+ tsize_pages = 1 << (tsize - 2);
+
+ gfn_start = gfn & ~(tsize_pages - 1);
+ gfn_end = gfn_start + tsize_pages;
+
+ if (gfn_start + pfn - gfn < start)
+ continue;
+ if (gfn_end + pfn - gfn > end)
+ continue;
+ if ((gfn & (tsize_pages - 1)) !=
+ (pfn & (tsize_pages - 1)))
+ continue;
+
+ gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ pfn &= ~(tsize_pages - 1);
+ break;
+ }
+ }
+
+ up_read(&current->mm->mmap_sem);
+ }
+
+ if (likely(!pfnmap)) {
+ pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+ if (is_error_pfn(pfn)) {
+ printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
+ (long)gfn);
+ kvm_release_pfn_clean(pfn);
+ return;
+ }
+ }
+
+ /* Drop old priv and setup new one. */
+ priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
+ kvmppc_e500_priv_release(priv);
+ kvmppc_e500_priv_setup(priv, gtlbe, pfn);
+
+ kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, priv, gvaddr, stlbe);
}
/* XXX only map the one-one case, for now use TLB0 */
-static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
+static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int esel, struct tlbe *stlbe)
{
struct tlbe *gtlbe;
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ gtlbe = &vcpu_e500->gtlb_arch[0][esel];
kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
- gtlbe, tlbsel, esel);
+ gtlbe, 0, esel, stlbe);
return esel;
}
@@ -344,53 +663,37 @@ static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
* the shadow TLB. */
/* XXX for both one-one and one-to-many , for now use TLB1 */
static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
+ u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
{
unsigned int victim;
- victim = vcpu_e500->guest_tlb_nv[1]++;
+ victim = vcpu_e500->gtlb_nv[1]++;
- if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
- vcpu_e500->guest_tlb_nv[1] = 0;
+ if (unlikely(vcpu_e500->gtlb_nv[1] >= tlb1_max_shadow_size()))
+ vcpu_e500->gtlb_nv[1] = 0;
- kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim);
+ kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim, stlbe);
return victim;
}
-/* Invalidate all guest kernel mappings when enter usermode,
- * so that when they fault back in they will get the
- * proper permission bits. */
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
{
- if (usermode) {
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int i;
-
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i < tlb1_max_shadow_size(); i++)
- kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- _tlbil_all();
- }
+ /* Recalc shadow pid since MSR changes */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
}
-static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
+static inline int kvmppc_e500_gtlbe_invalidate(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
{
- struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
if (unlikely(get_tlb_iprot(gtlbe)))
return -1;
- if (tlbsel == 1) {
- kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
- get_tlb_end(gtlbe),
- get_tlb_tid(gtlbe));
- } else {
- kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
- }
-
gtlbe->mas1 = 0;
return 0;
@@ -401,13 +704,14 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
int esel;
if (value & MMUCSR0_TLB0FI)
- for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++)
+ for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
if (value & MMUCSR0_TLB1FI)
- for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++)
+ for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
- _tlbil_all();
+ /* Invalidate all vcpu id mappings */
+ kvmppc_e500_id_table_reset_all(vcpu_e500);
return EMULATE_DONE;
}
@@ -428,7 +732,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
if (ia) {
/* invalidate all entries */
- for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++)
+ for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
} else {
ea &= 0xfffff000;
@@ -438,7 +742,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
}
- _tlbil_all();
+ /* Invalidate all vcpu id mappings */
+ kvmppc_e500_id_table_reset_all(vcpu_e500);
return EMULATE_DONE;
}
@@ -452,9 +757,9 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
tlbsel = get_tlb_tlbsel(vcpu_e500);
esel = get_tlb_esel(vcpu_e500, tlbsel);
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
vcpu_e500->mas0 &= ~MAS0_NV(~0);
- vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu_e500->mas1 = gtlbe->mas1;
vcpu_e500->mas2 = gtlbe->mas2;
vcpu_e500->mas3 = gtlbe->mas3;
@@ -477,14 +782,14 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
if (esel >= 0) {
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
break;
}
}
if (gtlbe) {
vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
- | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu_e500->mas1 = gtlbe->mas1;
vcpu_e500->mas2 = gtlbe->mas2;
vcpu_e500->mas3 = gtlbe->mas3;
@@ -497,7 +802,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
- | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
| (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
| (vcpu_e500->mas4 & MAS4_TSIZED(~0));
@@ -514,23 +819,16 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- u64 eaddr;
- u64 raddr;
- u32 tid;
struct tlbe *gtlbe;
- int tlbsel, esel, stlbsel, sesel;
+ int tlbsel, esel;
tlbsel = get_tlb_tlbsel(vcpu_e500);
esel = get_tlb_esel(vcpu_e500, tlbsel);
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
- if (get_tlb_v(gtlbe) && tlbsel == 1) {
- eaddr = get_tlb_eaddr(gtlbe);
- tid = get_tlb_tid(gtlbe);
- kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
- get_tlb_end(gtlbe), tid);
- }
+ if (get_tlb_v(gtlbe))
+ kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
gtlbe->mas1 = vcpu_e500->mas1;
gtlbe->mas2 = vcpu_e500->mas2;
@@ -542,6 +840,12 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
+ struct tlbe stlbe;
+ int stlbsel, sesel;
+ u64 eaddr;
+ u64 raddr;
+
+ preempt_disable();
switch (tlbsel) {
case 0:
/* TLB0 */
@@ -549,7 +853,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
stlbsel = 0;
- sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
+ sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
break;
@@ -564,13 +868,14 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
* are mapped on the fly. */
stlbsel = 1;
sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
- raddr >> PAGE_SHIFT, gtlbe);
+ raddr >> PAGE_SHIFT, gtlbe, &stlbe);
break;
default:
BUG();
}
- write_host_tlbe(vcpu_e500, stlbsel, sesel);
+ write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
+ preempt_enable();
}
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
@@ -610,7 +915,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
struct tlbe *gtlbe =
- &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)];
+ &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
u64 pgmask = get_tlb_bytes(gtlbe) - 1;
return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
@@ -618,38 +923,37 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int tlbsel, i;
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++)
- for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
- kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
-
- /* discard all guest mapping */
- _tlbil_all();
}
void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
unsigned int index)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe_priv *priv;
+ struct tlbe *gtlbe, stlbe;
int tlbsel = tlbsel_of(index);
int esel = esel_of(index);
int stlbsel, sesel;
+ gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+
+ preempt_disable();
switch (tlbsel) {
case 0:
stlbsel = 0;
sesel = esel;
+ priv = &vcpu_e500->gtlb_priv[stlbsel][sesel];
+
+ kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
+ priv, eaddr, &stlbe);
break;
case 1: {
gfn_t gfn = gpaddr >> PAGE_SHIFT;
- struct tlbe *gtlbe
- = &vcpu_e500->guest_tlb[tlbsel][esel];
stlbsel = 1;
- sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
+ sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
+ gtlbe, &stlbe);
break;
}
@@ -657,7 +961,9 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
BUG();
break;
}
- write_host_tlbe(vcpu_e500, stlbsel, sesel);
+
+ write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
+ preempt_enable();
}
int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
@@ -679,8 +985,10 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
- vcpu->arch.pid = pid;
+ if (vcpu->arch.pid != pid) {
+ vcpu_e500->pid[0] = vcpu->arch.pid = pid;
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
}
void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -688,14 +996,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
struct tlbe *tlbe;
/* Insert large initial mapping for guest. */
- tlbe = &vcpu_e500->guest_tlb[1][0];
+ tlbe = &vcpu_e500->gtlb_arch[1][0];
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
tlbe->mas2 = 0;
tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
tlbe->mas7 = 0;
/* 4K map for serial output. Used by kernel wrapper. */
- tlbe = &vcpu_e500->guest_tlb[1][1];
+ tlbe = &vcpu_e500->gtlb_arch[1][1];
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
@@ -706,68 +1014,64 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
- vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
- vcpu_e500->guest_tlb[0] =
+ vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE;
+ vcpu_e500->gtlb_arch[0] =
kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->guest_tlb[0] == NULL)
+ if (vcpu_e500->gtlb_arch[0] == NULL)
goto err_out;
- vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
- vcpu_e500->shadow_tlb[0] =
- kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->shadow_tlb[0] == NULL)
- goto err_out_guest0;
-
- vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
- vcpu_e500->guest_tlb[1] =
+ vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE;
+ vcpu_e500->gtlb_arch[1] =
kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
- if (vcpu_e500->guest_tlb[1] == NULL)
- goto err_out_shadow0;
+ if (vcpu_e500->gtlb_arch[1] == NULL)
+ goto err_out_guest0;
- vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
- vcpu_e500->shadow_tlb[1] =
- kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL);
- if (vcpu_e500->shadow_tlb[1] == NULL)
+ vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *)
+ kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+ if (vcpu_e500->gtlb_priv[0] == NULL)
goto err_out_guest1;
+ vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *)
+ kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
- vcpu_e500->shadow_pages[0] = (struct page **)
- kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->shadow_pages[0] == NULL)
- goto err_out_shadow1;
+ if (vcpu_e500->gtlb_priv[1] == NULL)
+ goto err_out_priv0;
- vcpu_e500->shadow_pages[1] = (struct page **)
- kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL);
- if (vcpu_e500->shadow_pages[1] == NULL)
- goto err_out_page0;
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ goto err_out_priv1;
/* Init TLB configuration register */
vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
- vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
+ vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0];
vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
- vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
+ vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1];
return 0;
-err_out_page0:
- kfree(vcpu_e500->shadow_pages[0]);
-err_out_shadow1:
- kfree(vcpu_e500->shadow_tlb[1]);
+err_out_priv1:
+ kfree(vcpu_e500->gtlb_priv[1]);
+err_out_priv0:
+ kfree(vcpu_e500->gtlb_priv[0]);
err_out_guest1:
- kfree(vcpu_e500->guest_tlb[1]);
-err_out_shadow0:
- kfree(vcpu_e500->shadow_tlb[0]);
+ kfree(vcpu_e500->gtlb_arch[1]);
err_out_guest0:
- kfree(vcpu_e500->guest_tlb[0]);
+ kfree(vcpu_e500->gtlb_arch[0]);
err_out:
return -1;
}
void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
{
- kfree(vcpu_e500->shadow_pages[1]);
- kfree(vcpu_e500->shadow_pages[0]);
- kfree(vcpu_e500->shadow_tlb[1]);
- kfree(vcpu_e500->guest_tlb[1]);
- kfree(vcpu_e500->shadow_tlb[0]);
- kfree(vcpu_e500->guest_tlb[0]);
+ int stlbsel, i;
+
+ /* release all privs */
+ for (stlbsel = 0; stlbsel < 2; stlbsel++)
+ for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
+ struct tlbe_priv *priv =
+ &vcpu_e500->gtlb_priv[stlbsel][i];
+ kvmppc_e500_priv_release(priv);
+ }
+
+ kvmppc_e500_id_table_free(vcpu_e500);
+ kfree(vcpu_e500->gtlb_arch[1]);
+ kfree(vcpu_e500->gtlb_arch[0]);
}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 458946b4775d..59b88e99a235 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, yu.liu@freescale.com
*
@@ -55,6 +55,7 @@ extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
/* TLB helper functions */
static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
@@ -110,6 +111,16 @@ static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
return vcpu->arch.pid & 0xff;
}
+static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
+}
+
+static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & MSR_PR);
+}
+
static inline unsigned int get_cur_spid(
const struct kvmppc_vcpu_e500 *vcpu_e500)
{
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 616dd516ca1f..a107c9be0fb1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
+#include <asm/cputhreads.h>
#include "timing.h"
#include "../mm/mmu_decl.h"
@@ -38,8 +39,12 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
+#ifndef CONFIG_KVM_BOOK3S_64_HV
return !(v->arch.shared->msr & MSR_WE) ||
!!(v->arch.pending_exceptions);
+#else
+ return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
+#endif
}
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -73,7 +78,8 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
}
case HC_VENDOR_KVM | KVM_HC_FEATURES:
r = HC_EV_SUCCESS;
-#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */
+#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500)
+ /* XXX Missing magic page on 44x */
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
#endif
@@ -147,7 +153,7 @@ void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_init_vm(struct kvm *kvm)
{
- return 0;
+ return kvmppc_core_init_vm(kvm);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -163,6 +169,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
atomic_set(&kvm->online_vcpus, 0);
+
+ kvmppc_core_destroy_vm(kvm);
+
mutex_unlock(&kvm->lock);
}
@@ -180,10 +189,13 @@ int kvm_dev_ioctl_check_extension(long ext)
#else
case KVM_CAP_PPC_SEGSTATE:
#endif
- case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
+ r = 1;
+ break;
+#ifndef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
r = 1;
@@ -191,6 +203,21 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CAP_SPAPR_TCE:
+ r = 1;
+ break;
+ case KVM_CAP_PPC_SMT:
+ r = threads_per_core;
+ break;
+ case KVM_CAP_PPC_RMA:
+ r = 1;
+ /* PPC970 requires an RMA */
+ if (cpu_has_feature(CPU_FTR_ARCH_201))
+ r = 2;
+ break;
+#endif
default:
r = 0;
break;
@@ -211,7 +238,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
- return 0;
+ return kvmppc_core_prepare_memory_region(kvm, mem);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -219,7 +246,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_memory_slot old,
int user_alloc)
{
- return;
+ kvmppc_core_commit_memory_region(kvm, mem);
}
@@ -287,6 +314,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+ vcpu->arch.dec_expires = ~(u64)0;
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
@@ -313,6 +341,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif
kvmppc_core_vcpu_load(vcpu, cpu);
+ vcpu->cpu = smp_processor_id();
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -321,6 +350,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#ifdef CONFIG_BOOKE
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
+ vcpu->cpu = -1;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -492,15 +522,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
for (i = 0; i < 32; i++)
kvmppc_set_gpr(vcpu, i, gprs[i]);
vcpu->arch.osi_needed = 0;
+ } else if (vcpu->arch.hcall_needed) {
+ int i;
+
+ kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
+ for (i = 0; i < 9; ++i)
+ kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
+ vcpu->arch.hcall_needed = 0;
}
kvmppc_core_deliver_interrupts(vcpu);
- local_irq_disable();
- kvm_guest_enter();
- r = __kvmppc_vcpu_run(run, vcpu);
- kvm_guest_exit();
- local_irq_enable();
+ r = kvmppc_vcpu_run(run, vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -518,6 +551,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
+ } else if (vcpu->cpu != -1) {
+ smp_send_reschedule(vcpu->cpu);
}
return 0;
@@ -633,6 +668,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CREATE_SPAPR_TCE: {
+ struct kvm_create_spapr_tce create_tce;
+ struct kvm *kvm = filp->private_data;
+
+ r = -EFAULT;
+ if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
+ goto out;
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+ goto out;
+ }
+
+ case KVM_ALLOCATE_RMA: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_allocate_rma rma;
+
+ r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
+ if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
+ r = -EFAULT;
+ break;
+ }
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
default:
r = -ENOTTY;
}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index 319177df9587..07b6110a4bb7 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -56,15 +56,6 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
{
u64 old;
- do_div(duration, tb_ticks_per_usec);
- if (unlikely(duration > 0xFFFFFFFF)) {
- printk(KERN_ERR"%s - duration too big -> overflow"
- " duration %lld type %d exit #%d\n",
- __func__, duration, type,
- vcpu->arch.timing_count_type[type]);
- return;
- }
-
mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.timing_count_type[type]++;
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 3aca1b042b8c..b135d3d397db 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -103,7 +103,7 @@ TRACE_EVENT(kvm_gtlb_write,
* Book3S trace points *
*************************************************************************/
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_KVM_BOOK3S_PR
TRACE_EVENT(kvm_book3s_exit,
TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
@@ -252,7 +252,7 @@ TRACE_EVENT(kvm_book3s_mmu_flush,
),
TP_fast_assign(
- __entry->count = vcpu->arch.hpte_cache_count;
+ __entry->count = to_book3s(vcpu)->hpte_cache_count;
__entry->p1 = p1;
__entry->p2 = p2;
__entry->type = type;