summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/Kconfig.debug23
-rw-r--r--arch/powerpc/boot/dts/mpc5121.dtsi2
-rw-r--r--arch/powerpc/include/asm/context_tracking.h10
-rw-r--r--arch/powerpc/include/asm/cputable.h2
-rw-r--r--arch/powerpc/include/asm/firmware.h4
-rw-r--r--arch/powerpc/include/asm/hvcall.h3
-rw-r--r--arch/powerpc/include/asm/hw_irq.h17
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h7
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h13
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h8
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h41
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h114
-rw-r--r--arch/powerpc/include/asm/machdep.h3
-rw-r--r--arch/powerpc/include/asm/opal.h5
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h8
-rw-r--r--arch/powerpc/include/asm/pgalloc-64.h2
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h4
-rw-r--r--arch/powerpc/include/asm/pte-hash64-64k.h2
-rw-r--r--arch/powerpc/include/asm/reg.h1
-rw-r--r--arch/powerpc/include/asm/rtas.h2
-rw-r--r--arch/powerpc/include/asm/thread_info.h7
-rw-r--r--arch/powerpc/include/asm/udbg.h1
-rw-r--r--arch/powerpc/include/uapi/asm/cputable.h9
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h94
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/cputable.c14
-rw-r--r--arch/powerpc/kernel/entry_32.S2
-rw-r--r--arch/powerpc/kernel/entry_64.S7
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S8
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c4
-rw-r--r--arch/powerpc/kernel/misc_32.S11
-rw-r--r--arch/powerpc/kernel/misc_64.S11
-rw-r--r--arch/powerpc/kernel/pci-common.c106
-rw-r--r--arch/powerpc/kernel/pci_32.c2
-rw-r--r--arch/powerpc/kernel/pci_64.c2
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c3
-rw-r--r--arch/powerpc/kernel/process.c8
-rw-r--r--arch/powerpc/kernel/ptrace.c5
-rw-r--r--arch/powerpc/kernel/rtas.c113
-rw-r--r--arch/powerpc/kernel/rtas_flash.c10
-rw-r--r--arch/powerpc/kernel/signal.c7
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c8
-rw-r--r--arch/powerpc/kernel/sysfs.c2
-rw-r--r--arch/powerpc/kernel/traps.c90
-rw-r--r--arch/powerpc/kernel/udbg.c3
-rw-r--r--arch/powerpc/kvm/44x.c12
-rw-r--r--arch/powerpc/kvm/Kconfig26
-rw-r--r--arch/powerpc/kvm/Makefile12
-rw-r--r--arch/powerpc/kvm/book3s.c36
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c120
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c92
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c11
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c406
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S228
-rw-r--r--arch/powerpc/kvm/book3s_pr.c7
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c21
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c274
-rw-r--r--arch/powerpc/kvm/book3s_xics.c1270
-rw-r--r--arch/powerpc/kvm/book3s_xics.h130
-rw-r--r--arch/powerpc/kvm/booke.c158
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S42
-rw-r--r--arch/powerpc/kvm/e500.c14
-rw-r--r--arch/powerpc/kvm/e500.h22
-rw-r--r--arch/powerpc/kvm/e500_emulate.c19
-rw-r--r--arch/powerpc/kvm/e500_mmu.c192
-rw-r--r--arch/powerpc/kvm/e500mc.c16
-rw-r--r--arch/powerpc/kvm/emulate.c2
-rw-r--r--arch/powerpc/kvm/irq.h20
-rw-r--r--arch/powerpc/kvm/mpic.c1853
-rw-r--r--arch/powerpc/kvm/powerpc.c133
-rw-r--r--arch/powerpc/mm/fault.c41
-rw-r--r--arch/powerpc/mm/hash_utils_64.c37
-rw-r--r--arch/powerpc/mm/init_64.c3
-rw-r--r--arch/powerpc/perf/core-book3s.c280
-rw-r--r--arch/powerpc/platforms/40x/Kconfig1
-rw-r--r--arch/powerpc/platforms/44x/Kconfig1
-rw-r--r--arch/powerpc/platforms/85xx/Kconfig3
-rw-r--r--arch/powerpc/platforms/86xx/Kconfig3
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig1
-rw-r--r--arch/powerpc/platforms/Kconfig6
-rw-r--r--arch/powerpc/platforms/cell/iommu.c2
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc10x.h11
-rw-r--r--arch/powerpc/platforms/powermac/pci.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c45
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c21
-rw-r--r--arch/powerpc/platforms/powernv/pci.c12
-rw-r--r--arch/powerpc/platforms/powernv/pci.h2
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c16
-rw-r--r--arch/powerpc/platforms/powernv/smp.c66
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig1
-rw-r--r--arch/powerpc/platforms/pseries/msi.c21
-rw-r--r--arch/powerpc/platforms/pseries/pci.c53
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h4
-rw-r--r--arch/powerpc/platforms/pseries/setup.c2
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c22
-rw-r--r--arch/powerpc/platforms/wsp/ics.c2
-rw-r--r--arch/powerpc/platforms/wsp/wsp_pci.c2
-rw-r--r--arch/powerpc/sysdev/Makefile2
-rw-r--r--arch/powerpc/sysdev/ehv_pic.c2
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c11
-rw-r--r--arch/powerpc/sysdev/mpic.c16
-rw-r--r--arch/powerpc/sysdev/ppc4xx_pci.c15
-rw-r--r--arch/powerpc/sysdev/udbg_memcons.c105
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c8
-rw-r--r--arch/powerpc/sysdev/xics/ics-opal.c2
110 files changed, 6169 insertions, 585 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bbbe02197afb..c33e3ad2c8fd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -82,11 +82,6 @@ config GENERIC_HWEIGHT
bool
default y
-config GENERIC_GPIO
- bool
- help
- Generic GPIO API support
-
config PPC
bool
default y
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 5416e28a7538..863d877e0b5f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -262,8 +262,31 @@ config PPC_EARLY_DEBUG_OPAL_HVSI
Select this to enable early debugging for the PowerNV platform
using an "hvsi" console
+config PPC_EARLY_DEBUG_MEMCONS
+ bool "In memory console"
+ help
+ Select this to enable early debugging using an in memory console.
+ This console provides input and output buffers stored within the
+ kernel BSS and should be safe to select on any system. A debugger
+ can then be used to read kernel output or send input to the console.
endchoice
+config PPC_MEMCONS_OUTPUT_SIZE
+ int "In memory console output buffer size"
+ depends on PPC_EARLY_DEBUG_MEMCONS
+ default 4096
+ help
+ Selects the size of the output buffer (in bytes) of the in memory
+ console.
+
+config PPC_MEMCONS_INPUT_SIZE
+ int "In memory console input buffer size"
+ depends on PPC_EARLY_DEBUG_MEMCONS
+ default 128
+ help
+ Selects the size of the input buffer (in bytes) of the in memory
+ console.
+
config PPC_EARLY_DEBUG_OPAL
def_bool y
depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI
diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 2e82d0e71dd3..bd14c00e5146 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -152,6 +152,8 @@
compatible = "fsl,mpc5121-sdhc";
reg = <0x1500 0x100>;
interrupts = <8 0x8>;
+ dmas = <&dma0 30>;
+ dma-names = "rx-tx";
};
i2c@1700 {
diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h
new file mode 100644
index 000000000000..b6f5a33b8ee2
--- /dev/null
+++ b/arch/powerpc/include/asm/context_tracking.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_POWERPC_CONTEXT_TRACKING_H
+#define _ASM_POWERPC_CONTEXT_TRACKING_H
+
+#ifdef CONFIG_CONTEXT_TRACKING
+#define SCHEDULE_USER bl .schedule_user
+#else
+#define SCHEDULE_USER bl .schedule
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index fcc54ad159ba..26807e5aff51 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -224,8 +224,10 @@ extern const char *powerpc_base_platform;
/* We only set the TM feature if the kernel was compiled with TM supprt */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
#define CPU_FTR_TM_COMP CPU_FTR_TM
+#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM
#else
#define CPU_FTR_TM_COMP 0
+#define PPC_FEATURE2_HTM_COMP 0
#endif
/* We need to mark all pages as being coherent if we're SMP or we have a
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 0df54646f968..681bc0314b6b 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,6 +52,7 @@
#define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000)
#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
+#define FW_FEATURE_OPALv3 ASM_CONST(0x0000000400000000)
#ifndef __ASSEMBLY__
@@ -69,7 +70,8 @@ enum {
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
FW_FEATURE_PSERIES_ALWAYS = 0,
- FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2,
+ FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 |
+ FW_FEATURE_OPALv3,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 4bc2c3dad6ad..cf4df8e2139a 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -270,6 +270,9 @@
#define H_SET_MODE 0x31C
#define MAX_HCALL_OPCODE H_SET_MODE
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS 0xf000
+
#ifndef __ASSEMBLY__
/**
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index e45c4947a772..ba713f166fa5 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -95,15 +95,14 @@ static inline bool arch_irqs_disabled(void)
#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1)
#endif
-static inline void hard_irq_disable(void)
-{
- __hard_irq_disable();
- get_paca()->soft_enabled = 0;
- get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;
-}
-
-/* include/linux/interrupt.h needs hard_irq_disable to be a macro */
-#define hard_irq_disable hard_irq_disable
+#define hard_irq_disable() do { \
+ u8 _was_enabled = get_paca()->soft_enabled; \
+ __hard_irq_disable(); \
+ get_paca()->soft_enabled = 0; \
+ get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; \
+ if (_was_enabled) \
+ trace_hardirqs_off(); \
+} while(0)
static inline bool lazy_irq_pending(void)
{
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 5a56e1c5f851..349ed85c7d61 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void);
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+ unsigned int vec);
extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
bool upper, u32 val);
@@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
unsigned long pte_index);
extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
-extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
+ unsigned long gpa, bool dirty);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
@@ -458,6 +461,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
#define OSI_SC_MAGIC_R4 0x77810F9B
#define INS_DCBZ 0x7c0007ec
+/* TO = 31 for unconditional trap */
+#define INS_TW 0x7fe00008
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1dc9928..9c1ff330c805 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
}
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+ struct revmap_entry *rev)
+{
+ if (atomic_read(&kvm->arch.hpte_mod_interest))
+ rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index cdc3d2717cc6..9039d3c97eec 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -20,6 +20,11 @@
#ifndef __ASM_KVM_BOOK3S_ASM_H__
#define __ASM_KVM_BOOK3S_ASM_H__
+/* XICS ICP register offsets */
+#define XICS_XIRR 4
+#define XICS_MFRR 0xc
+#define XICS_IPI 2 /* interrupt source # for IPIs */
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@ struct kvmppc_host_state {
#ifdef CONFIG_KVM_BOOK3S_64_HV
u8 hwthread_req;
u8 hwthread_state;
-
+ u8 host_ipi;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
+ u32 saved_xirr;
u64 dabr;
u64 host_mmcr[3];
u32 host_pmc[8];
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index b7cd3356a532..d3c1eb34c986 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -26,6 +26,8 @@
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS 64
+#define KVMPPC_INST_EHPRIV 0x7c00021c
+
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
vcpu->arch.gpr[num] = val;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d1bb86074721..af326cde7cb6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -44,6 +44,10 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
+/* These values are internal and can be increased later */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 256
+
#if !defined(CONFIG_KVM_440)
#include <linux/mmu_notifier.h>
@@ -188,6 +192,10 @@ struct kvmppc_linear_info {
int type;
};
+/* XICS components, defined in book3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
/*
* The reverse mapping array has one entry for each HPTE,
* which stores the guest's view of the second word of the HPTE
@@ -255,6 +263,13 @@ struct kvm_arch {
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
+ struct list_head rtas_tokens;
+#endif
+#ifdef CONFIG_KVM_MPIC
+ struct openpic *mpic;
+#endif
+#ifdef CONFIG_KVM_XICS
+ struct kvmppc_xics *xics;
#endif
};
@@ -301,11 +316,13 @@ struct kvmppc_vcore {
* that a guest can register.
*/
struct kvmppc_vpa {
+ unsigned long gpa; /* Current guest phys addr */
void *pinned_addr; /* Address in kernel linear mapping */
void *pinned_end; /* End of region */
unsigned long next_gpa; /* Guest phys addr for update */
unsigned long len; /* Number of bytes required */
u8 update_pending; /* 1 => update pinned_addr from next_gpa */
+ bool dirty; /* true => area has been modified by kernel */
};
struct kvmppc_pte {
@@ -359,6 +376,11 @@ struct kvmppc_slb {
#define KVMPPC_BOOKE_MAX_IAC 4
#define KVMPPC_BOOKE_MAX_DAC 2
+/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
+#define KVMPPC_EPR_NONE 0 /* EPR not supported */
+#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */
+#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */
+
struct kvmppc_booke_debug_reg {
u32 dbcr0;
u32 dbcr1;
@@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg {
u64 dac[KVMPPC_BOOKE_MAX_DAC];
};
+#define KVMPPC_IRQ_DEFAULT 0
+#define KVMPPC_IRQ_MPIC 1
+#define KVMPPC_IRQ_XICS 2
+
+struct openpic;
+
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@@ -502,8 +530,11 @@ struct kvm_vcpu_arch {
spinlock_t wdt_lock;
struct timer_list wdt_timer;
u32 tlbcfg[4];
+ u32 tlbps[4];
u32 mmucfg;
+ u32 eptcfg;
u32 epr;
+ u32 crit_save;
struct kvmppc_booke_debug_reg dbg_reg;
#endif
gpa_t paddr_accessed;
@@ -521,7 +552,7 @@ struct kvm_vcpu_arch {
u8 sane;
u8 cpu_type;
u8 hcall_needed;
- u8 epr_enabled;
+ u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed;
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -548,6 +579,13 @@ struct kvm_vcpu_arch {
unsigned long magic_page_pa; /* phys addr to map the magic page to */
unsigned long magic_page_ea; /* effect. addr to map the magic page to */
+ int irq_type; /* one of KVM_IRQ_* */
+ int irq_cpu_id;
+ struct openpic *mpic; /* KVM_IRQ_MPIC */
+#ifdef CONFIG_KVM_XICS
+ struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
+
#ifdef CONFIG_KVM_BOOK3S_64_HV
struct kvm_vcpu_arch_shared shregs;
@@ -588,5 +626,6 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_FQPR 0x0060
#define __KVM_HAVE_ARCH_WQP
+#define __KVM_HAVE_CREATE_DEVICE
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 44a657adf416..a5287fe03d77 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -44,7 +44,7 @@ enum emulation_result {
EMULATE_DO_DCR, /* kvm_run filled with DCR request */
EMULATE_FAIL, /* can't emulate this instruction */
EMULATE_AGAIN, /* something went wrong. go again */
- EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */
+ EMULATE_EXIT_USER, /* emulation requires exit to user-space */
};
extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
-extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -131,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old);
+ const struct kvm_memory_slot *old);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
struct kvm_ppc_smmu_info *info);
extern void kvmppc_core_flush_memslot(struct kvm *kvm,
@@ -165,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
+ u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+ u32 *priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
@@ -246,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
+struct openpic;
+
#ifdef CONFIG_KVM_BOOK3S_64_HV
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
paca[cpu].kvm_hstate.xics_phys = addr;
}
+static inline u32 kvmppc_get_xics_latch(void)
+{
+ u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+
+ get_paca()->kvm_hstate.saved_xirr = 0;
+
+ return xirr;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{
+ paca[cpu].kvm_hstate.host_ipi = host_ipi;
+}
+
+extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
extern void kvm_linear_init(void);
#else
@@ -260,6 +289,46 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
static inline void kvm_linear_init(void)
{}
+
+static inline u32 kvmppc_get_xics_latch(void)
+{
+ return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ kvm_vcpu_kick(vcpu);
+}
+#endif
+
+#ifdef CONFIG_KVM_XICS
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
+}
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
+extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu);
+#else
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+ { return 0; }
+static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
+ unsigned long server)
+ { return -EINVAL; }
+static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
+ struct kvm_irq_level *args)
+ { return -ENOTTY; }
+static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+ { return 0; }
#endif
static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@@ -271,6 +340,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
#endif
}
+#ifdef CONFIG_KVM_MPIC
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+ u32 cpu);
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
+
+#else
+
+static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu)
+{
+ return -EINVAL;
+}
+
+static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
+ struct kvm_vcpu *vcpu)
+{
+}
+
+#endif /* CONFIG_KVM_MPIC */
+
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg);
int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@@ -283,8 +378,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
{
- /* Clear i-cache for new pages */
struct page *page;
+ /*
+ * We can only access pages that the kernel maps
+ * as memory. Bail out for unmapped ones.
+ */
+ if (!pfn_valid(pfn))
+ return;
+
+ /* Clear i-cache for new pages */
page = pfn_to_page(pfn);
if (!test_bit(PG_arch_1, &page->flags)) {
flush_dcache_icache_page(page);
@@ -324,4 +426,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
return ea;
}
+extern void xics_wake_cpu(int cpu);
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 3f3f691be2e7..92386fc4e82a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,6 +29,7 @@ struct rtc_time;
struct file;
struct pci_controller;
struct kimage;
+struct pci_host_bridge;
struct machdep_calls {
char *name;
@@ -108,6 +109,8 @@ struct machdep_calls {
void (*pcibios_fixup)(void);
int (*pci_probe_mode)(struct pci_bus *);
void (*pci_irq_fixup)(struct pci_dev *dev);
+ int (*pcibios_root_bridge_prepare)(struct pci_host_bridge
+ *bridge);
/* To setup PHBs when using automatic OF platform driver for PCI */
int (*pci_setup_phb)(struct pci_controller *host);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index b6c8b58b1d76..cbb9305ab15a 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -243,7 +243,8 @@ enum OpalMCE_TlbErrorType {
enum OpalThreadStatus {
OPAL_THREAD_INACTIVE = 0x0,
- OPAL_THREAD_STARTED = 0x1
+ OPAL_THREAD_STARTED = 0x1,
+ OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */
};
enum OpalPciBusCompare {
@@ -563,6 +564,8 @@ extern void opal_nvram_init(void);
extern int opal_machine_check(struct pt_regs *regs);
+extern void opal_shutdown(void);
+
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_H */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index ffbc5fd549ac..8b11b5bd9938 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -39,11 +39,6 @@ struct pci_controller {
resource_size_t io_base_phys;
resource_size_t pci_io_size;
- /* Some machines (PReP) have a non 1:1 mapping of
- * the PCI memory space in the CPU bus space
- */
- resource_size_t pci_mem_offset;
-
/* Some machines have a special region to forward the ISA
* "memory" cycles such as VGA memory regions. Left to 0
* if unsupported
@@ -86,6 +81,7 @@ struct pci_controller {
*/
struct resource io_resource;
struct resource mem_resources[3];
+ resource_size_t mem_offset[3];
int global_number; /* PCI domain number */
resource_size_t dma_window_base_cur;
@@ -163,6 +159,8 @@ struct pci_dn {
int pci_ext_config_space; /* for pci devices */
+ int force_32bit_msi:1;
+
struct pci_dev *pcidev; /* back-pointer to the pci device */
#ifdef CONFIG_EEH
struct eeh_dev *edev; /* eeh device */
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 91acb12bac92..b66ae722a8e9 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -186,7 +186,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
static inline pgtable_t pmd_pgtable(pmd_t pmd)
{
- return (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE);
+ return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 0c34e4803499..eccfc161e58e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -115,6 +115,10 @@
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff
+#define PPC_INST_MFSPR_DSCR_USER 0x7c0302a6
+#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff
+#define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6
+#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff
#define PPC_INST_SLBFEE 0x7c0007a7
#define PPC_INST_STRING 0x7c00042a
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index 3e13e23e4fdf..d836d945068d 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -47,7 +47,7 @@
* generic accessors and iterators here
*/
#define __real_pte(e,p) ((real_pte_t) { \
- (e), ((e) & _PAGE_COMBO) ? \
+ (e), (pte_val(e) & _PAGE_COMBO) ? \
(pte_val(*((p) + PTRS_PER_PTE))) : 0 })
#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
(((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 3d17427e4fd7..a6136515c7f2 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -300,6 +300,7 @@
#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */
#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */
#define LPCR_MER 0x00000800 /* Mediated External Exception */
+#define LPCR_MER_SH 11
#define LPCR_LPES 0x0000000c
#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */
#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index a8bc2bb4adc9..34fd70488d83 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -264,6 +264,8 @@ extern void rtas_progress(char *s, unsigned short hex);
extern void rtas_initialize(void);
extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
+extern int rtas_online_cpus_mask(cpumask_var_t cpus);
+extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
extern int rtas_ibm_suspend_me(struct rtas_args *);
struct rtc_time;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 8ceea14d6fe4..ba7b1973866e 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -97,7 +97,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
-#define TIF_MEMDIE 9 /* is terminating due to OOM killer */
+#define TIF_NOHZ 9 /* in adaptive nohz mode */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
@@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */
#define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
for stack store? */
+#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -124,8 +125,10 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
+#define _TIF_NOHZ (1<<TIF_NOHZ)
#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_NOHZ)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE)
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 5a7510e9d09d..dc590919f8eb 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -52,6 +52,7 @@ extern void __init udbg_init_40x_realmode(void);
extern void __init udbg_init_cpm(void);
extern void __init udbg_init_usbgecko(void);
extern void __init udbg_init_wsp(void);
+extern void __init udbg_init_memcons(void);
extern void __init udbg_init_ehv_bc(void);
extern void __init udbg_init_ps3gelic(void);
extern void __init udbg_init_debug_opal_raw(void);
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index ed9dd8156962..5b7657959faa 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -1,6 +1,7 @@
#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
#define _UAPI__ASM_POWERPC_CPUTABLE_H
+/* in AT_HWCAP */
#define PPC_FEATURE_32 0x80000000
#define PPC_FEATURE_64 0x40000000
#define PPC_FEATURE_601_INSTR 0x20000000
@@ -33,4 +34,12 @@
#define PPC_FEATURE_TRUE_LE 0x00000002
#define PPC_FEATURE_PPC_LE 0x00000001
+/* in AT_HWCAP2 */
+#define PPC_FEATURE2_ARCH_2_07 0x80000000
+#define PPC_FEATURE2_HTM 0x40000000
+#define PPC_FEATURE2_DSCR 0x20000000
+#define PPC_FEATURE2_EBB 0x10000000
+#define PPC_FEATURE2_ISEL 0x08000000
+#define PPC_FEATURE2_TAR 0x04000000
+
#endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 16064d00adb9..0fb1a6e9ff90 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -25,6 +25,8 @@
/* Select powerpc specific features in <linux/kvm.h> */
#define __KVM_HAVE_SPAPR_TCE
#define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_IRQCHIP
+#define __KVM_HAVE_IRQ_LINE
struct kvm_regs {
__u64 pc;
@@ -272,8 +274,31 @@ struct kvm_debug_exit_arch {
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
+ struct {
+ /* H/W breakpoint/watchpoint address */
+ __u64 addr;
+ /*
+ * Type denotes h/w breakpoint, read watchpoint, write
+ * watchpoint or watchpoint (both read and write).
+ */
+#define KVMPPC_DEBUG_NONE 0x0
+#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ (1UL << 3)
+ __u32 type;
+ __u32 reserved;
+ } bp[16];
};
+/* Debug related defines */
+/*
+ * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic
+ * and upper 16 bits are architecture specific. Architecture specific defines
+ * that ioctl is for setting hardware breakpoint or software breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+#define KVM_GUESTDBG_USE_HW_BP 0x00020000
+
/* definition of registers in kvm_run */
struct kvm_sync_regs {
};
@@ -299,6 +324,12 @@ struct kvm_allocate_rma {
__u64 rma_size;
};
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+ char name[120];
+ __u64 token; /* Use a token of 0 to undefine a mapping */
+};
+
struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
@@ -359,6 +390,26 @@ struct kvm_get_htab_header {
__u16 n_invalid;
};
+/* Per-vcpu XICS interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+
+#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */
+#define KVM_REG_PPC_ICP_CPPR_MASK 0xff
+#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */
+#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff
+#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */
+#define KVM_REG_PPC_ICP_MFRR_MASK 0xff
+#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
+#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
+
+/* Device control API: PPC-specific devices */
+#define KVM_DEV_MPIC_GRP_MISC 1
+#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
+
+#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */
+#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */
+
+/* One-Reg API: PPC-specific registers */
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@@ -417,4 +468,47 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
#define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
+/* Timer Status Register OR/CLEAR interface */
+#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
+#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
+#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
+#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
+
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
+
+/* MMU registers */
+#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
+ * KVM_CAP_SW_TLB ioctl
+ */
+#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
+
+/* PPC64 eXternal Interrupt Controller Specification */
+#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+
+/* Layout of 64-bit source attribute values */
+#define KVM_XICS_DESTINATION_SHIFT 0
+#define KVM_XICS_DESTINATION_MASK 0xffffffffULL
+#define KVM_XICS_PRIORITY_SHIFT 32
+#define KVM_XICS_PRIORITY_MASK 0xff
+#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
+#define KVM_XICS_MASKED (1ULL << 41)
+#define KVM_XICS_PENDING (1ULL << 42)
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 172233eab799..b51a97cfedf8 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -480,6 +480,7 @@ int main(void)
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
+ DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
#endif
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -576,6 +577,8 @@ int main(void)
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+ HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+ HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
HSTATE_FIELD(HSTATE_PMC, host_pmc);
HSTATE_FIELD(HSTATE_PURR, host_purr);
@@ -599,6 +602,7 @@ int main(void)
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+ DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
#endif /* CONFIG_PPC_BOOK3S */
#endif /* CONFIG_KVM */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index ae9f433daabf..c60bbec25c1f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -98,10 +98,14 @@ extern void __restore_cpu_e6500(void);
PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR)
#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR)
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -428,6 +432,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER7 (architected)",
.cpu_features = CPU_FTRS_POWER7,
.cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
.mmu_features = MMU_FTRS_POWER7,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -443,6 +448,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER8 (architected)",
.cpu_features = CPU_FTRS_POWER8,
.cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
.mmu_features = MMU_FTRS_POWER8,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -458,6 +464,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER7 (raw)",
.cpu_features = CPU_FTRS_POWER7,
.cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
.mmu_features = MMU_FTRS_POWER7,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -475,6 +482,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER7+ (raw)",
.cpu_features = CPU_FTRS_POWER7,
.cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features = COMMON_USER2_POWER7,
.mmu_features = MMU_FTRS_POWER7,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -492,6 +500,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER8 (raw)",
.cpu_features = CPU_FTRS_POWER8,
.cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
.mmu_features = MMU_FTRS_POWER8,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -1995,6 +2004,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_user_features = COMMON_USER_BOOKE |
PPC_FEATURE_HAS_SPE_COMP |
PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
.mmu_features = MMU_FTR_TYPE_FSL_E,
.icache_bsize = 32,
.dcache_bsize = 32,
@@ -2014,6 +2024,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
PPC_FEATURE_HAS_SPE_COMP |
PPC_FEATURE_HAS_EFP_SINGLE_COMP |
PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
.icache_bsize = 32,
.dcache_bsize = 32,
@@ -2030,6 +2041,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "e500mc",
.cpu_features = CPU_FTRS_E500MC,
.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
MMU_FTR_USE_TLBILX,
.icache_bsize = 64,
@@ -2048,6 +2060,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "e5500",
.cpu_features = CPU_FTRS_E5500,
.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
MMU_FTR_USE_TLBILX,
.icache_bsize = 64,
@@ -2069,6 +2082,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_features = CPU_FTRS_E6500,
.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
MMU_FTR_USE_TLBILX,
.icache_bsize = 64,
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e514de57a125..d22e73e4618b 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -439,8 +439,6 @@ ret_from_fork:
ret_from_kernel_thread:
REST_NVGPRS(r1)
bl schedule_tail
- li r3,0
- stw r3,0(r1)
mtlr r14
mr r3,r15
PPC440EP_ERR42
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3fe5259e2fea..51cfb8fc301f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -33,6 +33,7 @@
#include <asm/irqflags.h>
#include <asm/ftrace.h>
#include <asm/hw_irq.h>
+#include <asm/context_tracking.h>
/*
* System calls.
@@ -150,7 +151,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
CURRENT_THREAD_INFO(r11, r1)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
- bne- syscall_dotrace
+ bne syscall_dotrace
.Lsyscall_dotrace_cont:
cmpldi 0,r0,NR_syscalls
bge- syscall_enosys
@@ -376,8 +377,6 @@ _GLOBAL(ret_from_fork)
_GLOBAL(ret_from_kernel_thread)
bl .schedule_tail
REST_NVGPRS(r1)
- li r3,0
- std r3,0(r1)
ld r14, 0(r14)
mtlr r14
mr r3,r15
@@ -634,7 +633,7 @@ _GLOBAL(ret_from_except_lite)
andi. r0,r4,_TIF_NEED_RESCHED
beq 1f
bl .restore_interrupts
- bl .schedule
+ SCHEDULE_USER
b .ret_from_except_lite
1: bl .save_nvgprs
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 42a756eec9ff..645170a07ada 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -489,7 +489,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
*/
mfspr r14,SPRN_DBSR /* check single-step/branch taken */
- andis. r15,r14,DBSR_IC@h
+ andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
@@ -500,7 +500,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
bge+ cr1,1f
/* here it looks like we got an inappropriate debug exception. */
- lis r14,DBSR_IC@h /* clear the IC event */
+ lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */
rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */
mtspr SPRN_DBSR,r14
mtspr SPRN_CSRR1,r11
@@ -555,7 +555,7 @@ kernel_dbg_exc:
*/
mfspr r14,SPRN_DBSR /* check single-step/branch taken */
- andis. r15,r14,DBSR_IC@h
+ andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
@@ -566,7 +566,7 @@ kernel_dbg_exc:
bge+ cr1,1f
/* here it looks like we got an inappropriate debug exception. */
- lis r14,DBSR_IC@h /* clear the IC event */
+ lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */
rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */
mtspr SPRN_DBSR,r14
mtspr SPRN_DSRR1,r11
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 466a2908bb63..611acdf30096 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/cpu.h>
+#include <linux/hardirq.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -335,10 +336,13 @@ void default_machine_kexec(struct kimage *image)
pr_debug("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
+ * We setup preempt_count to avoid using VMX in memcpy.
* XXX: the task struct will likely be invalid once we do the copy!
*/
kexec_stack.thread_info.task = current_thread_info()->task;
kexec_stack.thread_info.flags = 0;
+ kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET;
+ kexec_stack.thread_info.cpu = current_thread_info()->cpu;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
* it. Also poison per_cpu_offset to catch anyone using non-static
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 19e096bd0e73..e469f30e6eeb 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2)
li r3,2
blr
+_GLOBAL(__bswapdi2)
+ rotlwi r9,r4,8
+ rotlwi r10,r3,8
+ rlwimi r9,r4,24,0,7
+ rlwimi r10,r3,24,0,7
+ rlwimi r9,r4,24,16,23
+ rlwimi r10,r3,24,16,23
+ mr r3,r9
+ mr r4,r10
+ blr
+
_GLOBAL(abs)
srawi r4,r3,31
xor r3,r3,r4
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 5cfa8008693b..6820e45f557b 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -234,6 +234,17 @@ _GLOBAL(__flush_dcache_icache)
isync
blr
+_GLOBAL(__bswapdi2)
+ srdi r8,r3,32
+ rlwinm r7,r3,8,0xffffffff
+ rlwimi r7,r3,24,0,7
+ rlwinm r9,r8,8,0xffffffff
+ rlwimi r7,r3,24,16,23
+ rlwimi r9,r8,24,0,7
+ rlwimi r9,r8,24,16,23
+ sldi r7,r7,32
+ or r3,r7,r9
+ blr
#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
/*
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f325dc923409..6053f037ef0a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -359,7 +359,6 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
enum pci_mmap_state mmap_state,
int write_combine)
{
- unsigned long prot = pgprot_val(protection);
/* Write combine is always 0 on non-memory space mappings. On
* memory space, if the user didn't pass 1, we check for a
@@ -376,9 +375,9 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
/* XXX would be nice to have a way to ask for write-through */
if (write_combine)
- return pgprot_noncached_wc(prot);
+ return pgprot_noncached_wc(protection);
else
- return pgprot_noncached(prot);
+ return pgprot_noncached(protection);
}
/*
@@ -786,22 +785,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
hose->isa_mem_size = size;
}
- /* We get the PCI/Mem offset from the first range or
- * the, current one if the offset came from an ISA
- * hole. If they don't match, bugger.
- */
- if (memno == 0 ||
- (isa_hole >= 0 && pci_addr != 0 &&
- hose->pci_mem_offset == isa_mb))
- hose->pci_mem_offset = cpu_addr - pci_addr;
- else if (pci_addr != 0 &&
- hose->pci_mem_offset != cpu_addr - pci_addr) {
- printk(KERN_INFO
- " \\--> Skipped (offset mismatch) !\n");
- continue;
- }
-
/* Build resource */
+ hose->mem_offset[memno] = cpu_addr - pci_addr;
res = &hose->mem_resources[memno++];
res->flags = IORESOURCE_MEM;
if (pci_space & 0x40000000)
@@ -817,20 +802,6 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
res->child = NULL;
}
}
-
- /* If there's an ISA hole and the pci_mem_offset is -not- matching
- * the ISA hole offset, then we need to remove the ISA hole from
- * the resource list for that brige
- */
- if (isa_hole >= 0 && hose->pci_mem_offset != isa_mb) {
- unsigned int next = isa_hole + 1;
- printk(KERN_INFO " Removing ISA hole at 0x%016llx\n", isa_mb);
- if (next < memno)
- memmove(&hose->mem_resources[isa_hole],
- &hose->mem_resources[next],
- sizeof(struct resource) * (memno - next));
- hose->mem_resources[--memno].flags = 0;
- }
}
/* Decide whether to display the domain number in /proc */
@@ -845,6 +816,14 @@ int pci_proc_domain(struct pci_bus *bus)
return 1;
}
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+ if (ppc_md.pcibios_root_bridge_prepare)
+ return ppc_md.pcibios_root_bridge_prepare(bridge);
+
+ return 0;
+}
+
/* This header fixup will do the resource fixup for all devices as they are
* probed, but not for bridge ranges
*/
@@ -908,6 +887,7 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
struct pci_controller *hose = pci_bus_to_host(bus);
struct pci_dev *dev = bus->self;
resource_size_t offset;
+ struct pci_bus_region region;
u16 command;
int i;
@@ -917,10 +897,10 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
/* Job is a bit different between memory and IO */
if (res->flags & IORESOURCE_MEM) {
- /* If the BAR is non-0 (res != pci_mem_offset) then it's probably been
- * initialized by somebody
- */
- if (res->start != hose->pci_mem_offset)
+ pcibios_resource_to_bus(dev, &region, res);
+
+ /* If the BAR is non-0 then it's probably been initialized */
+ if (region.start != 0)
return 0;
/* The BAR is 0, let's check if memory decoding is enabled on
@@ -932,11 +912,11 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
/* Memory decoding is enabled and the BAR is 0. If any of the bridge
* resources covers that starting address (0 then it's good enough for
- * us for memory
+ * us for memory space)
*/
for (i = 0; i < 3; i++) {
if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
- hose->mem_resources[i].start == hose->pci_mem_offset)
+ hose->mem_resources[i].start == hose->mem_offset[i])
return 0;
}
@@ -1373,10 +1353,9 @@ static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
no_io:
/* Check for memory */
- offset = hose->pci_mem_offset;
- pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset);
for (i = 0; i < 3; i++) {
pres = &hose->mem_resources[i];
+ offset = hose->mem_offset[i];
if (!(pres->flags & IORESOURCE_MEM))
continue;
pr_debug("hose mem res: %pR\n", pres);
@@ -1516,6 +1495,7 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
struct list_head *resources)
{
struct resource *res;
+ resource_size_t offset;
int i;
/* Hookup PHB IO resource */
@@ -1525,49 +1505,37 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
printk(KERN_WARNING "PCI: I/O resource not set for host"
" bridge %s (domain %d)\n",
hose->dn->full_name, hose->global_number);
-#ifdef CONFIG_PPC32
- /* Workaround for lack of IO resource only on 32-bit */
- res->start = (unsigned long)hose->io_base_virt - isa_io_base;
- res->end = res->start + IO_SPACE_LIMIT;
- res->flags = IORESOURCE_IO;
-#endif /* CONFIG_PPC32 */
- }
+ } else {
+ offset = pcibios_io_space_offset(hose);
- pr_debug("PCI: PHB IO resource = %016llx-%016llx [%lx]\n",
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned long)res->flags);
- pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose));
+ pr_debug("PCI: PHB IO resource = %08llx-%08llx [%lx] off 0x%08llx\n",
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned long)res->flags,
+ (unsigned long long)offset);
+ pci_add_resource_offset(resources, res, offset);
+ }
/* Hookup PHB Memory resources */
for (i = 0; i < 3; ++i) {
res = &hose->mem_resources[i];
if (!res->flags) {
- if (i > 0)
- continue;
printk(KERN_ERR "PCI: Memory resource 0 not set for "
"host bridge %s (domain %d)\n",
hose->dn->full_name, hose->global_number);
-#ifdef CONFIG_PPC32
- /* Workaround for lack of MEM resource only on 32-bit */
- res->start = hose->pci_mem_offset;
- res->end = (resource_size_t)-1LL;
- res->flags = IORESOURCE_MEM;
-#endif /* CONFIG_PPC32 */
+ continue;
}
+ offset = hose->mem_offset[i];
- pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", i,
+
+ pr_debug("PCI: PHB MEM resource %d = %08llx-%08llx [%lx] off 0x%08llx\n", i,
(unsigned long long)res->start,
(unsigned long long)res->end,
- (unsigned long)res->flags);
- pci_add_resource_offset(resources, res, hose->pci_mem_offset);
- }
-
- pr_debug("PCI: PHB MEM offset = %016llx\n",
- (unsigned long long)hose->pci_mem_offset);
- pr_debug("PCI: PHB IO offset = %08lx\n",
- (unsigned long)hose->io_base_virt - _IO_BASE);
+ (unsigned long)res->flags,
+ (unsigned long long)offset);
+ pci_add_resource_offset(resources, res, offset);
+ }
}
/*
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index e37c2152acf4..432459c817fa 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -295,7 +295,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
case IOBASE_BRIDGE_NUMBER:
return (long)hose->first_busno;
case IOBASE_MEMORY:
- return (long)hose->pci_mem_offset;
+ return (long)hose->mem_offset[0];
case IOBASE_IO:
return (long)hose->io_base_phys;
case IOBASE_ISA_IO:
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 51a133a78a09..873050d26840 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -246,7 +246,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
case IOBASE_BRIDGE_NUMBER:
return (long)hose->first_busno;
case IOBASE_MEMORY:
- return (long)hose->pci_mem_offset;
+ return (long)hose->mem_offset[0];
case IOBASE_IO:
return (long)hose->io_base_phys;
case IOBASE_ISA_IO:
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 78b8766fd79e..c29666586998 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3);
int __ucmpdi2(unsigned long long, unsigned long long);
EXPORT_SYMBOL(__ucmpdi2);
#endif
-
+long long __bswapdi2(long long);
+EXPORT_SYMBOL(__bswapdi2);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ceb4e7b62cf4..a902723fdc69 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -339,6 +339,13 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
static void prime_debug_regs(struct thread_struct *thread)
{
+ /*
+ * We could have inherited MSR_DE from userspace, since
+ * it doesn't get cleared on exception entry. Make sure
+ * MSR_DE is clear before we enable any debug events.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+
mtspr(SPRN_IAC1, thread->iac1);
mtspr(SPRN_IAC2, thread->iac2);
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
@@ -971,6 +978,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
* do some house keeping and then return from the fork or clone
* system call, using the stack frame created above.
*/
+ ((unsigned long *)sp)[0] = 0;
sp -= sizeof(struct pt_regs);
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 3b14d320e69f..98c2fc198712 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -32,6 +32,7 @@
#include <trace/syscall.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
+#include <linux/context_tracking.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -1788,6 +1789,8 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
+ user_exit();
+
secure_computing_strict(regs->gpr[0]);
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
@@ -1832,4 +1835,6 @@ void do_syscall_trace_leave(struct pt_regs *regs)
step = test_thread_flag(TIF_SINGLESTEP);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
+
+ user_enter();
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 1fd6e7b2f390..52add6f3e201 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/delay.h>
+#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -807,6 +808,95 @@ static void rtas_percpu_suspend_me(void *info)
__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
}
+enum rtas_cpu_state {
+ DOWN,
+ UP,
+};
+
+#ifndef CONFIG_SMP
+static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
+ cpumask_var_t cpus)
+{
+ if (!cpumask_empty(cpus)) {
+ cpumask_clear(cpus);
+ return -EINVAL;
+ } else
+ return 0;
+}
+#else
+/* On return cpumask will be altered to indicate CPUs changed.
+ * CPUs with states changed will be set in the mask,
+ * CPUs with status unchanged will be unset in the mask. */
+static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
+ cpumask_var_t cpus)
+{
+ int cpu;
+ int cpuret = 0;
+ int ret = 0;
+
+ if (cpumask_empty(cpus))
+ return 0;
+
+ for_each_cpu(cpu, cpus) {
+ switch (state) {
+ case DOWN:
+ cpuret = cpu_down(cpu);
+ break;
+ case UP:
+ cpuret = cpu_up(cpu);
+ break;
+ }
+ if (cpuret) {
+ pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
+ __func__,
+ ((state == UP) ? "up" : "down"),
+ cpu, cpuret);
+ if (!ret)
+ ret = cpuret;
+ if (state == UP) {
+ /* clear bits for unchanged cpus, return */
+ cpumask_shift_right(cpus, cpus, cpu);
+ cpumask_shift_left(cpus, cpus, cpu);
+ break;
+ } else {
+ /* clear bit for unchanged cpu, continue */
+ cpumask_clear_cpu(cpu, cpus);
+ }
+ }
+ }
+
+ return ret;
+}
+#endif
+
+int rtas_online_cpus_mask(cpumask_var_t cpus)
+{
+ int ret;
+
+ ret = rtas_cpu_state_change_mask(UP, cpus);
+
+ if (ret) {
+ cpumask_var_t tmp_mask;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_TEMPORARY))
+ return ret;
+
+ /* Use tmp_mask to preserve cpus mask from first failure */
+ cpumask_copy(tmp_mask, cpus);
+ rtas_offline_cpus_mask(tmp_mask);
+ free_cpumask_var(tmp_mask);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(rtas_online_cpus_mask);
+
+int rtas_offline_cpus_mask(cpumask_var_t cpus)
+{
+ return rtas_cpu_state_change_mask(DOWN, cpus);
+}
+EXPORT_SYMBOL(rtas_offline_cpus_mask);
+
int rtas_ibm_suspend_me(struct rtas_args *args)
{
long state;
@@ -814,6 +904,8 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
struct rtas_suspend_me_data data;
DECLARE_COMPLETION_ONSTACK(done);
+ cpumask_var_t offline_mask;
+ int cpuret;
if (!rtas_service_present("ibm,suspend-me"))
return -ENOSYS;
@@ -837,11 +929,24 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
return 0;
}
+ if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
+ return -ENOMEM;
+
atomic_set(&data.working, 0);
atomic_set(&data.done, 0);
atomic_set(&data.error, 0);
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
+
+ /* All present CPUs must be online */
+ cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
+ cpuret = rtas_online_cpus_mask(offline_mask);
+ if (cpuret) {
+ pr_err("%s: Could not bring present CPUs online.\n", __func__);
+ atomic_set(&data.error, cpuret);
+ goto out;
+ }
+
stop_topology_update();
/* Call function on all CPUs. One of us will make the
@@ -857,6 +962,14 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
start_topology_update();
+ /* Take down CPUs not online prior to suspend */
+ cpuret = rtas_offline_cpus_mask(offline_mask);
+ if (cpuret)
+ pr_warn("%s: Could not restore CPUs to offline state.\n",
+ __func__);
+
+out:
+ free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}
#else /* CONFIG_PPC_PSERIES */
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 5b3022470126..2f3cdb01506d 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -89,6 +89,7 @@
/* Array sizes */
#define VALIDATE_BUF_SIZE 4096
+#define VALIDATE_MSG_LEN 256
#define RTAS_MSG_MAXLEN 64
/* Quirk - RTAS requires 4k list length and block size */
@@ -466,7 +467,7 @@ static void validate_flash(struct rtas_validate_flash_t *args_buf)
}
static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
- char *msg)
+ char *msg, int msglen)
{
int n;
@@ -474,7 +475,8 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
n = sprintf(msg, "%d\n", args_buf->update_results);
if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) ||
(args_buf->update_results == VALIDATE_TMP_UPDATE))
- n += sprintf(msg + n, "%s\n", args_buf->buf);
+ n += snprintf(msg + n, msglen - n, "%s\n",
+ args_buf->buf);
} else {
n = sprintf(msg, "%d\n", args_buf->status);
}
@@ -486,11 +488,11 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf,
{
struct rtas_validate_flash_t *const args_buf =
&rtas_validate_flash_data;
- char msg[RTAS_MSG_MAXLEN];
+ char msg[VALIDATE_MSG_LEN];
int msglen;
mutex_lock(&rtas_validate_flash_mutex);
- msglen = get_validate_flash_msg(args_buf, msg);
+ msglen = get_validate_flash_msg(args_buf, msg, VALIDATE_MSG_LEN);
mutex_unlock(&rtas_validate_flash_mutex);
return simple_read_from_buffer(buf, count, ppos, msg, msglen);
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index cf12eae02de5..577a8aa69c6e 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -13,6 +13,7 @@
#include <linux/signal.h>
#include <linux/uprobes.h>
#include <linux/key.h>
+#include <linux/context_tracking.h>
#include <asm/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -24,7 +25,7 @@
* through debug.exception-trace sysctl.
*/
-int show_unhandled_signals = 0;
+int show_unhandled_signals = 1;
/*
* Allocate space for the signal frame
@@ -159,6 +160,8 @@ static int do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
+ user_exit();
+
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -169,4 +172,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
+
+ user_enter();
}
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index cd6e19d263b3..8a285876aef8 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -126,11 +126,3 @@ asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
return sys_sync_file_range(fd, offset, nbytes, flags);
}
-
-asmlinkage long compat_sys_fanotify_mark(int fanotify_fd, unsigned int flags,
- unsigned mask_hi, unsigned mask_lo,
- int dfd, const char __user *pathname)
-{
- u64 mask = ((u64)mask_hi << 32) | mask_lo;
- return sys_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
-}
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 3ce1f864c2d3..e68a84568b8b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -180,7 +180,7 @@ SYSFS_PMCSETUP(dscr, SPRN_DSCR);
SYSFS_PMCSETUP(pir, SPRN_PIR);
static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
-static DEVICE_ATTR(spurr, 0600, show_spurr, NULL);
+static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
static DEVICE_ATTR(purr, 0600, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 37cc40ef5043..a7a648f6b750 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -35,6 +35,7 @@
#include <linux/kdebug.h>
#include <linux/debugfs.h>
#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -667,6 +668,7 @@ int machine_check_generic(struct pt_regs *regs)
void machine_check_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
int recover = 0;
__get_cpu_var(irq_stat).mce_exceptions++;
@@ -683,7 +685,7 @@ void machine_check_exception(struct pt_regs *regs)
recover = cur_cpu_spec->machine_check(regs);
if (recover > 0)
- return;
+ goto bail;
#if defined(CONFIG_8xx) && defined(CONFIG_PCI)
/* the qspan pci read routines can cause machine checks -- Cort
@@ -693,20 +695,23 @@ void machine_check_exception(struct pt_regs *regs)
* -- BenH
*/
bad_page_fault(regs, regs->dar, SIGBUS);
- return;
+ goto bail;
#endif
if (debugger_fault_handler(regs))
- return;
+ goto bail;
if (check_io_access(regs))
- return;
+ goto bail;
die("Machine check", regs, SIGBUS);
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
panic("Unrecoverable Machine check");
+
+bail:
+ exception_exit(prev_state);
}
void SMIException(struct pt_regs *regs)
@@ -716,20 +721,29 @@ void SMIException(struct pt_regs *regs)
void unknown_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
+
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
_exception(SIGTRAP, regs, 0, 0);
+
+ exception_exit(prev_state);
}
void instruction_breakpoint_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
+
if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- return;
+ goto bail;
if (debugger_iabr_match(regs))
- return;
+ goto bail;
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+
+bail:
+ exception_exit(prev_state);
}
void RunModeException(struct pt_regs *regs)
@@ -739,15 +753,20 @@ void RunModeException(struct pt_regs *regs)
void __kprobes single_step_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
+
clear_single_step(regs);
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- return;
+ goto bail;
if (debugger_sstep(regs))
- return;
+ goto bail;
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+
+bail:
+ exception_exit(prev_state);
}
/*
@@ -970,7 +989,10 @@ static int emulate_instruction(struct pt_regs *regs)
#ifdef CONFIG_PPC64
/* Emulate the mfspr rD, DSCR. */
- if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) &&
+ if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
+ PPC_INST_MFSPR_DSCR_USER) ||
+ ((instword & PPC_INST_MFSPR_DSCR_MASK) ==
+ PPC_INST_MFSPR_DSCR)) &&
cpu_has_feature(CPU_FTR_DSCR)) {
PPC_WARN_EMULATED(mfdscr, regs);
rd = (instword >> 21) & 0x1f;
@@ -978,7 +1000,10 @@ static int emulate_instruction(struct pt_regs *regs)
return 0;
}
/* Emulate the mtspr DSCR, rD. */
- if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) &&
+ if ((((instword & PPC_INST_MTSPR_DSCR_USER_MASK) ==
+ PPC_INST_MTSPR_DSCR_USER) ||
+ ((instword & PPC_INST_MTSPR_DSCR_MASK) ==
+ PPC_INST_MTSPR_DSCR)) &&
cpu_has_feature(CPU_FTR_DSCR)) {
PPC_WARN_EMULATED(mtdscr, regs);
rd = (instword >> 21) & 0x1f;
@@ -999,6 +1024,7 @@ int is_valid_bugaddr(unsigned long addr)
void __kprobes program_check_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
extern int do_mathemu(struct pt_regs *regs);
@@ -1008,26 +1034,26 @@ void __kprobes program_check_exception(struct pt_regs *regs)
if (reason & REASON_FP) {
/* IEEE FP exception */
parse_fpe(regs);
- return;
+ goto bail;
}
if (reason & REASON_TRAP) {
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
- return;
+ goto bail;
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
- return;
+ goto bail;
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
- return;
+ goto bail;
}
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
- return;
+ goto bail;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (reason & REASON_TM) {
@@ -1043,7 +1069,7 @@ void __kprobes program_check_exception(struct pt_regs *regs)
if (!user_mode(regs) &&
report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
- return;
+ goto bail;
}
/* If usermode caused this, it's done something illegal and
* gets a SIGILL slap on the wrist. We call it an illegal
@@ -1053,7 +1079,7 @@ void __kprobes program_check_exception(struct pt_regs *regs)
*/
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
- return;
+ goto bail;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
"at %lx (msr 0x%x)\n", regs->nip, reason);
@@ -1077,16 +1103,16 @@ void __kprobes program_check_exception(struct pt_regs *regs)
switch (do_mathemu(regs)) {
case 0:
emulate_single_step(regs);
- return;
+ goto bail;
case 1: {
int code = 0;
code = __parse_fpscr(current->thread.fpscr.val);
_exception(SIGFPE, regs, code, regs->nip);
- return;
+ goto bail;
}
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- return;
+ goto bail;
}
/* fall through on any other errors */
#endif /* CONFIG_MATH_EMULATION */
@@ -1097,10 +1123,10 @@ void __kprobes program_check_exception(struct pt_regs *regs)
case 0:
regs->nip += 4;
emulate_single_step(regs);
- return;
+ goto bail;
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- return;
+ goto bail;
}
}
@@ -1108,10 +1134,14 @@ void __kprobes program_check_exception(struct pt_regs *regs)
_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+
+bail:
+ exception_exit(prev_state);
}
void alignment_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
/* We restore the interrupt state now */
@@ -1125,7 +1155,7 @@ void alignment_exception(struct pt_regs *regs)
if (fixed == 1) {
regs->nip += 4; /* skip over emulated instruction */
emulate_single_step(regs);
- return;
+ goto bail;
}
/* Operand address was bad */
@@ -1140,6 +1170,9 @@ void alignment_exception(struct pt_regs *regs)
_exception(sig, regs, code, regs->dar);
else
bad_page_fault(regs, regs->dar, sig);
+
+bail:
+ exception_exit(prev_state);
}
void StackOverflow(struct pt_regs *regs)
@@ -1168,23 +1201,32 @@ void trace_syscall(struct pt_regs *regs)
void kernel_fp_unavailable_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
+
printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
+
+ exception_exit(prev_state);
}
void altivec_unavailable_exception(struct pt_regs *regs)
{
+ enum ctx_state prev_state = exception_enter();
+
if (user_mode(regs)) {
/* A user program has executed an altivec instruction,
but this kernel doesn't support altivec. */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- return;
+ goto bail;
}
printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
+
+bail:
+ exception_exit(prev_state);
}
void vsx_unavailable_exception(struct pt_regs *regs)
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 13b867093499..9d3fdcd66290 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -64,6 +64,9 @@ void __init udbg_early_init(void)
udbg_init_usbgecko();
#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
udbg_init_wsp();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS)
+ /* In memory console */
+ udbg_init_memcons();
#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC)
udbg_init_ehv_bc();
#elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC)
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 3d7fd21c65f9..2f5c6b6d6877 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ return -EINVAL;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ return -EINVAL;
+}
+
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec72e43..eb643f862579 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,21 +136,41 @@ config KVM_E500V2
If unsure, say N.
config KVM_E500MC
- bool "KVM support for PowerPC E500MC/E5500 processors"
+ bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
- Support running unmodified E500MC/E5500 (32-bit) guest kernels in
- virtual machines on E500MC/E5500 host processors.
+ Support running unmodified E500MC/E5500/E6500 guest kernels in
+ virtual machines on E500MC/E5500/E6500 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
+config KVM_MPIC
+ bool "KVM in-kernel MPIC emulation"
+ depends on KVM && E500
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_MSI
+ help
+ Enable support for emulating MPIC devices inside the
+ host kernel, rather than relying on userspace to emulate.
+ Currently, support is limited to certain versions of
+ Freescale's MPIC implementation.
+
+config KVM_XICS
+ bool "KVM in-kernel XICS emulation"
+ depends on KVM_BOOK3S_64 && !KVM_MPIC
+ ---help---
+ Include support for the XICS (eXternal Interrupt Controller
+ Specification) interrupt controller architecture used on
+ IBM POWER (pSeries) servers.
+
source drivers/vhost/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b772eded8c26..422de3f4d46c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o
+kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
+ book3s_hv_rm_xics.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
- book3s_hv_builtin.o
+ book3s_hv_builtin.o \
+ $(kvm-book3s_64-builtin-xics-objs-y)
+
+kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
+ book3s_xics.o
kvm-book3s_64-module-objs := \
../../../virt/kvm/kvm_main.o \
@@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \
emulate.o \
book3s.o \
book3s_64_vio.o \
+ book3s_rtas.o \
$(kvm-book3s_64-objs-y)
kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -103,6 +110,9 @@ kvm-book3s_32-objs := \
book3s_32_mmu.o
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
+kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_440) += kvm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a4b645285240..700df6f1d32c 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
return prio;
}
-static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec)
{
unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_book3s_queue_irqprio(vcpu, vec);
}
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
@@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
break;
#endif /* CONFIG_ALTIVEC */
+ case KVM_REG_PPC_DEBUG_INST: {
+ u32 opcode = INS_TW;
+ r = copy_to_user((u32 __user *)(long)reg->addr,
+ &opcode, sizeof(u32));
+ break;
+ }
+#ifdef CONFIG_KVM_XICS
+ case KVM_REG_PPC_ICP_STATE:
+ if (!vcpu->arch.icp) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+ break;
+#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
break;
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_KVM_XICS
+ case KVM_REG_PPC_ICP_STATE:
+ if (!vcpu->arch.icp) {
+ r = -ENXIO;
+ break;
+ }
+ r = kvmppc_xics_set_icp(vcpu,
+ set_reg_val(reg->id, val));
+ break;
+#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
+
void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index da98e26f6e45..5880dfb31074 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Harvest R and C */
rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
- rev[i].guest_rpte = ptel | rcbits;
+ if (rcbits & ~rev[i].guest_rpte) {
+ rev[i].guest_rpte = ptel | rcbits;
+ note_hpte_modification(kvm, &rev[i]);
+ }
}
unlock_rmap(rmapp);
hptep[0] &= ~HPTE_V_HVLOCK;
@@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Now check and modify the HPTE */
if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
kvmppc_clear_ref_hpte(kvm, hptep, i);
- rev[i].guest_rpte |= HPTE_R_R;
+ if (!(rev[i].guest_rpte & HPTE_R_R)) {
+ rev[i].guest_rpte |= HPTE_R_R;
+ note_hpte_modification(kvm, &rev[i]);
+ }
ret = 1;
}
hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
hptep[1] &= ~HPTE_R_C;
eieio();
hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
- rev[i].guest_rpte |= HPTE_R_C;
+ if (!(rev[i].guest_rpte & HPTE_R_C)) {
+ rev[i].guest_rpte |= HPTE_R_C;
+ note_hpte_modification(kvm, &rev[i]);
+ }
ret = 1;
}
hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
return ret;
}
+static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+ struct kvm_memory_slot *memslot,
+ unsigned long *map)
+{
+ unsigned long gfn;
+
+ if (!vpa->dirty || !vpa->pinned_addr)
+ return;
+ gfn = vpa->gpa >> PAGE_SHIFT;
+ if (gfn < memslot->base_gfn ||
+ gfn >= memslot->base_gfn + memslot->npages)
+ return;
+
+ vpa->dirty = false;
+ if (map)
+ __set_bit_le(gfn - memslot->base_gfn, map);
+}
+
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long *map)
{
unsigned long i;
unsigned long *rmapp;
+ struct kvm_vcpu *vcpu;
preempt_disable();
rmapp = memslot->arch.rmap;
@@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
__set_bit_le(i, map);
++rmapp;
}
+
+ /* Harvest dirty bits from VPA and DTL updates */
+ /* Note: we never modify the SLB shadow buffer areas */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
+ harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
preempt_enable();
return 0;
}
@@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
unsigned long gfn = gpa >> PAGE_SHIFT;
struct page *page, *pages[1];
int npages;
- unsigned long hva, psize, offset;
+ unsigned long hva, offset;
unsigned long pa;
unsigned long *physp;
int srcu_idx;
@@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
- psize = PAGE_SIZE;
- if (PageHuge(page)) {
- page = compound_head(page);
- psize <<= compound_order(page);
- }
- offset = gpa & (psize - 1);
+ offset = gpa & (PAGE_SIZE - 1);
if (nb_ret)
- *nb_ret = psize - offset;
+ *nb_ret = PAGE_SIZE - offset;
return page_address(page) + offset;
err:
@@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
return NULL;
}
-void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
+ bool dirty)
{
struct page *page = virt_to_page(va);
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn;
+ unsigned long *rmap;
+ int srcu_idx;
put_page(page);
+
+ if (!dirty || !kvm->arch.using_mmu_notifiers)
+ return;
+
+ /* We need to mark this page dirty in the rmap chain */
+ gfn = gpa >> PAGE_SHIFT;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (memslot) {
+ rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ lock_rmap(rmap);
+ *rmap |= KVMPPC_RMAP_CHANGED;
+ unlock_rmap(rmap);
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
}
/*
@@ -1193,16 +1245,36 @@ struct kvm_htab_ctx {
#define HPTE_SIZE (2 * sizeof(unsigned long))
+/*
+ * Returns 1 if this HPT entry has been modified or has pending
+ * R/C bit changes.
+ */
+static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
+{
+ unsigned long rcbits_unset;
+
+ if (revp->guest_rpte & HPTE_GR_MODIFIED)
+ return 1;
+
+ /* Also need to consider changes in reference and changed bits */
+ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+ if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
+ return 1;
+
+ return 0;
+}
+
static long record_hpte(unsigned long flags, unsigned long *hptp,
unsigned long *hpte, struct revmap_entry *revp,
int want_valid, int first_pass)
{
unsigned long v, r;
+ unsigned long rcbits_unset;
int ok = 1;
int valid, dirty;
/* Unmodified entries are uninteresting except on the first pass */
- dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+ dirty = hpte_dirty(revp, hptp);
if (!first_pass && !dirty)
return 0;
@@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax();
v = hptp[0];
+
+ /* re-evaluate valid and dirty from synchronized HPTE value */
+ valid = !!(v & HPTE_V_VALID);
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+ /* Harvest R and C into guest view if necessary */
+ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+ if (valid && (rcbits_unset & hptp[1])) {
+ revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
+ HPTE_GR_MODIFIED;
+ dirty = 1;
+ }
+
if (v & HPTE_V_ABSENT) {
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
+ valid = 1;
}
- /* re-evaluate valid and dirty from synchronized HPTE value */
- valid = !!(v & HPTE_V_VALID);
if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
valid = 0;
- r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
- dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+ r = revp->guest_rpte;
/* only clear modified if this is the right sort of entry */
if (valid == want_valid && dirty) {
r &= ~HPTE_GR_MODIFIED;
@@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
/* Skip uninteresting entries, i.e. clean on not-first pass */
if (!first_pass) {
while (i < kvm->arch.hpt_npte &&
- !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+ !hpte_dirty(revp, hptp)) {
++i;
hptp += 2;
++revp;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 836c56975e21..1f6344c4408d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->papr_hcall.args[i] = gpr;
}
- emulated = EMULATE_DO_PAPR;
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ emulated = EMULATE_EXIT_USER;
break;
}
#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f5416934932b..9de24f8e03c7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -66,6 +66,31 @@
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int me;
+ int cpu = vcpu->cpu;
+ wait_queue_head_t *wqp;
+
+ wqp = kvm_arch_vcpu_wq(vcpu);
+ if (waitqueue_active(wqp)) {
+ wake_up_interruptible(wqp);
+ ++vcpu->stat.halt_wakeup;
+ }
+
+ me = get_cpu();
+
+ /* CPU points to the first thread of the core */
+ if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+ int real_cpu = cpu + vcpu->arch.ptid;
+ if (paca[real_cpu].kvm_hstate.xics_phys)
+ xics_wake_cpu(real_cpu);
+ else if (cpu_online(cpu))
+ smp_send_reschedule(cpu);
+ }
+ put_cpu();
+}
+
/*
* We use the vcpu_load/put functions to measure stolen time.
* Stolen time is counted as time when either the vcpu is able to
@@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
len = ((struct reg_vpa *)va)->length.hword;
else
len = ((struct reg_vpa *)va)->length.word;
- kvmppc_unpin_guest_page(kvm, va);
+ kvmppc_unpin_guest_page(kvm, va, vpa, false);
/* Check length */
if (len > nb || len < sizeof(struct reg_vpa))
@@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
va = NULL;
nb = 0;
if (gpa)
- va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+ va = kvmppc_pin_guest_page(kvm, gpa, &nb);
spin_lock(&vcpu->arch.vpa_update_lock);
if (gpa == vpap->next_gpa)
break;
/* sigh... unpin that one and try again */
if (va)
- kvmppc_unpin_guest_page(kvm, va);
+ kvmppc_unpin_guest_page(kvm, va, gpa, false);
}
vpap->update_pending = 0;
@@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
* has changed the mappings underlying guest memory,
* so unregister the region.
*/
- kvmppc_unpin_guest_page(kvm, va);
+ kvmppc_unpin_guest_page(kvm, va, gpa, false);
va = NULL;
}
if (vpap->pinned_addr)
- kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+ kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
+ vpap->dirty);
+ vpap->gpa = gpa;
vpap->pinned_addr = va;
+ vpap->dirty = false;
if (va)
vpap->pinned_end = va + vpap->len;
}
@@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
/* order writing *dt vs. writing vpa->dtl_idx */
smp_wmb();
vpa->dtl_idx = ++vcpu->arch.dtl_index;
+ vcpu->arch.dtl.dirty = true;
}
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
struct kvm_vcpu *tvcpu;
- int idx;
+ int idx, rc;
switch (req) {
case H_ENTER:
@@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
break;
+ case H_RTAS:
+ if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ return RESUME_HOST;
+
+ rc = kvmppc_rtas_hcall(vcpu);
+
+ if (rc == -ENOENT)
+ return RESUME_HOST;
+ else if (rc == 0)
+ break;
+
+ /* Send the error out to userspace via KVM_RUN */
+ return rc;
+
+ case H_XIRR:
+ case H_CPPR:
+ case H_EOI:
+ case H_IPI:
+ if (kvmppc_xics_enabled(vcpu)) {
+ ret = kvmppc_xics_hcall(vcpu, req);
+ break;
+ } /* fallthrough */
default:
return RESUME_HOST;
}
@@ -913,15 +964,19 @@ out:
return ERR_PTR(err);
}
+static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+ if (vpa->pinned_addr)
+ kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
+ vpa->dirty);
+}
+
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->arch.vpa_update_lock);
- if (vcpu->arch.dtl.pinned_addr)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
- if (vcpu->arch.slb_shadow.pinned_addr)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
- if (vcpu->arch.vpa.pinned_addr)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+ unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
+ unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
+ unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern void xics_wake_cpu(int cpu);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
vc->runner = vcpu;
n_ceded = 0;
- list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+ list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
if (!v->arch.pending_exceptions)
n_ceded += v->arch.ceded;
+ else
+ v->arch.ceded = 0;
+ }
if (n_ceded == vc->n_runnable)
kvmppc_vcore_blocked(vc);
else
@@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old)
+ const struct kvm_memory_slot *old)
{
unsigned long npages = mem->memory_size >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
- if (npages && old.npages) {
+ if (npages && old->npages) {
/*
* If modifying a memslot, reset all the rmap dirty bits.
* If this is a new memslot, we don't need to do anything
@@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
cpumask_setall(&kvm->arch.need_tlb_flush);
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
kvm->arch.rma = NULL;
@@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvm->arch.rma = NULL;
}
+ kvmppc_rtas_tokens_free(kvm);
+
kvmppc_free_hpt(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 19c93bae1aea..6dcbb49105a4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
-/*
- * Note modification of an HPTE; set the HPTE modified bit
- * if anyone is interested.
- */
-static inline void note_hpte_modification(struct kvm *kvm,
- struct revmap_entry *rev)
-{
- if (atomic_read(&kvm->arch.hpte_mod_interest))
- rev->guest_rpte |= HPTE_GR_MODIFIED;
-}
-
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 000000000000..b4b0082f761c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+ __asm__ __volatile__("sync; stbcix %0,0,%1"
+ : : "r" (val), "r" (paddr) : "memory");
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *this_vcpu)
+{
+ struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+ unsigned long xics_phys;
+ int cpu;
+
+ /* Mark the target VCPU as having an interrupt pending */
+ vcpu->stat.queue_intr++;
+ set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+ /* Kick self ? Just set MER and return */
+ if (vcpu == this_vcpu) {
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+ return;
+ }
+
+ /* Check if the core is loaded, if not, too hard */
+ cpu = vcpu->cpu;
+ if (cpu < 0 || cpu >= nr_cpu_ids) {
+ this_icp->rm_action |= XICS_RM_KICK_VCPU;
+ this_icp->rm_kick_target = vcpu;
+ return;
+ }
+ /* In SMT cpu will always point to thread 0, we adjust it */
+ cpu += vcpu->arch.ptid;
+
+ /* Not too hard, then poke the target */
+ xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+ /* Note: Only called on self ! */
+ clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+ &vcpu->arch.pending_exceptions);
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+ union kvmppc_icp_state old,
+ union kvmppc_icp_state new)
+{
+ struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ bool success;
+
+ /* Calculate new output value */
+ new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+ /* Attempt atomic update */
+ success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+ if (!success)
+ goto bail;
+
+ /*
+ * Check for output state update
+ *
+ * Note that this is racy since another processor could be updating
+ * the state already. This is why we never clear the interrupt output
+ * here, we only ever set it. The clear only happens prior to doing
+ * an update and only by the processor itself. Currently we do it
+ * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+ *
+ * We also do not try to figure out whether the EE state has changed,
+ * we unconditionally set it if the new state calls for it. The reason
+ * for that is that we opportunistically remove the pending interrupt
+ * flag when raising CPPR, so we need to set it back here if an
+ * interrupt is still pending.
+ */
+ if (new.out_ee)
+ icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+ /* Expose the state change for debug purposes */
+ this_vcpu->arch.icp->rm_dbgstate = new;
+ this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+ bail:
+ return success;
+}
+
+static inline int check_too_hard(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
+}
+
+static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u8 new_cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool resend;
+
+ /*
+ * This handles several related states in one operation:
+ *
+ * ICP State: Down_CPPR
+ *
+ * Load CPPR with new value and if the XISR is 0
+ * then check for resends:
+ *
+ * ICP State: Resend
+ *
+ * If MFRR is more favored than CPPR, check for IPIs
+ * and notify ICS of a potential resend. This is done
+ * asynchronously (when used in real mode, we will have
+ * to exit here).
+ *
+ * We do not handle the complete Check_IPI as documented
+ * here. In the PAPR, this state will be used for both
+ * Set_MFRR and Down_CPPR. However, we know that we aren't
+ * changing the MFRR state here so we don't need to handle
+ * the case of an MFRR causing a reject of a pending irq,
+ * this will have been handled when the MFRR was set in the
+ * first place.
+ *
+ * Thus we don't have to handle rejects, only resends.
+ *
+ * When implementing real mode for HV KVM, resend will lead to
+ * a H_TOO_HARD return and the whole transaction will be handled
+ * in virtual mode.
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Down_CPPR */
+ new_state.cppr = new_cppr;
+
+ /*
+ * Cut down Resend / Check_IPI / IPI
+ *
+ * The logic is that we cannot have a pending interrupt
+ * trumped by an IPI at this point (see above), so we
+ * know that either the pending interrupt is already an
+ * IPI (in which case we don't care to override it) or
+ * it's either more favored than us or non existent
+ */
+ if (new_state.mfrr < new_cppr &&
+ new_state.mfrr <= new_state.pending_pri) {
+ new_state.pending_pri = new_state.mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ /* Latch/clear resend bit */
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /*
+ * Now handle resend checks. Those are asynchronous to the ICP
+ * state update in HW (ie bus transactions) so we can handle them
+ * separately here as well.
+ */
+ if (resend)
+ icp->rm_action |= XICS_RM_CHECK_RESEND;
+}
+
+
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 xirr;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /* First clear the interrupt */
+ icp_rm_clr_vcpu_irq(icp->vcpu);
+
+ /*
+ * ICP State: Accept_Interrupt
+ *
+ * Return the pending interrupt (if any) along with the
+ * current CPPR, then clear the XISR & set CPPR to the
+ * pending priority
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+ if (!old_state.xisr)
+ break;
+ new_state.cppr = new_state.pending_pri;
+ new_state.pending_pri = 0xff;
+ new_state.xisr = 0;
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Return the result in GPR4 */
+ vcpu->arch.gpr[4] = xirr;
+
+ return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
+ u32 reject;
+ bool resend;
+ bool local;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ local = this_icp->server_num == server;
+ if (local)
+ icp = this_icp;
+ else
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+
+ /*
+ * ICP state: Set_MFRR
+ *
+ * If the CPPR is more favored than the new MFRR, then
+ * nothing needs to be done as there can be no XISR to
+ * reject.
+ *
+ * If the CPPR is less favored, then we might be replacing
+ * an interrupt, and thus need to possibly reject it as in
+ *
+ * ICP state: Check_IPI
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Set_MFRR */
+ new_state.mfrr = mfrr;
+
+ /* Check_IPI */
+ reject = 0;
+ resend = false;
+ if (mfrr < new_state.cppr) {
+ /* Reject a pending interrupt if not an IPI */
+ if (mfrr <= new_state.pending_pri)
+ reject = new_state.xisr;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Pass rejects to virtual mode */
+ if (reject && reject != XICS_IPI) {
+ this_icp->rm_action |= XICS_RM_REJECT;
+ this_icp->rm_reject = reject;
+ }
+
+ /* Pass resends to virtual mode */
+ if (resend)
+ this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+
+ return check_too_hard(xics, this_icp);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 reject;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /*
+ * ICP State: Set_CPPR
+ *
+ * We can safely compare the new value with the current
+ * value outside of the transaction as the CPPR is only
+ * ever changed by the processor on itself
+ */
+ if (cppr > icp->state.cppr) {
+ icp_rm_down_cppr(xics, icp, cppr);
+ goto bail;
+ } else if (cppr == icp->state.cppr)
+ return H_SUCCESS;
+
+ /*
+ * ICP State: Up_CPPR
+ *
+ * The processor is raising its priority, this can result
+ * in a rejection of a pending interrupt:
+ *
+ * ICP State: Reject_Current
+ *
+ * We can remove EE from the current processor, the update
+ * transaction will set it again if needed
+ */
+ icp_rm_clr_vcpu_irq(icp->vcpu);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ reject = 0;
+ new_state.cppr = cppr;
+
+ if (cppr <= new_state.pending_pri) {
+ reject = new_state.xisr;
+ new_state.xisr = 0;
+ new_state.pending_pri = 0xff;
+ }
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Pass rejects to virtual mode */
+ if (reject && reject != XICS_IPI) {
+ icp->rm_action |= XICS_RM_REJECT;
+ icp->rm_reject = reject;
+ }
+ bail:
+ return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq = xirr & 0x00ffffff;
+ u16 src;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (ie more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR sepcifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ goto bail;
+ /*
+ * EOI handling: If the interrupt is still asserted, we need to
+ * resend it. We can take a lockless "peek" at the ICS state here.
+ *
+ * "Message" interrupts will never have "asserted" set
+ */
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ goto bail;
+ state = &ics->irq_state[src];
+
+ /* Still asserted, resend it, we make it look like a reject */
+ if (state->asserted) {
+ icp->rm_action |= XICS_RM_REJECT;
+ icp->rm_reject = irq;
+ }
+ bail:
+ return check_too_hard(xics, icp);
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e33d11f1b977..b02f91e4c70d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
* *
*****************************************************************************/
-#define XICS_XIRR 4
-#define XICS_QIRR 0xc
-#define XICS_IPI 2 /* interrupt source # for IPIs */
-
/*
* We come in here when wakened from nap mode on a secondary hw thread.
* Relocation is off and most register values are lost.
@@ -101,50 +97,51 @@ kvm_start_guest:
li r0,1
stb r0,PACA_NAPSTATELOST(r13)
- /* get vcpu pointer, NULL if we have no vcpu to run */
- ld r4,HSTATE_KVM_VCPU(r13)
- cmpdi cr1,r4,0
+ /* were we napping due to cede? */
+ lbz r0,HSTATE_NAPPING(r13)
+ cmpwi r0,0
+ bne kvm_end_cede
+
+ /*
+ * We weren't napping due to cede, so this must be a secondary
+ * thread being woken up to run a guest, or being woken up due
+ * to a stray IPI. (Or due to some machine check or hypervisor
+ * maintenance interrupt while the core is in KVM.)
+ */
/* Check the wake reason in SRR1 to see why we got here */
mfspr r3,SPRN_SRR1
rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
cmpwi r3,4 /* was it an external interrupt? */
- bne 27f
-
- /*
- * External interrupt - for now assume it is an IPI, since we
- * should never get any other interrupts sent to offline threads.
- * Only do this for secondary threads.
- */
- beq cr1,25f
- lwz r3,VCPU_PTID(r4)
- cmpwi r3,0
- beq 27f
-25: ld r5,HSTATE_XICS_PHYS(r13)
- li r0,0xff
- li r6,XICS_QIRR
- li r7,XICS_XIRR
+ bne 27f /* if not */
+ ld r5,HSTATE_XICS_PHYS(r13)
+ li r7,XICS_XIRR /* if it was an external interrupt, */
lwzcix r8,r5,r7 /* get and ack the interrupt */
sync
clrldi. r9,r8,40 /* get interrupt source ID. */
- beq 27f /* none there? */
- cmpwi r9,XICS_IPI
- bne 26f
+ beq 28f /* none there? */
+ cmpwi r9,XICS_IPI /* was it an IPI? */
+ bne 29f
+ li r0,0xff
+ li r6,XICS_MFRR
stbcix r0,r5,r6 /* clear IPI */
-26: stwcix r8,r5,r7 /* EOI the interrupt */
-
-27: /* XXX should handle hypervisor maintenance interrupts etc. here */
+ stwcix r8,r5,r7 /* EOI the interrupt */
+ sync /* order loading of vcpu after that */
- /* reload vcpu pointer after clearing the IPI */
+ /* get vcpu pointer, NULL if we have no vcpu to run */
ld r4,HSTATE_KVM_VCPU(r13)
cmpdi r4,0
/* if we have no vcpu to run, go back to sleep */
beq kvm_no_guest
+ b kvmppc_hv_entry
- /* were we napping due to cede? */
- lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,0
- bne kvm_end_cede
+27: /* XXX should handle hypervisor maintenance interrupts etc. here */
+ b kvm_no_guest
+28: /* SRR1 said external but ICP said nope?? */
+ b kvm_no_guest
+29: /* External non-IPI interrupt to offline secondary thread? help?? */
+ stw r8,HSTATE_SAVED_XIRR(r13)
+ b kvm_no_guest
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
lwz r5, LPPACA_YIELDCOUNT(r3)
addi r5, r5, 1
stw r5, LPPACA_YIELDCOUNT(r3)
+ li r6, 1
+ stb r6, VCPU_VPA_DIRTY(r4)
25:
/* Load up DAR and DSISR */
ld r5, VCPU_DAR(r4)
@@ -485,20 +484,20 @@ toc_tlbie_lock:
mtctr r6
mtxer r7
+ ld r10, VCPU_PC(r4)
+ ld r11, VCPU_MSR(r4)
kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
- ld r10, VCPU_PC(r4)
- ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
+ /* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
ori r11, r11, MSR_ME
/* Check if we can deliver an external or decrementer interrupt now */
ld r0,VCPU_PENDING_EXC(r4)
- li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
- oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+ lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
and r0,r0,r8
cmpdi cr1,r0,0
andi. r0,r11,MSR_EE
@@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Move SRR0 and SRR1 into the respective regs */
5: mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
- li r0,0
- stb r0,VCPU_CEDED(r4) /* cancel cede */
fast_guest_return:
+ li r0,0
+ stb r0,VCPU_CEDED(r4) /* cancel cede */
mtspr SPRN_HSRR0,r10
mtspr SPRN_HSRR1,r11
@@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
- /* Check for mediated interrupts (could be done earlier really ...) */
+ /* Only handle external interrupts here on arch 206 and later */
BEGIN_FTR_SECTION
- cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
- bne+ 1f
- andi. r0,r11,MSR_EE
- beq 1f
- mfspr r5,SPRN_LPCR
- andi. r0,r5,LPCR_MER
- bne bounce_ext_interrupt
-1:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ b ext_interrupt_to_host
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+ /* External interrupt ? */
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ bne+ ext_interrupt_to_host
+
+ /* External interrupt, first check for host_ipi. If this is
+ * set, we know the host wants us out so let's do it now
+ */
+do_ext_interrupt:
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne ext_interrupt_to_host
+
+ /* Now read the interrupt from the ICP */
+ ld r5, HSTATE_XICS_PHYS(r13)
+ li r7, XICS_XIRR
+ cmpdi r5, 0
+ beq- ext_interrupt_to_host
+ lwzcix r3, r5, r7
+ rlwinm. r0, r3, 0, 0xffffff
+ sync
+ beq 3f /* if nothing pending in the ICP */
+
+ /* We found something in the ICP...
+ *
+ * If it's not an IPI, stash it in the PACA and return to
+ * the host, we don't (yet) handle directing real external
+ * interrupts directly to the guest
+ */
+ cmpwi r0, XICS_IPI
+ bne ext_stash_for_host
+
+ /* It's an IPI, clear the MFRR and EOI it */
+ li r0, 0xff
+ li r6, XICS_MFRR
+ stbcix r0, r5, r6 /* clear the IPI */
+ stwcix r3, r5, r7 /* EOI it */
+ sync
+
+ /* We need to re-check host IPI now in case it got set in the
+ * meantime. If it's clear, we bounce the interrupt to the
+ * guest
+ */
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne- 1f
+
+ /* Allright, looks like an IPI for the guest, we need to set MER */
+3:
+ /* Check if any CPU is heading out to the host, if so head out too */
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lwz r0, VCORE_ENTRY_EXIT(r5)
+ cmpwi r0, 0x100
+ bge ext_interrupt_to_host
+
+ /* See if there is a pending interrupt for the guest */
+ mfspr r8, SPRN_LPCR
+ ld r0, VCPU_PENDING_EXC(r9)
+ /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+ rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+ rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
+ beq 2f
+
+ /* And if the guest EE is set, we can deliver immediately, else
+ * we return to the guest with MER set
+ */
+ andi. r0, r11, MSR_EE
+ beq 2f
+ mtspr SPRN_SRR0, r10
+ mtspr SPRN_SRR1, r11
+ li r10, BOOK3S_INTERRUPT_EXTERNAL
+ li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11, r11, 63
+2: mr r4, r9
+ mtspr SPRN_LPCR, r8
+ b fast_guest_return
+
+ /* We raced with the host, we need to resend that IPI, bummer */
+1: li r0, IPI_PRIORITY
+ stbcix r0, r5, r6 /* set the IPI */
+ sync
+ b ext_interrupt_to_host
+
+ext_stash_for_host:
+ /* It's not an IPI and it's for the host, stash it in the PACA
+ * before exit, it will be picked up by the host ICP driver
+ */
+ stw r3, HSTATE_SAVED_XIRR(r13)
+ext_interrupt_to_host:
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
@@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
beq 44f
ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
li r0,IPI_PRIORITY
- li r7,XICS_QIRR
+ li r7,XICS_MFRR
stbcix r0,r7,r8 /* trigger the IPI */
44: srdi. r3,r3,1
addi r6,r6,PACA_SIZE
@@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
lwz r3, LPPACA_YIELDCOUNT(r8)
addi r3, r3, 1
stw r3, LPPACA_YIELDCOUNT(r8)
+ li r3, 1
+ stb r3, VCPU_VPA_DIRTY(r9)
25:
/* Save PMU registers if requested */
/* r8 and cr0.eq are live here */
@@ -1350,11 +1433,19 @@ hcall_real_table:
.long 0 /* 0x58 */
.long 0 /* 0x5c */
.long 0 /* 0x60 */
- .long 0 /* 0x64 */
- .long 0 /* 0x68 */
- .long 0 /* 0x6c */
- .long 0 /* 0x70 */
- .long 0 /* 0x74 */
+#ifdef CONFIG_KVM_XICS
+ .long .kvmppc_rm_h_eoi - hcall_real_table
+ .long .kvmppc_rm_h_cppr - hcall_real_table
+ .long .kvmppc_rm_h_ipi - hcall_real_table
+ .long 0 /* 0x70 - H_IPOLL */
+ .long .kvmppc_rm_h_xirr - hcall_real_table
+#else
+ .long 0 /* 0x64 - H_EOI */
+ .long 0 /* 0x68 - H_CPPR */
+ .long 0 /* 0x6c - H_IPI */
+ .long 0 /* 0x70 - H_IPOLL */
+ .long 0 /* 0x74 - H_XIRR */
+#endif
.long 0 /* 0x78 */
.long 0 /* 0x7c */
.long 0 /* 0x80 */
@@ -1405,15 +1496,6 @@ ignore_hdec:
mr r4,r9
b fast_guest_return
-bounce_ext_interrupt:
- mr r4,r9
- mtspr SPRN_SRR0,r10
- mtspr SPRN_SRR1,r11
- li r10,BOOK3S_INTERRUPT_EXTERNAL
- li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
- rotldi r11,r11,63
- b fast_guest_return
-
_GLOBAL(kvmppc_h_set_dabr)
std r4,VCPU_DABR(r3)
/* Work around P7 bug where DABR can get corrupted on mtspr */
@@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
b .
kvm_end_cede:
+ /* get vcpu pointer */
+ ld r4, HSTATE_KVM_VCPU(r13)
+
/* Woken by external or decrementer interrupt */
ld r1, HSTATE_HOST_R1(r13)
@@ -1558,6 +1643,16 @@ kvm_end_cede:
li r0,0
stb r0,HSTATE_NAPPING(r13)
+ /* Check the wake reason in SRR1 to see why we got here */
+ mfspr r3, SPRN_SRR1
+ rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */
+ cmpwi r3, 4 /* was it an external interrupt? */
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
+ mr r9, r4
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ beq do_ext_interrupt /* if so */
+
/* see if any other thread is already exiting */
lwz r0,VCORE_ENTRY_EXIT(r5)
cmpwi r0,0x100
@@ -1577,8 +1672,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
- li r3,H_TOO_HARD
- blr
+ b hcall_real_fallback
/* Try to handle a machine check in real mode */
machine_check_realmode:
@@ -1626,7 +1720,7 @@ secondary_nap:
beq 37f
sync
li r0, 0xff
- li r6, XICS_QIRR
+ li r6, XICS_MFRR
stbcix r0, r5, r6 /* clear the IPI */
stwcix r3, r5, r7 /* EOI it */
37: sync
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index dbdc15aa8127..bdc40b8e77d9 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -762,9 +762,7 @@ program_interrupt:
run->exit_reason = KVM_EXIT_MMIO;
r = RESUME_HOST_NV;
break;
- case EMULATE_DO_PAPR:
- run->exit_reason = KVM_EXIT_PAPR_HCALL;
- vcpu->arch.hcall_needed = 1;
+ case EMULATE_EXIT_USER:
r = RESUME_HOST_NV;
break;
default:
@@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old)
+ const struct kvm_memory_slot *old)
{
}
@@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ee02b30878ed..b24309c6c2d5 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+ long rc = kvmppc_xics_hcall(vcpu, cmd);
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
{
switch (cmd) {
@@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
+ case H_XIRR:
+ case H_CPPR:
+ case H_EOI:
+ case H_IPI:
+ if (kvmppc_xics_enabled(vcpu))
+ return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+ break;
+ case H_RTAS:
+ if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ return RESUME_HOST;
+ if (kvmppc_rtas_hcall(vcpu))
+ break;
+ kvmppc_set_gpr(vcpu, 3, 0);
+ return EMULATE_DONE;
}
return EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 000000000000..3219ba895246
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+
+#ifdef CONFIG_KVM_XICS
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq, server, priority;
+ int rc;
+
+ if (args->nargs != 3 || args->nret != 1) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = args->args[0];
+ server = args->args[1];
+ priority = args->args[2];
+
+ rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+ if (rc)
+ rc = -3;
+out:
+ args->rets[0] = rc;
+}
+
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq, server, priority;
+ int rc;
+
+ if (args->nargs != 1 || args->nret != 3) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = args->args[0];
+
+ server = priority = 0;
+ rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+ if (rc) {
+ rc = -3;
+ goto out;
+ }
+
+ args->rets[1] = server;
+ args->rets[2] = priority;
+out:
+ args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq;
+ int rc;
+
+ if (args->nargs != 1 || args->nret != 1) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = args->args[0];
+
+ rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+ if (rc)
+ rc = -3;
+out:
+ args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq;
+ int rc;
+
+ if (args->nargs != 1 || args->nret != 1) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = args->args[0];
+
+ rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+ if (rc)
+ rc = -3;
+out:
+ args->rets[0] = rc;
+}
+#endif /* CONFIG_KVM_XICS */
+
+struct rtas_handler {
+ void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+ char *name;
+};
+
+static struct rtas_handler rtas_handlers[] = {
+#ifdef CONFIG_KVM_XICS
+ { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+ { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+ { .name = "ibm,int-off", .handler = kvm_rtas_int_off },
+ { .name = "ibm,int-on", .handler = kvm_rtas_int_on },
+#endif
+};
+
+struct rtas_token_definition {
+ struct list_head list;
+ struct rtas_handler *handler;
+ u64 token;
+};
+
+static int rtas_name_matches(char *s1, char *s2)
+{
+ struct kvm_rtas_token_args args;
+ return !strncmp(s1, s2, sizeof(args.name));
+}
+
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+ struct rtas_token_definition *d, *tmp;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+ if (rtas_name_matches(d->handler->name, name)) {
+ list_del(&d->list);
+ kfree(d);
+ return 0;
+ }
+ }
+
+ /* It's not an error to undefine an undefined token */
+ return 0;
+}
+
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+ struct rtas_token_definition *d;
+ struct rtas_handler *h = NULL;
+ bool found;
+ int i;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
+ if (d->token == token)
+ return -EEXIST;
+ }
+
+ found = false;
+ for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+ h = &rtas_handlers[i];
+ if (rtas_name_matches(h->name, name)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->handler = h;
+ d->token = token;
+
+ list_add_tail(&d->list, &kvm->arch.rtas_tokens);
+
+ return 0;
+}
+
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_rtas_token_args args;
+ int rc;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+
+ mutex_lock(&kvm->lock);
+
+ if (args.token)
+ rc = rtas_token_define(kvm, args.name, args.token);
+ else
+ rc = rtas_token_undefine(kvm, args.name);
+
+ mutex_unlock(&kvm->lock);
+
+ return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+ struct rtas_token_definition *d;
+ struct rtas_args args;
+ rtas_arg_t *orig_rets;
+ gpa_t args_phys;
+ int rc;
+
+ /* r4 contains the guest physical address of the RTAS args */
+ args_phys = kvmppc_get_gpr(vcpu, 4);
+
+ rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ if (rc)
+ goto fail;
+
+ /*
+ * args->rets is a pointer into args->args. Now that we've
+ * copied args we need to fix it up to point into our copy,
+ * not the guest args. We also need to save the original
+ * value so we can restore it on the way out.
+ */
+ orig_rets = args.rets;
+ args.rets = &args.args[args.nargs];
+
+ mutex_lock(&vcpu->kvm->lock);
+
+ rc = -ENOENT;
+ list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+ if (d->token == args.token) {
+ d->handler->handler(vcpu, &args);
+ rc = 0;
+ break;
+ }
+ }
+
+ mutex_unlock(&vcpu->kvm->lock);
+
+ if (rc == 0) {
+ args.rets = orig_rets;
+ rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ if (rc)
+ goto fail;
+ }
+
+ return rc;
+
+fail:
+ /*
+ * We only get here if the guest has called RTAS with a bogus
+ * args pointer. That means we can't get to the args, and so we
+ * can't fail the RTAS call. So fail right out to userspace,
+ * which should kill the guest.
+ */
+ return rc;
+}
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+ struct rtas_token_definition *d, *tmp;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+ list_del(&d->list);
+ kfree(d);
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 000000000000..f7a103756618
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1270 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#if 1
+#define XICS_DBG(fmt...) do { } while (0)
+#else
+#define XICS_DBG(fmt...) trace_printk(fmt)
+#endif
+
+#define ENABLE_REALMODE true
+#define DEBUG_REALMODE false
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries if the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ * ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ * locks array to improve scalability
+ */
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq);
+
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
+ bool report_status)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u16 src;
+
+ XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+ return -EINVAL;
+ }
+ state = &ics->irq_state[src];
+ if (!state->exists)
+ return -EINVAL;
+
+ if (report_status)
+ return state->asserted;
+
+ /*
+ * We set state->asserted locklessly. This should be fine as
+ * we are the only setter, thus concurrent access is undefined
+ * to begin with.
+ */
+ if (level == KVM_INTERRUPT_SET_LEVEL)
+ state->asserted = 1;
+ else if (level == KVM_INTERRUPT_UNSET) {
+ state->asserted = 0;
+ return 0;
+ }
+
+ /* Attempt delivery */
+ icp_deliver_irq(xics, NULL, irq);
+
+ return state->asserted;
+}
+
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+ struct kvmppc_icp *icp)
+{
+ int i;
+
+ mutex_lock(&ics->lock);
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct ics_irq_state *state = &ics->irq_state[i];
+
+ if (!state->resend)
+ continue;
+
+ XICS_DBG("resend %#x prio %#x\n", state->number,
+ state->priority);
+
+ mutex_unlock(&ics->lock);
+ icp_deliver_irq(xics, icp, state->number);
+ mutex_lock(&ics->lock);
+ }
+
+ mutex_unlock(&ics->lock);
+}
+
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+ struct ics_irq_state *state,
+ u32 server, u32 priority, u32 saved_priority)
+{
+ bool deliver;
+
+ mutex_lock(&ics->lock);
+
+ state->server = server;
+ state->priority = priority;
+ state->saved_priority = saved_priority;
+ deliver = false;
+ if ((state->masked_pending || state->resend) && priority != MASKED) {
+ state->masked_pending = 0;
+ deliver = true;
+ }
+
+ mutex_unlock(&ics->lock);
+
+ return deliver;
+}
+
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ icp = kvmppc_xics_find_server(kvm, server);
+ if (!icp)
+ return -EINVAL;
+
+ XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+ irq, server, priority,
+ state->masked_pending, state->resend);
+
+ if (write_xive(xics, ics, state, server, priority, priority))
+ icp_deliver_irq(xics, icp, irq);
+
+ return 0;
+}
+
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ mutex_lock(&ics->lock);
+ *server = state->server;
+ *priority = state->priority;
+ mutex_unlock(&ics->lock);
+
+ return 0;
+}
+
+int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ icp = kvmppc_xics_find_server(kvm, state->server);
+ if (!icp)
+ return -EINVAL;
+
+ if (write_xive(xics, ics, state, state->server, state->saved_priority,
+ state->saved_priority))
+ icp_deliver_irq(xics, icp, irq);
+
+ return 0;
+}
+
+int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ write_xive(xics, ics, state, state->server, MASKED, state->priority);
+
+ return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+ union kvmppc_icp_state old,
+ union kvmppc_icp_state new,
+ bool change_self)
+{
+ bool success;
+
+ /* Calculate new output value */
+ new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+ /* Attempt atomic update */
+ success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+ if (!success)
+ goto bail;
+
+ XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+ icp->server_num,
+ old.cppr, old.mfrr, old.pending_pri, old.xisr,
+ old.need_resend, old.out_ee);
+ XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+ new.cppr, new.mfrr, new.pending_pri, new.xisr,
+ new.need_resend, new.out_ee);
+ /*
+ * Check for output state update
+ *
+ * Note that this is racy since another processor could be updating
+ * the state already. This is why we never clear the interrupt output
+ * here, we only ever set it. The clear only happens prior to doing
+ * an update and only by the processor itself. Currently we do it
+ * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+ *
+ * We also do not try to figure out whether the EE state has changed,
+ * we unconditionally set it if the new state calls for it. The reason
+ * for that is that we opportunistically remove the pending interrupt
+ * flag when raising CPPR, so we need to set it back here if an
+ * interrupt is still pending.
+ */
+ if (new.out_ee) {
+ kvmppc_book3s_queue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ if (!change_self)
+ kvmppc_fast_vcpu_kick(icp->vcpu);
+ }
+ bail:
+ return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ u32 icsid;
+
+ /* Order this load with the test for need_resend in the caller */
+ smp_rmb();
+ for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+ struct kvmppc_ics *ics = xics->ics[icsid];
+
+ if (!test_and_clear_bit(icsid, icp->resend_map))
+ continue;
+ if (!ics)
+ continue;
+ ics_check_resend(xics, ics, icp);
+ }
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+ u32 *reject)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool success;
+
+ XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+ icp->server_num);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ *reject = 0;
+
+ /* See if we can deliver */
+ success = new_state.cppr > priority &&
+ new_state.mfrr > priority &&
+ new_state.pending_pri > priority;
+
+ /*
+ * If we can, check for a rejection and perform the
+ * delivery
+ */
+ if (success) {
+ *reject = new_state.xisr;
+ new_state.xisr = irq;
+ new_state.pending_pri = priority;
+ } else {
+ /*
+ * If we failed to deliver we set need_resend
+ * so a subsequent CPPR state change causes us
+ * to try a new delivery.
+ */
+ new_state.need_resend = true;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, false));
+
+ return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u32 reject;
+ u16 src;
+
+ /*
+ * This is used both for initial delivery of an interrupt and
+ * for subsequent rejection.
+ *
+ * Rejection can be racy vs. resends. We have evaluated the
+ * rejection in an atomic ICP transaction which is now complete,
+ * so potentially the ICP can already accept the interrupt again.
+ *
+ * So we need to retry the delivery. Essentially the reject path
+ * boils down to a failed delivery. Always.
+ *
+ * Now the interrupt could also have moved to a different target,
+ * thus we may need to re-do the ICP lookup as well
+ */
+
+ again:
+ /* Get the ICS state and lock it */
+ ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+ if (!ics) {
+ XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+ return;
+ }
+ state = &ics->irq_state[src];
+
+ /* Get a lock on the ICS */
+ mutex_lock(&ics->lock);
+
+ /* Get our server */
+ if (!icp || state->server != icp->server_num) {
+ icp = kvmppc_xics_find_server(xics->kvm, state->server);
+ if (!icp) {
+ pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+ new_irq, state->server);
+ goto out;
+ }
+ }
+
+ /* Clear the resend bit of that interrupt */
+ state->resend = 0;
+
+ /*
+ * If masked, bail out
+ *
+ * Note: PAPR doesn't mention anything about masked pending
+ * when doing a resend, only when doing a delivery.
+ *
+ * However that would have the effect of losing a masked
+ * interrupt that was rejected and isn't consistent with
+ * the whole masked_pending business which is about not
+ * losing interrupts that occur while masked.
+ *
+ * I don't differenciate normal deliveries and resends, this
+ * implementation will differ from PAPR and not lose such
+ * interrupts.
+ */
+ if (state->priority == MASKED) {
+ XICS_DBG("irq %#x masked pending\n", new_irq);
+ state->masked_pending = 1;
+ goto out;
+ }
+
+ /*
+ * Try the delivery, this will set the need_resend flag
+ * in the ICP as part of the atomic transaction if the
+ * delivery is not possible.
+ *
+ * Note that if successful, the new delivery might have itself
+ * rejected an interrupt that was "delivered" before we took the
+ * icp mutex.
+ *
+ * In this case we do the whole sequence all over again for the
+ * new guy. We cannot assume that the rejected interrupt is less
+ * favored than the new one, and thus doesn't need to be delivered,
+ * because by the time we exit icp_try_to_deliver() the target
+ * processor may well have alrady consumed & completed it, and thus
+ * the rejected interrupt might actually be already acceptable.
+ */
+ if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+ /*
+ * Delivery was successful, did we reject somebody else ?
+ */
+ if (reject && reject != XICS_IPI) {
+ mutex_unlock(&ics->lock);
+ new_irq = reject;
+ goto again;
+ }
+ } else {
+ /*
+ * We failed to deliver the interrupt we need to set the
+ * resend map bit and mark the ICS state as needing a resend
+ */
+ set_bit(ics->icsid, icp->resend_map);
+ state->resend = 1;
+
+ /*
+ * If the need_resend flag got cleared in the ICP some time
+ * between icp_try_to_deliver() atomic update and now, then
+ * we know it might have missed the resend_map bit. So we
+ * retry
+ */
+ smp_mb();
+ if (!icp->state.need_resend) {
+ mutex_unlock(&ics->lock);
+ goto again;
+ }
+ }
+ out:
+ mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u8 new_cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool resend;
+
+ /*
+ * This handles several related states in one operation:
+ *
+ * ICP State: Down_CPPR
+ *
+ * Load CPPR with new value and if the XISR is 0
+ * then check for resends:
+ *
+ * ICP State: Resend
+ *
+ * If MFRR is more favored than CPPR, check for IPIs
+ * and notify ICS of a potential resend. This is done
+ * asynchronously (when used in real mode, we will have
+ * to exit here).
+ *
+ * We do not handle the complete Check_IPI as documented
+ * here. In the PAPR, this state will be used for both
+ * Set_MFRR and Down_CPPR. However, we know that we aren't
+ * changing the MFRR state here so we don't need to handle
+ * the case of an MFRR causing a reject of a pending irq,
+ * this will have been handled when the MFRR was set in the
+ * first place.
+ *
+ * Thus we don't have to handle rejects, only resends.
+ *
+ * When implementing real mode for HV KVM, resend will lead to
+ * a H_TOO_HARD return and the whole transaction will be handled
+ * in virtual mode.
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Down_CPPR */
+ new_state.cppr = new_cppr;
+
+ /*
+ * Cut down Resend / Check_IPI / IPI
+ *
+ * The logic is that we cannot have a pending interrupt
+ * trumped by an IPI at this point (see above), so we
+ * know that either the pending interrupt is already an
+ * IPI (in which case we don't care to override it) or
+ * it's either more favored than us or non existent
+ */
+ if (new_state.mfrr < new_cppr &&
+ new_state.mfrr <= new_state.pending_pri) {
+ WARN_ON(new_state.xisr != XICS_IPI &&
+ new_state.xisr != 0);
+ new_state.pending_pri = new_state.mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ /* Latch/clear resend bit */
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Now handle resend checks. Those are asynchronous to the ICP
+ * state update in HW (ie bus transactions) so we can handle them
+ * separately here too
+ */
+ if (resend)
+ icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 xirr;
+
+ /* First, remove EE from the processor */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ /*
+ * ICP State: Accept_Interrupt
+ *
+ * Return the pending interrupt (if any) along with the
+ * current CPPR, then clear the XISR & set CPPR to the
+ * pending priority
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+ if (!old_state.xisr)
+ break;
+ new_state.cppr = new_state.pending_pri;
+ new_state.pending_pri = 0xff;
+ new_state.xisr = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+ return xirr;
+}
+
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ u32 reject;
+ bool resend;
+ bool local;
+
+ XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+ vcpu->vcpu_id, server, mfrr);
+
+ icp = vcpu->arch.icp;
+ local = icp->server_num == server;
+ if (!local) {
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+ }
+
+ /*
+ * ICP state: Set_MFRR
+ *
+ * If the CPPR is more favored than the new MFRR, then
+ * nothing needs to be rejected as there can be no XISR to
+ * reject. If the MFRR is being made less favored then
+ * there might be a previously-rejected interrupt needing
+ * to be resent.
+ *
+ * If the CPPR is less favored, then we might be replacing
+ * an interrupt, and thus need to possibly reject it as in
+ *
+ * ICP state: Check_IPI
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Set_MFRR */
+ new_state.mfrr = mfrr;
+
+ /* Check_IPI */
+ reject = 0;
+ resend = false;
+ if (mfrr < new_state.cppr) {
+ /* Reject a pending interrupt if not an IPI */
+ if (mfrr <= new_state.pending_pri)
+ reject = new_state.xisr;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_try_update(icp, old_state, new_state, local));
+
+ /* Handle reject */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+
+ /* Handle resend */
+ if (resend)
+ icp_check_resend(xics, icp);
+
+ return H_SUCCESS;
+}
+
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 reject;
+
+ XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+ /*
+ * ICP State: Set_CPPR
+ *
+ * We can safely compare the new value with the current
+ * value outside of the transaction as the CPPR is only
+ * ever changed by the processor on itself
+ */
+ if (cppr > icp->state.cppr)
+ icp_down_cppr(xics, icp, cppr);
+ else if (cppr == icp->state.cppr)
+ return;
+
+ /*
+ * ICP State: Up_CPPR
+ *
+ * The processor is raising its priority, this can result
+ * in a rejection of a pending interrupt:
+ *
+ * ICP State: Reject_Current
+ *
+ * We can remove EE from the current processor, the update
+ * transaction will set it again if needed
+ */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ reject = 0;
+ new_state.cppr = cppr;
+
+ if (cppr <= new_state.pending_pri) {
+ reject = new_state.xisr;
+ new_state.xisr = 0;
+ new_state.pending_pri = 0xff;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Check for rejects. They are handled by doing a new delivery
+ * attempt (see comments in icp_deliver_irq).
+ */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq = xirr & 0x00ffffff;
+ u16 src;
+
+ XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (ie more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR sepcifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ return H_SUCCESS;
+ /*
+ * EOI handling: If the interrupt is still asserted, we need to
+ * resend it. We can take a lockless "peek" at the ICS state here.
+ *
+ * "Message" interrupts will never have "asserted" set
+ */
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
+ return H_PARAMETER;
+ }
+ state = &ics->irq_state[src];
+
+ /* Still asserted, resend it */
+ if (state->asserted)
+ icp_deliver_irq(xics, icp, irq);
+
+ return H_SUCCESS;
+}
+
+static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+
+ XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
+ hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
+
+ if (icp->rm_action & XICS_RM_KICK_VCPU)
+ kvmppc_fast_vcpu_kick(icp->rm_kick_target);
+ if (icp->rm_action & XICS_RM_CHECK_RESEND)
+ icp_check_resend(xics, icp);
+ if (icp->rm_action & XICS_RM_REJECT)
+ icp_deliver_irq(xics, icp, icp->rm_reject);
+
+ icp->rm_action = 0;
+
+ return H_SUCCESS;
+}
+
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ unsigned long res;
+ int rc = H_SUCCESS;
+
+ /* Check if we have an ICP */
+ if (!xics || !vcpu->arch.icp)
+ return H_HARDWARE;
+
+ /* Check for real mode returning too hard */
+ if (xics->real_mode)
+ return kvmppc_xics_rm_complete(vcpu, req);
+
+ switch (req) {
+ case H_XIRR:
+ res = kvmppc_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 4, res);
+ break;
+ case H_CPPR:
+ kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_EOI:
+ rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_IPI:
+ rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ break;
+ }
+
+ return rc;
+}
+
+
+/* -- Initialisation code etc. -- */
+
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+ struct kvmppc_xics *xics = m->private;
+ struct kvm *kvm = xics->kvm;
+ struct kvm_vcpu *vcpu;
+ int icsid, i;
+
+ if (!kvm)
+ return 0;
+
+ seq_printf(m, "=========\nICP state\n=========\n");
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ union kvmppc_icp_state state;
+
+ if (!icp)
+ continue;
+
+ state.raw = ACCESS_ONCE(icp->state.raw);
+ seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
+ icp->server_num, state.xisr,
+ state.pending_pri, state.cppr, state.mfrr,
+ state.out_ee, state.need_resend);
+ }
+
+ for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
+ struct kvmppc_ics *ics = xics->ics[icsid];
+
+ if (!ics)
+ continue;
+
+ seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
+ icsid);
+
+ mutex_lock(&ics->lock);
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct ics_irq_state *irq = &ics->irq_state[i];
+
+ seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
+ irq->number, irq->server, irq->priority,
+ irq->saved_priority, irq->asserted,
+ irq->resend, irq->masked_pending);
+
+ }
+ mutex_unlock(&ics->lock);
+ }
+ return 0;
+}
+
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, xics_debug_show, inode->i_private);
+}
+
+static const struct file_operations xics_debug_fops = {
+ .open = xics_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+ if (!name) {
+ pr_err("%s: no memory for name\n", __func__);
+ return;
+ }
+
+ xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xics, &xics_debug_fops);
+
+ pr_debug("%s: created %s\n", __func__, name);
+ kfree(name);
+}
+
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
+ struct kvmppc_xics *xics, int irq)
+{
+ struct kvmppc_ics *ics;
+ int i, icsid;
+
+ icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+ mutex_lock(&kvm->lock);
+
+ /* ICS already exists - somebody else got here first */
+ if (xics->ics[icsid])
+ goto out;
+
+ /* Create the ICS */
+ ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
+ if (!ics)
+ goto out;
+
+ mutex_init(&ics->lock);
+ ics->icsid = icsid;
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
+ ics->irq_state[i].priority = MASKED;
+ ics->irq_state[i].saved_priority = MASKED;
+ }
+ smp_wmb();
+ xics->ics[icsid] = ics;
+
+ if (icsid > xics->max_icsid)
+ xics->max_icsid = icsid;
+
+ out:
+ mutex_unlock(&kvm->lock);
+ return xics->ics[icsid];
+}
+
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
+{
+ struct kvmppc_icp *icp;
+
+ if (!vcpu->kvm->arch.xics)
+ return -ENODEV;
+
+ if (kvmppc_xics_find_server(vcpu->kvm, server_num))
+ return -EEXIST;
+
+ icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
+ if (!icp)
+ return -ENOMEM;
+
+ icp->vcpu = vcpu;
+ icp->server_num = server_num;
+ icp->state.mfrr = MASKED;
+ icp->state.pending_pri = MASKED;
+ vcpu->arch.icp = icp;
+
+ XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+ return 0;
+}
+
+u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ union kvmppc_icp_state state;
+
+ if (!icp)
+ return 0;
+ state = icp->state;
+ return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
+ ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
+ ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
+ ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
+}
+
+int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_ics *ics;
+ u8 cppr, mfrr, pending_pri;
+ u32 xisr;
+ u16 src;
+ bool resend;
+
+ if (!icp || !xics)
+ return -ENOENT;
+
+ cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+ xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+ KVM_REG_PPC_ICP_XISR_MASK;
+ mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+ pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+ /* Require the new state to be internally consistent */
+ if (xisr == 0) {
+ if (pending_pri != 0xff)
+ return -EINVAL;
+ } else if (xisr == XICS_IPI) {
+ if (pending_pri != mfrr || pending_pri >= cppr)
+ return -EINVAL;
+ } else {
+ if (pending_pri >= mfrr || pending_pri >= cppr)
+ return -EINVAL;
+ ics = kvmppc_xics_find_ics(xics, xisr, &src);
+ if (!ics)
+ return -EINVAL;
+ }
+
+ new_state.raw = 0;
+ new_state.cppr = cppr;
+ new_state.xisr = xisr;
+ new_state.mfrr = mfrr;
+ new_state.pending_pri = pending_pri;
+
+ /*
+ * Deassert the CPU interrupt request.
+ * icp_try_update will reassert it if necessary.
+ */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ /*
+ * Note that if we displace an interrupt from old_state.xisr,
+ * we don't mark it as rejected. We expect userspace to set
+ * the state of the interrupt sources to be consistent with
+ * the ICP states (either before or afterwards, which doesn't
+ * matter). We do handle resends due to CPPR becoming less
+ * favoured because that is necessary to end up with a
+ * consistent state in the situation where userspace restores
+ * the ICS states before the ICP states.
+ */
+ do {
+ old_state = ACCESS_ONCE(icp->state);
+
+ if (new_state.mfrr <= old_state.mfrr) {
+ resend = false;
+ new_state.need_resend = old_state.need_resend;
+ } else {
+ resend = old_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_try_update(icp, old_state, new_state, false));
+
+ if (resend)
+ icp_check_resend(xics, icp);
+
+ return 0;
+}
+
+static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+ int ret;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *irqp;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u16 idx;
+ u64 val, prio;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics)
+ return -ENOENT;
+
+ irqp = &ics->irq_state[idx];
+ mutex_lock(&ics->lock);
+ ret = -ENOENT;
+ if (irqp->exists) {
+ val = irqp->server;
+ prio = irqp->priority;
+ if (prio == MASKED) {
+ val |= KVM_XICS_MASKED;
+ prio = irqp->saved_priority;
+ }
+ val |= prio << KVM_XICS_PRIORITY_SHIFT;
+ if (irqp->asserted)
+ val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
+ else if (irqp->masked_pending || irqp->resend)
+ val |= KVM_XICS_PENDING;
+ ret = 0;
+ }
+ mutex_unlock(&ics->lock);
+
+ if (!ret && put_user(val, ubufp))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *irqp;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u16 idx;
+ u64 val;
+ u8 prio;
+ u32 server;
+
+ if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+ return -ENOENT;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics) {
+ ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
+ if (!ics)
+ return -ENOMEM;
+ }
+ irqp = &ics->irq_state[idx];
+ if (get_user(val, ubufp))
+ return -EFAULT;
+
+ server = val & KVM_XICS_DESTINATION_MASK;
+ prio = val >> KVM_XICS_PRIORITY_SHIFT;
+ if (prio != MASKED &&
+ kvmppc_xics_find_server(xics->kvm, server) == NULL)
+ return -EINVAL;
+
+ mutex_lock(&ics->lock);
+ irqp->server = server;
+ irqp->saved_priority = prio;
+ if (val & KVM_XICS_MASKED)
+ prio = MASKED;
+ irqp->priority = prio;
+ irqp->resend = 0;
+ irqp->masked_pending = 0;
+ irqp->asserted = 0;
+ if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
+ irqp->asserted = 1;
+ irqp->exists = 1;
+ mutex_unlock(&ics->lock);
+
+ if (val & KVM_XICS_PENDING)
+ icp_deliver_irq(xics, NULL, irqp->number);
+
+ return 0;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+ bool line_status)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+
+ return ics_deliver_irq(xics, irq, level, line_status);
+}
+
+static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct kvmppc_xics *xics = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XICS_GRP_SOURCES:
+ return xics_set_source(xics, attr->attr, attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct kvmppc_xics *xics = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XICS_GRP_SOURCES:
+ return xics_get_source(xics, attr->attr, attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_XICS_GRP_SOURCES:
+ if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+ attr->attr < KVMPPC_XICS_NR_IRQS)
+ return 0;
+ break;
+ }
+ return -ENXIO;
+}
+
+static void kvmppc_xics_free(struct kvm_device *dev)
+{
+ struct kvmppc_xics *xics = dev->private;
+ int i;
+ struct kvm *kvm = xics->kvm;
+
+ debugfs_remove(xics->dentry);
+
+ if (kvm)
+ kvm->arch.xics = NULL;
+
+ for (i = 0; i <= xics->max_icsid; i++)
+ kfree(xics->ics[i]);
+ kfree(xics);
+ kfree(dev);
+}
+
+static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
+{
+ struct kvmppc_xics *xics;
+ struct kvm *kvm = dev->kvm;
+ int ret = 0;
+
+ xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+ if (!xics)
+ return -ENOMEM;
+
+ dev->private = xics;
+ xics->dev = dev;
+ xics->kvm = kvm;
+
+ /* Already there ? */
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.xics)
+ ret = -EEXIST;
+ else
+ kvm->arch.xics = xics;
+ mutex_unlock(&kvm->lock);
+
+ if (ret)
+ return ret;
+
+ xics_debugfs_init(xics);
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ /* Enable real mode support */
+ xics->real_mode = ENABLE_REALMODE;
+ xics->real_mode_dbg = DEBUG_REALMODE;
+ }
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
+ return 0;
+}
+
+struct kvm_device_ops kvm_xics_ops = {
+ .name = "kvm-xics",
+ .create = kvmppc_xics_create,
+ .destroy = kvmppc_xics_free,
+ .set_attr = xics_set_attr,
+ .get_attr = xics_get_attr,
+ .has_attr = xics_has_attr,
+};
+
+int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+ u32 xcpu)
+{
+ struct kvmppc_xics *xics = dev->private;
+ int r = -EBUSY;
+
+ if (dev->ops != &kvm_xics_ops)
+ return -EPERM;
+ if (xics->kvm != vcpu->kvm)
+ return -EPERM;
+ if (vcpu->arch.irq_type)
+ return -EBUSY;
+
+ r = kvmppc_xics_create_icp(vcpu, xcpu);
+ if (!r)
+ vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+
+ return r;
+}
+
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->arch.icp)
+ return;
+ kfree(vcpu->arch.icp);
+ vcpu->arch.icp = NULL;
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+}
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 000000000000..dd9326c5c19b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+/*
+ * We use a two-level tree to store interrupt source information.
+ * There are up to 1024 ICS nodes, each of which can represent
+ * 1024 sources.
+ */
+#define KVMPPC_XICS_MAX_ICS_ID 1023
+#define KVMPPC_XICS_ICS_SHIFT 10
+#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT)
+#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1)
+
+/*
+ * Interrupt source numbers below this are reserved, for example
+ * 0 is "no interrupt", and 2 is used for IPIs.
+ */
+#define KVMPPC_XICS_FIRST_IRQ 16
+#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \
+ KVMPPC_XICS_IRQ_PER_ICS)
+
+/* Priority value to use for disabling an interrupt */
+#define MASKED 0xff
+
+/* State for one irq source */
+struct ics_irq_state {
+ u32 number;
+ u32 server;
+ u8 priority;
+ u8 saved_priority;
+ u8 resend;
+ u8 masked_pending;
+ u8 asserted; /* Only for LSI */
+ u8 exists;
+};
+
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+ unsigned long raw;
+ struct {
+ u8 out_ee:1;
+ u8 need_resend:1;
+ u8 cppr;
+ u8 mfrr;
+ u8 pending_pri;
+ u32 xisr;
+ };
+};
+
+/* One bit per ICS */
+#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
+
+struct kvmppc_icp {
+ struct kvm_vcpu *vcpu;
+ unsigned long server_num;
+ union kvmppc_icp_state state;
+ unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+
+ /* Real mode might find something too hard, here's the action
+ * it might request from virtual mode
+ */
+#define XICS_RM_KICK_VCPU 0x1
+#define XICS_RM_CHECK_RESEND 0x2
+#define XICS_RM_REJECT 0x4
+ u32 rm_action;
+ struct kvm_vcpu *rm_kick_target;
+ u32 rm_reject;
+
+ /* Debug stuff for real mode */
+ union kvmppc_icp_state rm_dbgstate;
+ struct kvm_vcpu *rm_dbgtgt;
+};
+
+struct kvmppc_ics {
+ struct mutex lock;
+ u16 icsid;
+ struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+struct kvmppc_xics {
+ struct kvm *kvm;
+ struct kvm_device *dev;
+ struct dentry *dentry;
+ u32 max_icsid;
+ bool real_mode;
+ bool real_mode_dbg;
+ struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+};
+
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+ u32 nr)
+{
+ struct kvm_vcpu *vcpu = NULL;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
+ return vcpu->arch.icp;
+ }
+ return NULL;
+}
+
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+ u32 irq, u16 *source)
+{
+ u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+ u16 src = irq & KVMPPC_XICS_SRC_MASK;
+ struct kvmppc_ics *ics;
+
+ if (source)
+ *source = src;
+ if (icsid > KVMPPC_XICS_MAX_ICS_ID)
+ return NULL;
+ ics = xics->ics[icsid];
+ if (!ics)
+ return NULL;
+ return ics;
+}
+
+
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 020923e43134..1020119226db 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, prio);
}
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
@@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
keep_irq = true;
}
- if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
+ if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
update_epr = true;
switch (priority) {
@@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
set_guest_dear(vcpu, vcpu->arch.queued_dear);
- if (update_epr == true)
- kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+ if (update_epr == true) {
+ if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
+ kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+ else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
+ BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
+ kvmppc_mpic_set_epr(vcpu);
+ }
+ }
new_msr &= msr_mask;
#if defined(CONFIG_64BIT)
@@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_core_queue_program(vcpu, ESR_PIL);
return RESUME_HOST;
+ case EMULATE_EXIT_USER:
+ return RESUME_HOST;
+
default:
BUG();
}
@@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
+static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
+{
+ u32 old_tsr = vcpu->arch.tsr;
+
+ vcpu->arch.tsr = new_tsr;
+
+ if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+ arm_next_watchdog(vcpu);
+
+ update_timer_ints(vcpu);
+}
+
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
@@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
kvmppc_emulate_dec(vcpu);
}
- if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
- u32 old_tsr = vcpu->arch.tsr;
-
- vcpu->arch.tsr = sregs->u.e.tsr;
-
- if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
- arm_next_watchdog(vcpu);
-
- update_timer_ints(vcpu);
- }
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR)
+ kvmppc_set_tsr(vcpu, sregs->u.e.tsr);
return 0;
}
@@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
- int r = -EINVAL;
+ int r = 0;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
- case KVM_REG_PPC_IAC4: {
- int iac = reg->id - KVM_REG_PPC_IAC1;
- r = copy_to_user((u64 __user *)(long)reg->addr,
- &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+ case KVM_REG_PPC_IAC4:
+ i = reg->id - KVM_REG_PPC_IAC1;
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
break;
- }
case KVM_REG_PPC_DAC1:
- case KVM_REG_PPC_DAC2: {
- int dac = reg->id - KVM_REG_PPC_DAC1;
- r = copy_to_user((u64 __user *)(long)reg->addr,
- &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+ case KVM_REG_PPC_DAC2:
+ i = reg->id - KVM_REG_PPC_DAC1;
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
break;
- }
case KVM_REG_PPC_EPR: {
u32 epr = get_guest_epr(vcpu);
- r = put_user(epr, (u32 __user *)(long)reg->addr);
+ val = get_reg_val(reg->id, epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
- r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
+ val = get_reg_val(reg->id, vcpu->arch.epcr);
break;
#endif
+ case KVM_REG_PPC_TCR:
+ val = get_reg_val(reg->id, vcpu->arch.tcr);
+ break;
+ case KVM_REG_PPC_TSR:
+ val = get_reg_val(reg->id, vcpu->arch.tsr);
+ break;
+ case KVM_REG_PPC_DEBUG_INST:
+ val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
+ break;
default:
+ r = kvmppc_get_one_reg(vcpu, reg->id, &val);
break;
}
+
+ if (r)
+ return r;
+
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
return r;
}
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
- int r = -EINVAL;
+ int r = 0;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
- case KVM_REG_PPC_IAC4: {
- int iac = reg->id - KVM_REG_PPC_IAC1;
- r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
- (u64 __user *)(long)reg->addr, sizeof(u64));
+ case KVM_REG_PPC_IAC4:
+ i = reg->id - KVM_REG_PPC_IAC1;
+ vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
break;
- }
case KVM_REG_PPC_DAC1:
- case KVM_REG_PPC_DAC2: {
- int dac = reg->id - KVM_REG_PPC_DAC1;
- r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
- (u64 __user *)(long)reg->addr, sizeof(u64));
+ case KVM_REG_PPC_DAC2:
+ i = reg->id - KVM_REG_PPC_DAC1;
+ vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
break;
- }
case KVM_REG_PPC_EPR: {
- u32 new_epr;
- r = get_user(new_epr, (u32 __user *)(long)reg->addr);
- if (!r)
- kvmppc_set_epr(vcpu, new_epr);
+ u32 new_epr = set_reg_val(reg->id, val);
+ kvmppc_set_epr(vcpu, new_epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR: {
- u32 new_epcr;
- r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
- if (r == 0)
- kvmppc_set_epcr(vcpu, new_epcr);
+ u32 new_epcr = set_reg_val(reg->id, val);
+ kvmppc_set_epcr(vcpu, new_epcr);
break;
}
#endif
+ case KVM_REG_PPC_OR_TSR: {
+ u32 tsr_bits = set_reg_val(reg->id, val);
+ kvmppc_set_tsr_bits(vcpu, tsr_bits);
+ break;
+ }
+ case KVM_REG_PPC_CLEAR_TSR: {
+ u32 tsr_bits = set_reg_val(reg->id, val);
+ kvmppc_clr_tsr_bits(vcpu, tsr_bits);
+ break;
+ }
+ case KVM_REG_PPC_TSR: {
+ u32 tsr = set_reg_val(reg->id, val);
+ kvmppc_set_tsr(vcpu, tsr);
+ break;
+ }
+ case KVM_REG_PPC_TCR: {
+ u32 tcr = set_reg_val(reg->id, val);
+ kvmppc_set_tcr(vcpu, tcr);
+ break;
+ }
default:
+ r = kvmppc_set_one_reg(vcpu, reg->id, &val);
break;
}
+
return r;
}
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
+
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -ENOTSUPP;
@@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old)
+ const struct kvm_memory_slot *old)
{
}
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index f4bb55c96517..2c6deb5ef2fe 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -54,8 +54,7 @@
(1<<BOOKE_INTERRUPT_DTLB_MISS) | \
(1<<BOOKE_INTERRUPT_ALIGNMENT))
-.macro KVM_HANDLER ivor_nr scratch srr0
-_GLOBAL(kvmppc_handler_\ivor_nr)
+.macro __KVM_HANDLER ivor_nr scratch srr0
/* Get pointer to vcpu and record exit number. */
mtspr \scratch , r4
mfspr r4, SPRN_SPRG_THREAD
@@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
bctr
.endm
+.macro KVM_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+ __KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
+.macro KVM_DBG_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+ mtspr \scratch, r4
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ stw r3, VCPU_CRIT_SAVE(r4)
+ mfcr r3
+ mfspr r4, SPRN_CSRR1
+ andi. r4, r4, MSR_PR
+ bne 1f
+ /* debug interrupt happened in enter/exit path */
+ mfspr r4, SPRN_CSRR1
+ rlwinm r4, r4, 0, ~MSR_DE
+ mtspr SPRN_CSRR1, r4
+ lis r4, 0xffff
+ ori r4, r4, 0xffff
+ mtspr SPRN_DBSR, r4
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ mtcr r3
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ rfci
+1: /* debug interrupt happened in guest */
+ mtcr r3
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ __KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
.macro KVM_HANDLER_ADDR ivor_nr
.long kvmppc_handler_\ivor_nr
.endm
@@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
-KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 6dd4de7802bf..ce6b73c29612 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a8ce24..c2e5e98453a6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
#include <asm/mmu-book3e.h>
#include <asm/tlb.h>
+enum vcpu_ftr {
+ VCPU_FTR_MMU_V2
+};
+
#define E500_PID_NUM 3
#define E500_TLB_NUM 2
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
#ifdef CONFIG_KVM_E500V2
unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
#define get_tlb_sts(gtlbe) (MAS1_TS)
#endif /* !BOOKE_HV */
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+ enum vcpu_ftr ftr)
+{
+ bool has_ftr;
+ switch (ftr) {
+ case VCPU_FTR_MMU_V2:
+ has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+ break;
+ default:
+ return false;
+ }
+ return has_ftr;
+}
+
#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353a836a..b10a01243abd 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_TLB1CFG:
*spr_val = vcpu->arch.tlbcfg[1];
break;
+ case SPRN_TLB0PS:
+ if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+ return EMULATE_FAIL;
+ *spr_val = vcpu->arch.tlbps[0];
+ break;
+ case SPRN_TLB1PS:
+ if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+ return EMULATE_FAIL;
+ *spr_val = vcpu->arch.tlbps[1];
+ break;
case SPRN_L1CSR0:
*spr_val = vcpu_e500->l1csr0;
break;
@@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_MMUCFG:
*spr_val = vcpu->arch.mmucfg;
break;
+ case SPRN_EPTCFG:
+ if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+ return EMULATE_FAIL;
+ /*
+ * Legacy Linux guests access EPTCFG register even if the E.PT
+ * category is disabled in the VM. Give them a chance to live.
+ */
+ *spr_val = vcpu->arch.eptcfg;
+ break;
/* extra exceptions */
case SPRN_IVOR32:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 5c4475983f78..c41a5a96b558 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return 0;
}
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+ long int i;
+
+ switch (id) {
+ case KVM_REG_PPC_MAS0:
+ *val = get_reg_val(id, vcpu->arch.shared->mas0);
+ break;
+ case KVM_REG_PPC_MAS1:
+ *val = get_reg_val(id, vcpu->arch.shared->mas1);
+ break;
+ case KVM_REG_PPC_MAS2:
+ *val = get_reg_val(id, vcpu->arch.shared->mas2);
+ break;
+ case KVM_REG_PPC_MAS7_3:
+ *val = get_reg_val(id, vcpu->arch.shared->mas7_3);
+ break;
+ case KVM_REG_PPC_MAS4:
+ *val = get_reg_val(id, vcpu->arch.shared->mas4);
+ break;
+ case KVM_REG_PPC_MAS6:
+ *val = get_reg_val(id, vcpu->arch.shared->mas6);
+ break;
+ case KVM_REG_PPC_MMUCFG:
+ *val = get_reg_val(id, vcpu->arch.mmucfg);
+ break;
+ case KVM_REG_PPC_EPTCFG:
+ *val = get_reg_val(id, vcpu->arch.eptcfg);
+ break;
+ case KVM_REG_PPC_TLB0CFG:
+ case KVM_REG_PPC_TLB1CFG:
+ case KVM_REG_PPC_TLB2CFG:
+ case KVM_REG_PPC_TLB3CFG:
+ i = id - KVM_REG_PPC_TLB0CFG;
+ *val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
+ break;
+ case KVM_REG_PPC_TLB0PS:
+ case KVM_REG_PPC_TLB1PS:
+ case KVM_REG_PPC_TLB2PS:
+ case KVM_REG_PPC_TLB3PS:
+ i = id - KVM_REG_PPC_TLB0PS;
+ *val = get_reg_val(id, vcpu->arch.tlbps[i]);
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+ long int i;
+
+ switch (id) {
+ case KVM_REG_PPC_MAS0:
+ vcpu->arch.shared->mas0 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS1:
+ vcpu->arch.shared->mas1 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS2:
+ vcpu->arch.shared->mas2 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS7_3:
+ vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS4:
+ vcpu->arch.shared->mas4 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS6:
+ vcpu->arch.shared->mas6 = set_reg_val(id, *val);
+ break;
+ /* Only allow MMU registers to be set to the config supported by KVM */
+ case KVM_REG_PPC_MMUCFG: {
+ u32 reg = set_reg_val(id, *val);
+ if (reg != vcpu->arch.mmucfg)
+ r = -EINVAL;
+ break;
+ }
+ case KVM_REG_PPC_EPTCFG: {
+ u32 reg = set_reg_val(id, *val);
+ if (reg != vcpu->arch.eptcfg)
+ r = -EINVAL;
+ break;
+ }
+ case KVM_REG_PPC_TLB0CFG:
+ case KVM_REG_PPC_TLB1CFG:
+ case KVM_REG_PPC_TLB2CFG:
+ case KVM_REG_PPC_TLB3CFG: {
+ /* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
+ u32 reg = set_reg_val(id, *val);
+ i = id - KVM_REG_PPC_TLB0CFG;
+ if (reg != vcpu->arch.tlbcfg[i])
+ r = -EINVAL;
+ break;
+ }
+ case KVM_REG_PPC_TLB0PS:
+ case KVM_REG_PPC_TLB1PS:
+ case KVM_REG_PPC_TLB2PS:
+ case KVM_REG_PPC_TLB3PS: {
+ u32 reg = set_reg_val(id, *val);
+ i = id - KVM_REG_PPC_TLB0PS;
+ if (reg != vcpu->arch.tlbps[i])
+ r = -EINVAL;
+ break;
+ }
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_params *params)
+{
+ vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ if (params->tlb_sizes[0] <= 2048)
+ vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
+ vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+ vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
+ vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+ return 0;
+}
+
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg)
{
@@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
- vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
-
- vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- if (params.tlb_sizes[0] <= 2048)
- vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
- vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
-
- vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
- vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+ /* Update vcpu's MMU geometry based on SW_TLB input */
+ vcpu_mmu_geometry_update(vcpu, &params);
vcpu_e500->shared_tlb_pages = pages;
vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
return 0;
}
+/* Vcpu's MMU default configuration */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+ struct kvmppc_e500_tlb_params *params)
+{
+ /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
+ vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+ /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
+ vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
+ ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[0] |= params[0].entries;
+ vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+ vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
+ ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[1] |= params[1].entries;
+ vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+ if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
+ vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
+ vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+ vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
+
+ /* Guest mmu emulation currently doesn't handle E.PT */
+ vcpu->arch.eptcfg = 0;
+ vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
+ vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
+ }
+
+ return 0;
+}
+
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (!vcpu_e500->g2h_tlb1_map)
goto err;
- /* Init TLB configuration register */
- vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
- ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
- vcpu->arch.tlbcfg[0] |=
- vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
-
- vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
- ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
- vcpu->arch.tlbcfg[1] |=
- vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
+ vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 2f4baa074b2e..753cc99eff2b 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
+ else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+ r = 0;
else
r = -ENOTSUPP;
@@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 7a73b6f72a8b..631a2650e4e4 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -38,6 +38,7 @@
#define OP_31_XOP_TRAP 4
#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_DCBST 54
#define OP_31_XOP_TRAP_64 68
#define OP_31_XOP_DCBF 86
#define OP_31_XOP_LBZX 87
@@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
break;
+ case OP_31_XOP_DCBST:
case OP_31_XOP_DCBF:
case OP_31_XOP_DCBI:
/* Do nothing. The guest is performing dcbi because
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
new file mode 100644
index 000000000000..5a9a10b90762
--- /dev/null
+++ b/arch/powerpc/kvm/irq.h
@@ -0,0 +1,20 @@
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ int ret = 0;
+
+#ifdef CONFIG_KVM_MPIC
+ ret = ret || (kvm->arch.mpic != NULL);
+#endif
+#ifdef CONFIG_KVM_XICS
+ ret = ret || (kvm->arch.xics != NULL);
+#endif
+ smp_rmb();
+ return ret;
+}
+
+#endif
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
new file mode 100644
index 000000000000..2861ae9eaae6
--- /dev/null
+++ b/arch/powerpc/kvm/mpic.c
@@ -0,0 +1,1853 @@
+/*
+ * OpenPIC emulation
+ *
+ * Copyright (c) 2004 Jocelyn Mayer
+ * 2011 Alexander Graf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <asm/uaccess.h>
+#include <asm/mpic.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include "iodev.h"
+
+#define MAX_CPU 32
+#define MAX_SRC 256
+#define MAX_TMR 4
+#define MAX_IPI 4
+#define MAX_MSI 8
+#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR)
+#define VID 0x03 /* MPIC version ID */
+
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDR_CRIT (1 << 0)
+#define OPENPIC_FLAG_ILR (2 << 0)
+
+/* OpenPIC address map */
+#define OPENPIC_REG_SIZE 0x40000
+#define OPENPIC_GLB_REG_START 0x0
+#define OPENPIC_GLB_REG_SIZE 0x10F0
+#define OPENPIC_TMR_REG_START 0x10F0
+#define OPENPIC_TMR_REG_SIZE 0x220
+#define OPENPIC_MSI_REG_START 0x1600
+#define OPENPIC_MSI_REG_SIZE 0x200
+#define OPENPIC_SUMMARY_REG_START 0x3800
+#define OPENPIC_SUMMARY_REG_SIZE 0x800
+#define OPENPIC_SRC_REG_START 0x10000
+#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START 0x20000
+#define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000))
+
+struct fsl_mpic_info {
+ int max_ext;
+};
+
+static struct fsl_mpic_info fsl_mpic_20 = {
+ .max_ext = 12,
+};
+
+static struct fsl_mpic_info fsl_mpic_42 = {
+ .max_ext = 12,
+};
+
+#define FRR_NIRQ_SHIFT 16
+#define FRR_NCPU_SHIFT 8
+#define FRR_VID_SHIFT 0
+
+#define VID_REVISION_1_2 2
+#define VID_REVISION_1_3 3
+
+#define VIR_GENERIC 0x00000000 /* Generic Vendor ID */
+
+#define GCR_RESET 0x80000000
+#define GCR_MODE_PASS 0x00000000
+#define GCR_MODE_MIXED 0x20000000
+#define GCR_MODE_PROXY 0x60000000
+
+#define TBCR_CI 0x80000000 /* count inhibit */
+#define TCCR_TOG 0x80000000 /* toggles when decrement to zero */
+
+#define IDR_EP_SHIFT 31
+#define IDR_EP_MASK (1 << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT 30
+#define IDR_CI1_SHIFT 29
+#define IDR_P1_SHIFT 1
+#define IDR_P0_SHIFT 0
+
+#define ILR_INTTGT_MASK 0x000000ff
+#define ILR_INTTGT_INT 0x00
+#define ILR_INTTGT_CINT 0x01 /* critical */
+#define ILR_INTTGT_MCP 0x02 /* machine check */
+#define NUM_OUTPUTS 3
+
+#define MSIIR_OFFSET 0x140
+#define MSIIR_SRS_SHIFT 29
+#define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT 24
+#define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT)
+
+static int get_current_cpu(void)
+{
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+ struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
+ return vcpu ? vcpu->arch.irq_cpu_id : -1;
+#else
+ /* XXX */
+ return -1;
+#endif
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+ u32 val, int idx);
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+ u32 *ptr, int idx);
+
+enum irq_type {
+ IRQ_TYPE_NORMAL = 0,
+ IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */
+ IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */
+};
+
+struct irq_queue {
+ /* Round up to the nearest 64 IRQs so that the queue length
+ * won't change when moving between 32 and 64 bit hosts.
+ */
+ unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
+ int next;
+ int priority;
+};
+
+struct irq_source {
+ uint32_t ivpr; /* IRQ vector/priority register */
+ uint32_t idr; /* IRQ destination register */
+ uint32_t destmask; /* bitmap of CPU destinations */
+ int last_cpu;
+ int output; /* IRQ level, e.g. ILR_INTTGT_INT */
+ int pending; /* TRUE if IRQ is pending */
+ enum irq_type type;
+ bool level:1; /* level-triggered */
+ bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */
+};
+
+#define IVPR_MASK_SHIFT 31
+#define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT)
+#define IVPR_ACTIVITY_SHIFT 30
+#define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT)
+#define IVPR_MODE_SHIFT 29
+#define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT)
+#define IVPR_POLARITY_SHIFT 23
+#define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT)
+#define IVPR_SENSE_SHIFT 22
+#define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT)
+
+#define IVPR_PRIORITY_MASK (0xF << 16)
+#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
+#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
+
+/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
+#define IDR_EP 0x80000000 /* external pin */
+#define IDR_CI 0x40000000 /* critical interrupt */
+
+struct irq_dest {
+ struct kvm_vcpu *vcpu;
+
+ int32_t ctpr; /* CPU current task priority */
+ struct irq_queue raised;
+ struct irq_queue servicing;
+
+ /* Count of IRQ sources asserting on non-INT outputs */
+ uint32_t outputs_active[NUM_OUTPUTS];
+};
+
+#define MAX_MMIO_REGIONS 10
+
+struct openpic {
+ struct kvm *kvm;
+ struct kvm_device *dev;
+ struct kvm_io_device mmio;
+ const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
+ int num_mmio_regions;
+
+ gpa_t reg_base;
+ spinlock_t lock;
+
+ /* Behavior control */
+ struct fsl_mpic_info *fsl;
+ uint32_t model;
+ uint32_t flags;
+ uint32_t nb_irqs;
+ uint32_t vid;
+ uint32_t vir; /* Vendor identification register */
+ uint32_t vector_mask;
+ uint32_t tfrr_reset;
+ uint32_t ivpr_reset;
+ uint32_t idr_reset;
+ uint32_t brr1;
+ uint32_t mpic_mode_mask;
+
+ /* Global registers */
+ uint32_t frr; /* Feature reporting register */
+ uint32_t gcr; /* Global configuration register */
+ uint32_t pir; /* Processor initialization register */
+ uint32_t spve; /* Spurious vector register */
+ uint32_t tfrr; /* Timer frequency reporting register */
+ /* Source registers */
+ struct irq_source src[MAX_IRQ];
+ /* Local registers per output pin */
+ struct irq_dest dst[MAX_CPU];
+ uint32_t nb_cpus;
+ /* Timer registers */
+ struct {
+ uint32_t tccr; /* Global timer current count register */
+ uint32_t tbcr; /* Global timer base count register */
+ } timers[MAX_TMR];
+ /* Shared MSI registers */
+ struct {
+ uint32_t msir; /* Shared Message Signaled Interrupt Register */
+ } msi[MAX_MSI];
+ uint32_t max_irq;
+ uint32_t irq_ipi0;
+ uint32_t irq_tim0;
+ uint32_t irq_msi;
+};
+
+
+static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
+ int output)
+{
+ struct kvm_interrupt irq = {
+ .irq = KVM_INTERRUPT_SET_LEVEL,
+ };
+
+ if (!dst->vcpu) {
+ pr_debug("%s: destination cpu %d does not exist\n",
+ __func__, (int)(dst - &opp->dst[0]));
+ return;
+ }
+
+ pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+ output);
+
+ if (output != ILR_INTTGT_INT) /* TODO */
+ return;
+
+ kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
+}
+
+static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
+ int output)
+{
+ if (!dst->vcpu) {
+ pr_debug("%s: destination cpu %d does not exist\n",
+ __func__, (int)(dst - &opp->dst[0]));
+ return;
+ }
+
+ pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+ output);
+
+ if (output != ILR_INTTGT_INT) /* TODO */
+ return;
+
+ kvmppc_core_dequeue_external(dst->vcpu);
+}
+
+static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
+{
+ set_bit(n_IRQ, q->queue);
+}
+
+static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
+{
+ clear_bit(n_IRQ, q->queue);
+}
+
+static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
+{
+ return test_bit(n_IRQ, q->queue);
+}
+
+static void IRQ_check(struct openpic *opp, struct irq_queue *q)
+{
+ int irq = -1;
+ int next = -1;
+ int priority = -1;
+
+ for (;;) {
+ irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
+ if (irq == opp->max_irq)
+ break;
+
+ pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
+ irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
+
+ if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
+ next = irq;
+ priority = IVPR_PRIORITY(opp->src[irq].ivpr);
+ }
+ }
+
+ q->next = next;
+ q->priority = priority;
+}
+
+static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
+{
+ /* XXX: optimize */
+ IRQ_check(opp, q);
+
+ return q->next;
+}
+
+static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
+ bool active, bool was_active)
+{
+ struct irq_dest *dst;
+ struct irq_source *src;
+ int priority;
+
+ dst = &opp->dst[n_CPU];
+ src = &opp->src[n_IRQ];
+
+ pr_debug("%s: IRQ %d active %d was %d\n",
+ __func__, n_IRQ, active, was_active);
+
+ if (src->output != ILR_INTTGT_INT) {
+ pr_debug("%s: output %d irq %d active %d was %d count %d\n",
+ __func__, src->output, n_IRQ, active, was_active,
+ dst->outputs_active[src->output]);
+
+ /* On Freescale MPIC, critical interrupts ignore priority,
+ * IACK, EOI, etc. Before MPIC v4.1 they also ignore
+ * masking.
+ */
+ if (active) {
+ if (!was_active &&
+ dst->outputs_active[src->output]++ == 0) {
+ pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
+ __func__, src->output, n_CPU, n_IRQ);
+ mpic_irq_raise(opp, dst, src->output);
+ }
+ } else {
+ if (was_active &&
+ --dst->outputs_active[src->output] == 0) {
+ pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
+ __func__, src->output, n_CPU, n_IRQ);
+ mpic_irq_lower(opp, dst, src->output);
+ }
+ }
+
+ return;
+ }
+
+ priority = IVPR_PRIORITY(src->ivpr);
+
+ /* Even if the interrupt doesn't have enough priority,
+ * it is still raised, in case ctpr is lowered later.
+ */
+ if (active)
+ IRQ_setbit(&dst->raised, n_IRQ);
+ else
+ IRQ_resetbit(&dst->raised, n_IRQ);
+
+ IRQ_check(opp, &dst->raised);
+
+ if (active && priority <= dst->ctpr) {
+ pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
+ __func__, n_IRQ, priority, dst->ctpr, n_CPU);
+ active = 0;
+ }
+
+ if (active) {
+ if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
+ priority <= dst->servicing.priority) {
+ pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
+ __func__, n_IRQ, dst->servicing.next, n_CPU);
+ } else {
+ pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
+ __func__, n_CPU, n_IRQ, dst->raised.next);
+ mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+ }
+ } else {
+ IRQ_get_next(opp, &dst->servicing);
+ if (dst->raised.priority > dst->ctpr &&
+ dst->raised.priority > dst->servicing.priority) {
+ pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
+ __func__, n_IRQ, dst->raised.next,
+ dst->raised.priority, dst->ctpr,
+ dst->servicing.priority, n_CPU);
+ /* IRQ line stays asserted */
+ } else {
+ pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
+ __func__, n_IRQ, dst->ctpr,
+ dst->servicing.priority, n_CPU);
+ mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+ }
+ }
+}
+
+/* update pic state because registers for n_IRQ have changed value */
+static void openpic_update_irq(struct openpic *opp, int n_IRQ)
+{
+ struct irq_source *src;
+ bool active, was_active;
+ int i;
+
+ src = &opp->src[n_IRQ];
+ active = src->pending;
+
+ if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
+ /* Interrupt source is disabled */
+ pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
+ active = false;
+ }
+
+ was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
+
+ /*
+ * We don't have a similar check for already-active because
+ * ctpr may have changed and we need to withdraw the interrupt.
+ */
+ if (!active && !was_active) {
+ pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
+ return;
+ }
+
+ if (active)
+ src->ivpr |= IVPR_ACTIVITY_MASK;
+ else
+ src->ivpr &= ~IVPR_ACTIVITY_MASK;
+
+ if (src->destmask == 0) {
+ /* No target */
+ pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
+ return;
+ }
+
+ if (src->destmask == (1 << src->last_cpu)) {
+ /* Only one CPU is allowed to receive this IRQ */
+ IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
+ } else if (!(src->ivpr & IVPR_MODE_MASK)) {
+ /* Directed delivery mode */
+ for (i = 0; i < opp->nb_cpus; i++) {
+ if (src->destmask & (1 << i)) {
+ IRQ_local_pipe(opp, i, n_IRQ, active,
+ was_active);
+ }
+ }
+ } else {
+ /* Distributed delivery mode */
+ for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
+ if (i == opp->nb_cpus)
+ i = 0;
+
+ if (src->destmask & (1 << i)) {
+ IRQ_local_pipe(opp, i, n_IRQ, active,
+ was_active);
+ src->last_cpu = i;
+ break;
+ }
+ }
+ }
+}
+
+static void openpic_set_irq(void *opaque, int n_IRQ, int level)
+{
+ struct openpic *opp = opaque;
+ struct irq_source *src;
+
+ if (n_IRQ >= MAX_IRQ) {
+ WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
+ return;
+ }
+
+ src = &opp->src[n_IRQ];
+ pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
+ n_IRQ, level, src->ivpr);
+ if (src->level) {
+ /* level-sensitive irq */
+ src->pending = level;
+ openpic_update_irq(opp, n_IRQ);
+ } else {
+ /* edge-sensitive irq */
+ if (level) {
+ src->pending = 1;
+ openpic_update_irq(opp, n_IRQ);
+ }
+
+ if (src->output != ILR_INTTGT_INT) {
+ /* Edge-triggered interrupts shouldn't be used
+ * with non-INT delivery, but just in case,
+ * try to make it do something sane rather than
+ * cause an interrupt storm. This is close to
+ * what you'd probably see happen in real hardware.
+ */
+ src->pending = 0;
+ openpic_update_irq(opp, n_IRQ);
+ }
+ }
+}
+
+static void openpic_reset(struct openpic *opp)
+{
+ int i;
+
+ opp->gcr = GCR_RESET;
+ /* Initialise controller registers */
+ opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+ (opp->vid << FRR_VID_SHIFT);
+
+ opp->pir = 0;
+ opp->spve = -1 & opp->vector_mask;
+ opp->tfrr = opp->tfrr_reset;
+ /* Initialise IRQ sources */
+ for (i = 0; i < opp->max_irq; i++) {
+ opp->src[i].ivpr = opp->ivpr_reset;
+ opp->src[i].idr = opp->idr_reset;
+
+ switch (opp->src[i].type) {
+ case IRQ_TYPE_NORMAL:
+ opp->src[i].level =
+ !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+ break;
+
+ case IRQ_TYPE_FSLINT:
+ opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+ break;
+
+ case IRQ_TYPE_FSLSPECIAL:
+ break;
+ }
+ }
+ /* Initialise IRQ destinations */
+ for (i = 0; i < MAX_CPU; i++) {
+ opp->dst[i].ctpr = 15;
+ memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
+ opp->dst[i].raised.next = -1;
+ memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
+ opp->dst[i].servicing.next = -1;
+ }
+ /* Initialise timers */
+ for (i = 0; i < MAX_TMR; i++) {
+ opp->timers[i].tccr = 0;
+ opp->timers[i].tbcr = TBCR_CI;
+ }
+ /* Go out of RESET state */
+ opp->gcr = 0;
+}
+
+static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
+{
+ return opp->src[n_IRQ].idr;
+}
+
+static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
+{
+ if (opp->flags & OPENPIC_FLAG_ILR)
+ return opp->src[n_IRQ].output;
+
+ return 0xffffffff;
+}
+
+static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
+{
+ return opp->src[n_IRQ].ivpr;
+}
+
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+ uint32_t val)
+{
+ struct irq_source *src = &opp->src[n_IRQ];
+ uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
+ uint32_t crit_mask = 0;
+ uint32_t mask = normal_mask;
+ int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
+ int i;
+
+ if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+ crit_mask = mask << crit_shift;
+ mask |= crit_mask | IDR_EP;
+ }
+
+ src->idr = val & mask;
+ pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
+
+ if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+ if (src->idr & crit_mask) {
+ if (src->idr & normal_mask) {
+ pr_debug("%s: IRQ configured for multiple output types, using critical\n",
+ __func__);
+ }
+
+ src->output = ILR_INTTGT_CINT;
+ src->nomask = true;
+ src->destmask = 0;
+
+ for (i = 0; i < opp->nb_cpus; i++) {
+ int n_ci = IDR_CI0_SHIFT - i;
+
+ if (src->idr & (1UL << n_ci))
+ src->destmask |= 1UL << i;
+ }
+ } else {
+ src->output = ILR_INTTGT_INT;
+ src->nomask = false;
+ src->destmask = src->idr & normal_mask;
+ }
+ } else {
+ src->destmask = src->idr;
+ }
+}
+
+static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
+ uint32_t val)
+{
+ if (opp->flags & OPENPIC_FLAG_ILR) {
+ struct irq_source *src = &opp->src[n_IRQ];
+
+ src->output = val & ILR_INTTGT_MASK;
+ pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
+ src->output);
+
+ /* TODO: on MPIC v4.0 only, set nomask for non-INT */
+ }
+}
+
+static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
+ uint32_t val)
+{
+ uint32_t mask;
+
+ /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+ * the polarity bit is read-only on internal interrupts.
+ */
+ mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
+ IVPR_POLARITY_MASK | opp->vector_mask;
+
+ /* ACTIVITY bit is read-only */
+ opp->src[n_IRQ].ivpr =
+ (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
+
+ /* For FSL internal interrupts, The sense bit is reserved and zero,
+ * and the interrupt is always level-triggered. Timers and IPIs
+ * have no sense or polarity bits, and are edge-triggered.
+ */
+ switch (opp->src[n_IRQ].type) {
+ case IRQ_TYPE_NORMAL:
+ opp->src[n_IRQ].level =
+ !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
+ break;
+
+ case IRQ_TYPE_FSLINT:
+ opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
+ break;
+
+ case IRQ_TYPE_FSLSPECIAL:
+ opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
+ break;
+ }
+
+ openpic_update_irq(opp, n_IRQ);
+ pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+ opp->src[n_IRQ].ivpr);
+}
+
+static void openpic_gcr_write(struct openpic *opp, uint64_t val)
+{
+ if (val & GCR_RESET) {
+ openpic_reset(opp);
+ return;
+ }
+
+ opp->gcr &= ~opp->mpic_mode_mask;
+ opp->gcr |= val & opp->mpic_mode_mask;
+}
+
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int err = 0;
+
+ pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return 0;
+
+ switch (addr) {
+ case 0x00: /* Block Revision Register1 (BRR1) is Readonly */
+ break;
+ case 0x40:
+ case 0x50:
+ case 0x60:
+ case 0x70:
+ case 0x80:
+ case 0x90:
+ case 0xA0:
+ case 0xB0:
+ err = openpic_cpu_write_internal(opp, addr, val,
+ get_current_cpu());
+ break;
+ case 0x1000: /* FRR */
+ break;
+ case 0x1020: /* GCR */
+ openpic_gcr_write(opp, val);
+ break;
+ case 0x1080: /* VIR */
+ break;
+ case 0x1090: /* PIR */
+ /*
+ * This register is used to reset a CPU core --
+ * let userspace handle it.
+ */
+ err = -ENXIO;
+ break;
+ case 0x10A0: /* IPI_IVPR */
+ case 0x10B0:
+ case 0x10C0:
+ case 0x10D0: {
+ int idx;
+ idx = (addr - 0x10A0) >> 4;
+ write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
+ break;
+ }
+ case 0x10E0: /* SPVE */
+ opp->spve = val & opp->vector_mask;
+ break;
+ default:
+ break;
+ }
+
+ return err;
+}
+
+static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ u32 retval;
+ int err = 0;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+ if (addr & 0xF)
+ goto out;
+
+ switch (addr) {
+ case 0x1000: /* FRR */
+ retval = opp->frr;
+ retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
+ break;
+ case 0x1020: /* GCR */
+ retval = opp->gcr;
+ break;
+ case 0x1080: /* VIR */
+ retval = opp->vir;
+ break;
+ case 0x1090: /* PIR */
+ retval = 0x00000000;
+ break;
+ case 0x00: /* Block Revision Register1 (BRR1) */
+ retval = opp->brr1;
+ break;
+ case 0x40:
+ case 0x50:
+ case 0x60:
+ case 0x70:
+ case 0x80:
+ case 0x90:
+ case 0xA0:
+ case 0xB0:
+ err = openpic_cpu_read_internal(opp, addr,
+ &retval, get_current_cpu());
+ break;
+ case 0x10A0: /* IPI_IVPR */
+ case 0x10B0:
+ case 0x10C0:
+ case 0x10D0:
+ {
+ int idx;
+ idx = (addr - 0x10A0) >> 4;
+ retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
+ }
+ break;
+ case 0x10E0: /* SPVE */
+ retval = opp->spve;
+ break;
+ default:
+ break;
+ }
+
+out:
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+ *ptr = retval;
+ return err;
+}
+
+static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int idx;
+
+ addr += 0x10f0;
+
+ pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return 0;
+
+ if (addr == 0x10f0) {
+ /* TFRR */
+ opp->tfrr = val;
+ return 0;
+ }
+
+ idx = (addr >> 6) & 0x3;
+ addr = addr & 0x30;
+
+ switch (addr & 0x30) {
+ case 0x00: /* TCCR */
+ break;
+ case 0x10: /* TBCR */
+ if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
+ (val & TBCR_CI) == 0 &&
+ (opp->timers[idx].tbcr & TBCR_CI) != 0)
+ opp->timers[idx].tccr &= ~TCCR_TOG;
+
+ opp->timers[idx].tbcr = val;
+ break;
+ case 0x20: /* TVPR */
+ write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
+ break;
+ case 0x30: /* TDR */
+ write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ uint32_t retval = -1;
+ int idx;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ if (addr & 0xF)
+ goto out;
+
+ idx = (addr >> 6) & 0x3;
+ if (addr == 0x0) {
+ /* TFRR */
+ retval = opp->tfrr;
+ goto out;
+ }
+
+ switch (addr & 0x30) {
+ case 0x00: /* TCCR */
+ retval = opp->timers[idx].tccr;
+ break;
+ case 0x10: /* TBCR */
+ retval = opp->timers[idx].tbcr;
+ break;
+ case 0x20: /* TIPV */
+ retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
+ break;
+ case 0x30: /* TIDE (TIDR) */
+ retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
+ break;
+ }
+
+out:
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+ *ptr = retval;
+ return 0;
+}
+
+static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int idx;
+
+ pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+
+ addr = addr & 0xffff;
+ idx = addr >> 5;
+
+ switch (addr & 0x1f) {
+ case 0x00:
+ write_IRQreg_ivpr(opp, idx, val);
+ break;
+ case 0x10:
+ write_IRQreg_idr(opp, idx, val);
+ break;
+ case 0x18:
+ write_IRQreg_ilr(opp, idx, val);
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ uint32_t retval;
+ int idx;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+
+ addr = addr & 0xffff;
+ idx = addr >> 5;
+
+ switch (addr & 0x1f) {
+ case 0x00:
+ retval = read_IRQreg_ivpr(opp, idx);
+ break;
+ case 0x10:
+ retval = read_IRQreg_idr(opp, idx);
+ break;
+ case 0x18:
+ retval = read_IRQreg_ilr(opp, idx);
+ break;
+ }
+
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+ *ptr = retval;
+ return 0;
+}
+
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int idx = opp->irq_msi;
+ int srs, ibs;
+
+ pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return 0;
+
+ switch (addr) {
+ case MSIIR_OFFSET:
+ srs = val >> MSIIR_SRS_SHIFT;
+ idx += srs;
+ ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+ opp->msi[srs].msir |= 1 << ibs;
+ openpic_set_irq(opp, idx, 1);
+ break;
+ default:
+ /* most registers are read-only, thus ignored */
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ uint32_t r = 0;
+ int i, srs;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ if (addr & 0xF)
+ return -ENXIO;
+
+ srs = addr >> 4;
+
+ switch (addr) {
+ case 0x00:
+ case 0x10:
+ case 0x20:
+ case 0x30:
+ case 0x40:
+ case 0x50:
+ case 0x60:
+ case 0x70: /* MSIRs */
+ r = opp->msi[srs].msir;
+ /* Clear on read */
+ opp->msi[srs].msir = 0;
+ openpic_set_irq(opp, opp->irq_msi + srs, 0);
+ break;
+ case 0x120: /* MSISR */
+ for (i = 0; i < MAX_MSI; i++)
+ r |= (opp->msi[i].msir ? 1 : 0) << i;
+ break;
+ }
+
+ pr_debug("%s: => 0x%08x\n", __func__, r);
+ *ptr = r;
+ return 0;
+}
+
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ uint32_t r = 0;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+
+ /* TODO: EISR/EIMR */
+
+ *ptr = r;
+ return 0;
+}
+
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
+{
+ pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+
+ /* TODO: EISR/EIMR */
+ return 0;
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+ u32 val, int idx)
+{
+ struct openpic *opp = opaque;
+ struct irq_source *src;
+ struct irq_dest *dst;
+ int s_IRQ, n_IRQ;
+
+ pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
+ addr, val);
+
+ if (idx < 0)
+ return 0;
+
+ if (addr & 0xF)
+ return 0;
+
+ dst = &opp->dst[idx];
+ addr &= 0xFF0;
+ switch (addr) {
+ case 0x40: /* IPIDR */
+ case 0x50:
+ case 0x60:
+ case 0x70:
+ idx = (addr - 0x40) >> 4;
+ /* we use IDE as mask which CPUs to deliver the IPI to still. */
+ opp->src[opp->irq_ipi0 + idx].destmask |= val;
+ openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
+ openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
+ break;
+ case 0x80: /* CTPR */
+ dst->ctpr = val & 0x0000000F;
+
+ pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
+ __func__, idx, dst->ctpr, dst->raised.priority,
+ dst->servicing.priority);
+
+ if (dst->raised.priority <= dst->ctpr) {
+ pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
+ __func__, idx);
+ mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+ } else if (dst->raised.priority > dst->servicing.priority) {
+ pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
+ __func__, idx, dst->raised.next);
+ mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+ }
+
+ break;
+ case 0x90: /* WHOAMI */
+ /* Read-only register */
+ break;
+ case 0xA0: /* IACK */
+ /* Read-only register */
+ break;
+ case 0xB0: { /* EOI */
+ int notify_eoi;
+
+ pr_debug("EOI\n");
+ s_IRQ = IRQ_get_next(opp, &dst->servicing);
+
+ if (s_IRQ < 0) {
+ pr_debug("%s: EOI with no interrupt in service\n",
+ __func__);
+ break;
+ }
+
+ IRQ_resetbit(&dst->servicing, s_IRQ);
+ /* Notify listeners that the IRQ is over */
+ notify_eoi = s_IRQ;
+ /* Set up next servicing IRQ */
+ s_IRQ = IRQ_get_next(opp, &dst->servicing);
+ /* Check queued interrupts. */
+ n_IRQ = IRQ_get_next(opp, &dst->raised);
+ src = &opp->src[n_IRQ];
+ if (n_IRQ != -1 &&
+ (s_IRQ == -1 ||
+ IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
+ pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
+ idx, n_IRQ);
+ mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+ }
+
+ spin_unlock(&opp->lock);
+ kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
+ spin_lock(&opp->lock);
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+
+ return openpic_cpu_write_internal(opp, addr, val,
+ (addr & 0x1f000) >> 12);
+}
+
+static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
+ int cpu)
+{
+ struct irq_source *src;
+ int retval, irq;
+
+ pr_debug("Lower OpenPIC INT output\n");
+ mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+
+ irq = IRQ_get_next(opp, &dst->raised);
+ pr_debug("IACK: irq=%d\n", irq);
+
+ if (irq == -1)
+ /* No more interrupt pending */
+ return opp->spve;
+
+ src = &opp->src[irq];
+ if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
+ !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
+ pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
+ __func__, irq, dst->ctpr, src->ivpr);
+ openpic_update_irq(opp, irq);
+ retval = opp->spve;
+ } else {
+ /* IRQ enter servicing state */
+ IRQ_setbit(&dst->servicing, irq);
+ retval = IVPR_VECTOR(opp, src->ivpr);
+ }
+
+ if (!src->level) {
+ /* edge-sensitive IRQ */
+ src->ivpr &= ~IVPR_ACTIVITY_MASK;
+ src->pending = 0;
+ IRQ_resetbit(&dst->raised, irq);
+ }
+
+ if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
+ src->destmask &= ~(1 << cpu);
+ if (src->destmask && !src->level) {
+ /* trigger on CPUs that didn't know about it yet */
+ openpic_set_irq(opp, irq, 1);
+ openpic_set_irq(opp, irq, 0);
+ /* if all CPUs knew about it, set active bit again */
+ src->ivpr |= IVPR_ACTIVITY_MASK;
+ }
+ }
+
+ return retval;
+}
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+ struct openpic *opp = vcpu->arch.mpic;
+ int cpu = vcpu->arch.irq_cpu_id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&opp->lock, flags);
+
+ if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
+ kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
+
+ spin_unlock_irqrestore(&opp->lock, flags);
+}
+
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+ u32 *ptr, int idx)
+{
+ struct openpic *opp = opaque;
+ struct irq_dest *dst;
+ uint32_t retval;
+
+ pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
+ retval = 0xFFFFFFFF;
+
+ if (idx < 0)
+ goto out;
+
+ if (addr & 0xF)
+ goto out;
+
+ dst = &opp->dst[idx];
+ addr &= 0xFF0;
+ switch (addr) {
+ case 0x80: /* CTPR */
+ retval = dst->ctpr;
+ break;
+ case 0x90: /* WHOAMI */
+ retval = idx;
+ break;
+ case 0xA0: /* IACK */
+ retval = openpic_iack(opp, dst, idx);
+ break;
+ case 0xB0: /* EOI */
+ retval = 0;
+ break;
+ default:
+ break;
+ }
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+
+out:
+ *ptr = retval;
+ return 0;
+}
+
+static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+
+ return openpic_cpu_read_internal(opp, addr, ptr,
+ (addr & 0x1f000) >> 12);
+}
+
+struct mem_reg {
+ int (*read)(void *opaque, gpa_t addr, u32 *ptr);
+ int (*write)(void *opaque, gpa_t addr, u32 val);
+ gpa_t start_addr;
+ int size;
+};
+
+static const struct mem_reg openpic_gbl_mmio = {
+ .write = openpic_gbl_write,
+ .read = openpic_gbl_read,
+ .start_addr = OPENPIC_GLB_REG_START,
+ .size = OPENPIC_GLB_REG_SIZE,
+};
+
+static const struct mem_reg openpic_tmr_mmio = {
+ .write = openpic_tmr_write,
+ .read = openpic_tmr_read,
+ .start_addr = OPENPIC_TMR_REG_START,
+ .size = OPENPIC_TMR_REG_SIZE,
+};
+
+static const struct mem_reg openpic_cpu_mmio = {
+ .write = openpic_cpu_write,
+ .read = openpic_cpu_read,
+ .start_addr = OPENPIC_CPU_REG_START,
+ .size = OPENPIC_CPU_REG_SIZE,
+};
+
+static const struct mem_reg openpic_src_mmio = {
+ .write = openpic_src_write,
+ .read = openpic_src_read,
+ .start_addr = OPENPIC_SRC_REG_START,
+ .size = OPENPIC_SRC_REG_SIZE,
+};
+
+static const struct mem_reg openpic_msi_mmio = {
+ .read = openpic_msi_read,
+ .write = openpic_msi_write,
+ .start_addr = OPENPIC_MSI_REG_START,
+ .size = OPENPIC_MSI_REG_SIZE,
+};
+
+static const struct mem_reg openpic_summary_mmio = {
+ .read = openpic_summary_read,
+ .write = openpic_summary_write,
+ .start_addr = OPENPIC_SUMMARY_REG_START,
+ .size = OPENPIC_SUMMARY_REG_SIZE,
+};
+
+static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
+{
+ if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
+ WARN(1, "kvm mpic: too many mmio regions\n");
+ return;
+ }
+
+ opp->mmio_regions[opp->num_mmio_regions++] = mr;
+}
+
+static void fsl_common_init(struct openpic *opp)
+{
+ int i;
+ int virq = MAX_SRC;
+
+ add_mmio_region(opp, &openpic_msi_mmio);
+ add_mmio_region(opp, &openpic_summary_mmio);
+
+ opp->vid = VID_REVISION_1_2;
+ opp->vir = VIR_GENERIC;
+ opp->vector_mask = 0xFFFF;
+ opp->tfrr_reset = 0;
+ opp->ivpr_reset = IVPR_MASK_MASK;
+ opp->idr_reset = 1 << 0;
+ opp->max_irq = MAX_IRQ;
+
+ opp->irq_ipi0 = virq;
+ virq += MAX_IPI;
+ opp->irq_tim0 = virq;
+ virq += MAX_TMR;
+
+ BUG_ON(virq > MAX_IRQ);
+
+ opp->irq_msi = 224;
+
+ for (i = 0; i < opp->fsl->max_ext; i++)
+ opp->src[i].level = false;
+
+ /* Internal interrupts, including message and MSI */
+ for (i = 16; i < MAX_SRC; i++) {
+ opp->src[i].type = IRQ_TYPE_FSLINT;
+ opp->src[i].level = true;
+ }
+
+ /* timers and IPIs */
+ for (i = MAX_SRC; i < virq; i++) {
+ opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
+ opp->src[i].level = false;
+ }
+}
+
+static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
+{
+ int i;
+
+ for (i = 0; i < opp->num_mmio_regions; i++) {
+ const struct mem_reg *mr = opp->mmio_regions[i];
+
+ if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+ continue;
+
+ return mr->read(opp, addr - mr->start_addr, ptr);
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
+{
+ int i;
+
+ for (i = 0; i < opp->num_mmio_regions; i++) {
+ const struct mem_reg *mr = opp->mmio_regions[i];
+
+ if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+ continue;
+
+ return mr->write(opp, addr - mr->start_addr, val);
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
+ int len, void *ptr)
+{
+ struct openpic *opp = container_of(this, struct openpic, mmio);
+ int ret;
+ union {
+ u32 val;
+ u8 bytes[4];
+ } u;
+
+ if (addr & (len - 1)) {
+ pr_debug("%s: bad alignment %llx/%d\n",
+ __func__, addr, len);
+ return -EINVAL;
+ }
+
+ spin_lock_irq(&opp->lock);
+ ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
+ spin_unlock_irq(&opp->lock);
+
+ /*
+ * Technically only 32-bit accesses are allowed, but be nice to
+ * people dumping registers a byte at a time -- it works in real
+ * hardware (reads only, not writes).
+ */
+ if (len == 4) {
+ *(u32 *)ptr = u.val;
+ pr_debug("%s: addr %llx ret %d len 4 val %x\n",
+ __func__, addr, ret, u.val);
+ } else if (len == 1) {
+ *(u8 *)ptr = u.bytes[addr & 3];
+ pr_debug("%s: addr %llx ret %d len 1 val %x\n",
+ __func__, addr, ret, u.bytes[addr & 3]);
+ } else {
+ pr_debug("%s: bad length %d\n", __func__, len);
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
+ int len, const void *ptr)
+{
+ struct openpic *opp = container_of(this, struct openpic, mmio);
+ int ret;
+
+ if (len != 4) {
+ pr_debug("%s: bad length %d\n", __func__, len);
+ return -EOPNOTSUPP;
+ }
+ if (addr & 3) {
+ pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_irq(&opp->lock);
+ ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
+ *(const u32 *)ptr);
+ spin_unlock_irq(&opp->lock);
+
+ pr_debug("%s: addr %llx ret %d val %x\n",
+ __func__, addr, ret, *(const u32 *)ptr);
+
+ return ret;
+}
+
+static const struct kvm_io_device_ops mpic_mmio_ops = {
+ .read = kvm_mpic_read,
+ .write = kvm_mpic_write,
+};
+
+static void map_mmio(struct openpic *opp)
+{
+ kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
+
+ kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
+ opp->reg_base, OPENPIC_REG_SIZE,
+ &opp->mmio);
+}
+
+static void unmap_mmio(struct openpic *opp)
+{
+ kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+}
+
+static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
+{
+ u64 base;
+
+ if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
+ return -EFAULT;
+
+ if (base & 0x3ffff) {
+ pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
+ __func__, base);
+ return -EINVAL;
+ }
+
+ if (base == opp->reg_base)
+ return 0;
+
+ mutex_lock(&opp->kvm->slots_lock);
+
+ unmap_mmio(opp);
+ opp->reg_base = base;
+
+ pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
+ __func__, base);
+
+ if (base == 0)
+ goto out;
+
+ map_mmio(opp);
+
+out:
+ mutex_unlock(&opp->kvm->slots_lock);
+ return 0;
+}
+
+#define ATTR_SET 0
+#define ATTR_GET 1
+
+static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
+{
+ int ret;
+
+ if (addr & 3)
+ return -ENXIO;
+
+ spin_lock_irq(&opp->lock);
+
+ if (type == ATTR_SET)
+ ret = kvm_mpic_write_internal(opp, addr, *val);
+ else
+ ret = kvm_mpic_read_internal(opp, addr, val);
+
+ spin_unlock_irq(&opp->lock);
+
+ pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
+
+ return ret;
+}
+
+static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct openpic *opp = dev->private;
+ u32 attr32;
+
+ switch (attr->group) {
+ case KVM_DEV_MPIC_GRP_MISC:
+ switch (attr->attr) {
+ case KVM_DEV_MPIC_BASE_ADDR:
+ return set_base_addr(opp, attr);
+ }
+
+ break;
+
+ case KVM_DEV_MPIC_GRP_REGISTER:
+ if (get_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ return access_reg(opp, attr->attr, &attr32, ATTR_SET);
+
+ case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+ if (attr->attr > MAX_SRC)
+ return -EINVAL;
+
+ if (get_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ if (attr32 != 0 && attr32 != 1)
+ return -EINVAL;
+
+ spin_lock_irq(&opp->lock);
+ openpic_set_irq(opp, attr->attr, attr32);
+ spin_unlock_irq(&opp->lock);
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct openpic *opp = dev->private;
+ u64 attr64;
+ u32 attr32;
+ int ret;
+
+ switch (attr->group) {
+ case KVM_DEV_MPIC_GRP_MISC:
+ switch (attr->attr) {
+ case KVM_DEV_MPIC_BASE_ADDR:
+ mutex_lock(&opp->kvm->slots_lock);
+ attr64 = opp->reg_base;
+ mutex_unlock(&opp->kvm->slots_lock);
+
+ if (copy_to_user((u64 __user *)(long)attr->addr,
+ &attr64, sizeof(u64)))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ break;
+
+ case KVM_DEV_MPIC_GRP_REGISTER:
+ ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
+ if (ret)
+ return ret;
+
+ if (put_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ return 0;
+
+ case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+ if (attr->attr > MAX_SRC)
+ return -EINVAL;
+
+ spin_lock_irq(&opp->lock);
+ attr32 = opp->src[attr->attr].pending;
+ spin_unlock_irq(&opp->lock);
+
+ if (put_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_MPIC_GRP_MISC:
+ switch (attr->attr) {
+ case KVM_DEV_MPIC_BASE_ADDR:
+ return 0;
+ }
+
+ break;
+
+ case KVM_DEV_MPIC_GRP_REGISTER:
+ return 0;
+
+ case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+ if (attr->attr > MAX_SRC)
+ break;
+
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static void mpic_destroy(struct kvm_device *dev)
+{
+ struct openpic *opp = dev->private;
+
+ dev->kvm->arch.mpic = NULL;
+ kfree(opp);
+}
+
+static int mpic_set_default_irq_routing(struct openpic *opp)
+{
+ struct kvm_irq_routing_entry *routing;
+
+ /* Create a nop default map, so that dereferencing it still works */
+ routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
+ if (!routing)
+ return -ENOMEM;
+
+ kvm_set_irq_routing(opp->kvm, routing, 0, 0);
+
+ kfree(routing);
+ return 0;
+}
+
+static int mpic_create(struct kvm_device *dev, u32 type)
+{
+ struct openpic *opp;
+ int ret;
+
+ /* We only support one MPIC at a time for now */
+ if (dev->kvm->arch.mpic)
+ return -EINVAL;
+
+ opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
+ if (!opp)
+ return -ENOMEM;
+
+ dev->private = opp;
+ opp->kvm = dev->kvm;
+ opp->dev = dev;
+ opp->model = type;
+ spin_lock_init(&opp->lock);
+
+ add_mmio_region(opp, &openpic_gbl_mmio);
+ add_mmio_region(opp, &openpic_tmr_mmio);
+ add_mmio_region(opp, &openpic_src_mmio);
+ add_mmio_region(opp, &openpic_cpu_mmio);
+
+ switch (opp->model) {
+ case KVM_DEV_TYPE_FSL_MPIC_20:
+ opp->fsl = &fsl_mpic_20;
+ opp->brr1 = 0x00400200;
+ opp->flags |= OPENPIC_FLAG_IDR_CRIT;
+ opp->nb_irqs = 80;
+ opp->mpic_mode_mask = GCR_MODE_MIXED;
+
+ fsl_common_init(opp);
+
+ break;
+
+ case KVM_DEV_TYPE_FSL_MPIC_42:
+ opp->fsl = &fsl_mpic_42;
+ opp->brr1 = 0x00400402;
+ opp->flags |= OPENPIC_FLAG_ILR;
+ opp->nb_irqs = 196;
+ opp->mpic_mode_mask = GCR_MODE_PROXY;
+
+ fsl_common_init(opp);
+
+ break;
+
+ default:
+ ret = -ENODEV;
+ goto err;
+ }
+
+ ret = mpic_set_default_irq_routing(opp);
+ if (ret)
+ goto err;
+
+ openpic_reset(opp);
+
+ smp_wmb();
+ dev->kvm->arch.mpic = opp;
+
+ return 0;
+
+err:
+ kfree(opp);
+ return ret;
+}
+
+struct kvm_device_ops kvm_mpic_ops = {
+ .name = "kvm-mpic",
+ .create = mpic_create,
+ .destroy = mpic_destroy,
+ .set_attr = mpic_set_attr,
+ .get_attr = mpic_get_attr,
+ .has_attr = mpic_has_attr,
+};
+
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+ u32 cpu)
+{
+ struct openpic *opp = dev->private;
+ int ret = 0;
+
+ if (dev->ops != &kvm_mpic_ops)
+ return -EPERM;
+ if (opp->kvm != vcpu->kvm)
+ return -EPERM;
+ if (cpu < 0 || cpu >= MAX_CPU)
+ return -EPERM;
+
+ spin_lock_irq(&opp->lock);
+
+ if (opp->dst[cpu].vcpu) {
+ ret = -EEXIST;
+ goto out;
+ }
+ if (vcpu->arch.irq_type) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ opp->dst[cpu].vcpu = vcpu;
+ opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
+
+ vcpu->arch.mpic = opp;
+ vcpu->arch.irq_cpu_id = cpu;
+ vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
+
+ /* This might need to be changed if GCR gets extended */
+ if (opp->mpic_mode_mask == GCR_MODE_PROXY)
+ vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
+
+out:
+ spin_unlock_irq(&opp->lock);
+ return ret;
+}
+
+/*
+ * This should only happen immediately before the mpic is destroyed,
+ * so we shouldn't need to worry about anything still trying to
+ * access the vcpu pointer.
+ */
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
+{
+ BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
+
+ opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
+}
+
+/*
+ * Return value:
+ * < 0 Interrupt was ignored (masked or not delivered for other reasons)
+ * = 0 Interrupt was coalesced (previous irq is still pending)
+ * > 0 Number of CPUs interrupt was delivered to
+ */
+static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ u32 irq = e->irqchip.pin;
+ struct openpic *opp = kvm->arch.mpic;
+ unsigned long flags;
+
+ spin_lock_irqsave(&opp->lock, flags);
+ openpic_set_irq(opp, irq, level);
+ spin_unlock_irqrestore(&opp->lock, flags);
+
+ /* All code paths we care about don't check for the return value */
+ return 0;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+ struct openpic *opp = kvm->arch.mpic;
+ unsigned long flags;
+
+ spin_lock_irqsave(&opp->lock, flags);
+
+ /*
+ * XXX We ignore the target address for now, as we only support
+ * a single MSI bank.
+ */
+ openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
+ spin_unlock_irqrestore(&opp->lock, flags);
+
+ /* All code paths we care about don't check for the return value */
+ return 0;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int r = -EINVAL;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ e->set = mpic_set_irq;
+ e->irqchip.irqchip = ue->u.irqchip.irqchip;
+ e->irqchip.pin = ue->u.irqchip.pin;
+ if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
+ goto out;
+ rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
+ break;
+ case KVM_IRQ_ROUTING_MSI:
+ e->set = kvm_set_msi;
+ e->msi.address_lo = ue->u.msi.address_lo;
+ e->msi.address_hi = ue->u.msi.address_hi;
+ e->msi.data = ue->u.msi.data;
+ break;
+ default:
+ goto out;
+ }
+
+ r = 0;
+out:
+ return r;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 934413cd3a1b..6316ee336e88 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -25,6 +25,7 @@
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/file.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
@@ -32,6 +33,7 @@
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
#include "timing.h"
+#include "irq.h"
#include "../mm/mmu_decl.h"
#define CREATE_TRACE_POINTS
@@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
r = 1;
break;
#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -326,6 +329,9 @@ int kvm_dev_ioctl_check_extension(long ext)
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
#endif
+#ifdef CONFIG_KVM_MPIC
+ case KVM_CAP_IRQ_MPIC:
+#endif
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_PPC_ALLOC_HTAB:
+ case KVM_CAP_PPC_RTAS:
+#ifdef CONFIG_KVM_XICS
+ case KVM_CAP_IRQ_XICS:
+#endif
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -411,18 +421,17 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_memory_slot old,
- struct kvm_userspace_memory_region *mem,
- bool user_alloc)
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- bool user_alloc)
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
{
kvmppc_core_commit_memory_region(kvm, mem, old);
}
@@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
tasklet_kill(&vcpu->arch.tasklet);
kvmppc_remove_vcpu_debugfs(vcpu);
+
+ switch (vcpu->arch.irq_type) {
+ case KVMPPC_IRQ_MPIC:
+ kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
+ break;
+ case KVMPPC_IRQ_XICS:
+ kvmppc_xics_free_icp(vcpu);
+ break;
+ }
+
kvmppc_core_vcpu_free(vcpu);
}
@@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#endif
}
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
-{
- return -EINVAL;
-}
-
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
@@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes, int is_bigendian)
{
+ int idx, ret;
+
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
run->mmio.len);
@@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->mmio_is_write = 0;
vcpu->arch.mmio_sign_extend = 0;
- if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
- bytes, &run->mmio.data)) {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (!ret) {
kvmppc_complete_mmio_load(vcpu, run);
vcpu->mmio_needed = 0;
return EMULATE_DONE;
@@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes, int is_bigendian)
{
void *data = run->mmio.data;
+ int idx, ret;
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
- if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
- bytes, &run->mmio.data)) {
- kvmppc_complete_mmio_load(vcpu, run);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (!ret) {
vcpu->mmio_needed = 0;
return EMULATE_DONE;
}
@@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
if (irq->irq == KVM_INTERRUPT_UNSET) {
- kvmppc_core_dequeue_external(vcpu, irq);
+ kvmppc_core_dequeue_external(vcpu);
return 0;
}
@@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
case KVM_CAP_PPC_EPR:
r = 0;
- vcpu->arch.epr_enabled = cap->args[0];
+ if (cap->args[0])
+ vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
+ else
+ vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
break;
#ifdef CONFIG_BOOKE
case KVM_CAP_PPC_BOOKE_WATCHDOG:
@@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif
+#ifdef CONFIG_KVM_MPIC
+ case KVM_CAP_IRQ_MPIC: {
+ struct file *filp;
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ filp = fget(cap->args[0]);
+ if (!filp)
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(filp);
+ if (dev)
+ r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
+
+ fput(filp);
+ break;
+ }
+#endif
+#ifdef CONFIG_KVM_XICS
+ case KVM_CAP_IRQ_XICS: {
+ struct file *filp;
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ filp = fget(cap->args[0]);
+ if (!filp)
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(filp);
+ if (dev)
+ r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+
+ fput(filp);
+ break;
+ }
+#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
return 0;
}
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+ bool line_status)
+{
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->irq, irq_event->level,
+ line_status);
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
+ struct kvm *kvm __maybe_unused = filp->private_data;
void __user *argp = (void __user *)arg;
long r;
@@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
- struct kvm *kvm = filp->private_data;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_KVM_BOOK3S_64_HV
case KVM_ALLOCATE_RMA: {
- struct kvm *kvm = filp->private_data;
struct kvm_allocate_rma rma;
+ struct kvm *kvm = filp->private_data;
r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
case KVM_PPC_ALLOCATE_HTAB: {
- struct kvm *kvm = filp->private_data;
u32 htab_order;
r = -EFAULT;
@@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
case KVM_PPC_GET_HTAB_FD: {
- struct kvm *kvm = filp->private_data;
struct kvm_get_htab_fd ghf;
r = -EFAULT;
@@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_PPC_GET_SMMU_INFO: {
- struct kvm *kvm = filp->private_data;
struct kvm_ppc_smmu_info info;
memset(&info, 0, sizeof(info));
@@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+ case KVM_PPC_RTAS_DEFINE_TOKEN: {
+ struct kvm *kvm = filp->private_data;
+
+ r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+ break;
+ }
#endif /* CONFIG_PPC_BOOK3S_64 */
default:
r = -ENOTTY;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 229951ffc351..8726779e1409 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -32,6 +32,7 @@
#include <linux/perf_event.h>
#include <linux/magic.h>
#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
#include <asm/firmware.h>
#include <asm/page.h>
@@ -196,6 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
+ enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -204,6 +206,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
int trap = TRAP(regs);
int is_exec = trap == 0x400;
int fault;
+ int rc = 0;
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/*
@@ -230,28 +233,30 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
* look at it
*/
if (error_code & ICSWX_DSI_UCT) {
- int rc = acop_handle_fault(regs, address, error_code);
+ rc = acop_handle_fault(regs, address, error_code);
if (rc)
- return rc;
+ goto bail;
}
#endif /* CONFIG_PPC_ICSWX */
if (notify_page_fault(regs))
- return 0;
+ goto bail;
if (unlikely(debugger_fault_handler(regs)))
- return 0;
+ goto bail;
/* On a kernel SLB miss we can only check for a valid exception entry */
- if (!user_mode(regs) && (address >= TASK_SIZE))
- return SIGSEGV;
+ if (!user_mode(regs) && (address >= TASK_SIZE)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
defined(CONFIG_PPC_BOOK3S_64))
if (error_code & DSISR_DABRMATCH) {
/* breakpoint match */
do_break(regs, address, error_code);
- return 0;
+ goto bail;
}
#endif
@@ -260,8 +265,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
local_irq_enable();
if (in_atomic() || mm == NULL) {
- if (!user_mode(regs))
- return SIGSEGV;
+ if (!user_mode(regs)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
/* in_atomic() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
@@ -417,9 +424,11 @@ good_area:
*/
fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
- int rc = mm_fault_error(regs, address, fault);
+ rc = mm_fault_error(regs, address, fault);
if (rc >= MM_FAULT_RETURN)
- return rc;
+ goto bail;
+ else
+ rc = 0;
}
/*
@@ -454,7 +463,7 @@ good_area:
}
up_read(&mm->mmap_sem);
- return 0;
+ goto bail;
bad_area:
up_read(&mm->mmap_sem);
@@ -463,7 +472,7 @@ bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
if (user_mode(regs)) {
_exception(SIGSEGV, regs, code, address);
- return 0;
+ goto bail;
}
if (is_exec && (error_code & DSISR_PROTFAULT))
@@ -471,7 +480,11 @@ bad_area_nosemaphore:
" page (%lx) - exploit attempt? (uid: %d)\n",
address, from_kuid(&init_user_ns, current_uid()));
- return SIGSEGV;
+ rc = SIGSEGV;
+
+bail:
+ exception_exit(prev_state);
+ return rc;
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3e4c4ed19335..e303a6d74e3a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -33,6 +33,7 @@
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/memblock.h>
+#include <linux/context_tracking.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
@@ -954,6 +955,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
*/
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
+ enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
unsigned long vsid;
struct mm_struct *mm;
@@ -973,7 +975,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
mm = current->mm;
if (! mm) {
DBG_LOW(" user region with no mm !\n");
- return 1;
+ rc = 1;
+ goto bail;
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
@@ -992,19 +995,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
/* Not a valid range
* Send the problem up to do_page_fault
*/
- return 1;
+ rc = 1;
+ goto bail;
}
DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
/* Bad address. */
if (!vsid) {
DBG_LOW("Bad address!\n");
- return 1;
+ rc = 1;
+ goto bail;
}
/* Get pgdir */
pgdir = mm->pgd;
- if (pgdir == NULL)
- return 1;
+ if (pgdir == NULL) {
+ rc = 1;
+ goto bail;
+ }
/* Check CPU locality */
tmp = cpumask_of(smp_processor_id());
@@ -1027,7 +1034,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
if (ptep == NULL || !pte_present(*ptep)) {
DBG_LOW(" no PTE !\n");
- return 1;
+ rc = 1;
+ goto bail;
}
/* Add _PAGE_PRESENT to the required access perm */
@@ -1038,13 +1046,16 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
*/
if (access & ~pte_val(*ptep)) {
DBG_LOW(" no access !\n");
- return 1;
+ rc = 1;
+ goto bail;
}
#ifdef CONFIG_HUGETLB_PAGE
- if (hugeshift)
- return __hash_page_huge(ea, access, vsid, ptep, trap, local,
+ if (hugeshift) {
+ rc = __hash_page_huge(ea, access, vsid, ptep, trap, local,
ssize, hugeshift, psize);
+ goto bail;
+ }
#endif /* CONFIG_HUGETLB_PAGE */
#ifndef CONFIG_PPC_64K_PAGES
@@ -1124,6 +1135,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
pte_val(*(ptep + PTRS_PER_PTE)));
#endif
DBG_LOW(" -> rc=%d\n", rc);
+
+bail:
+ exception_exit(prev_state);
return rc;
}
EXPORT_SYMBOL_GPL(hash_page);
@@ -1230,6 +1244,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
* unmapping it first, it may see the speculated version.
*/
if (local && cpu_has_feature(CPU_FTR_TM) &&
+ current->thread.regs &&
MSR_TM_ACTIVE(current->thread.regs->msr)) {
tm_enable();
tm_abort(TM_CAUSE_TLBI);
@@ -1258,6 +1273,8 @@ void flush_hash_range(unsigned long number, int local)
*/
void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
+ enum ctx_state prev_state = exception_enter();
+
if (user_mode(regs)) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
if (rc == -2)
@@ -1267,6 +1284,8 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
_exception(SIGBUS, regs, BUS_ADRERR, address);
} else
bad_page_fault(regs, address, SIGBUS);
+
+ exception_exit(prev_state);
}
long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index c2787bf779ca..a90b9c458990 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -215,7 +215,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
unsigned long phys)
{
int mapped = htab_bolt_mapping(start, start + page_size, phys,
- PAGE_KERNEL, mmu_vmemmap_psize,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize,
mmu_kernel_ssize);
BUG_ON(mapped < 0);
}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index c627843c5b2e..426180b84978 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -13,11 +13,13 @@
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/uaccess.h>
#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
+#include <asm/code-patching.h>
#define BHRB_MAX_ENTRIES 32
#define BHRB_TARGET 0x0000000000000002
@@ -100,6 +102,10 @@ static inline int siar_valid(struct pt_regs *regs)
return 1;
}
+static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
+static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
+void power_pmu_flush_branch_stack(void) {}
+static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
#endif /* CONFIG_PPC32 */
static bool regs_use_siar(struct pt_regs *regs)
@@ -308,6 +314,159 @@ static inline int siar_valid(struct pt_regs *regs)
return 1;
}
+
+/* Reset all possible BHRB entries */
+static void power_pmu_bhrb_reset(void)
+{
+ asm volatile(PPC_CLRBHRB);
+}
+
+static void power_pmu_bhrb_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ /* Clear BHRB if we changed task context to avoid data leaks */
+ if (event->ctx->task && cpuhw->bhrb_context != event->ctx) {
+ power_pmu_bhrb_reset();
+ cpuhw->bhrb_context = event->ctx;
+ }
+ cpuhw->bhrb_users++;
+}
+
+static void power_pmu_bhrb_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ cpuhw->bhrb_users--;
+ WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+
+ if (!cpuhw->disabled && !cpuhw->bhrb_users) {
+ /* BHRB cannot be turned off when other
+ * events are active on the PMU.
+ */
+
+ /* avoid stale pointer */
+ cpuhw->bhrb_context = NULL;
+ }
+}
+
+/* Called from ctxsw to prevent one process's branch entries to
+ * mingle with the other process's entries during context switch.
+ */
+void power_pmu_flush_branch_stack(void)
+{
+ if (ppmu->bhrb_nr)
+ power_pmu_bhrb_reset();
+}
+/* Calculate the to address for a branch */
+static __u64 power_pmu_bhrb_to(u64 addr)
+{
+ unsigned int instr;
+ int ret;
+ __u64 target;
+
+ if (is_kernel_addr(addr))
+ return branch_target((unsigned int *)addr);
+
+ /* Userspace: need copy instruction here then translate it */
+ pagefault_disable();
+ ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
+ if (ret) {
+ pagefault_enable();
+ return 0;
+ }
+ pagefault_enable();
+
+ target = branch_target(&instr);
+ if ((!target) || (instr & BRANCH_ABSOLUTE))
+ return target;
+
+ /* Translate relative branch target from kernel to user address */
+ return target - (unsigned long)&instr + addr;
+}
+
+/* Processing BHRB entries */
+void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+{
+ u64 val;
+ u64 addr;
+ int r_index, u_index, pred;
+
+ r_index = 0;
+ u_index = 0;
+ while (r_index < ppmu->bhrb_nr) {
+ /* Assembly read function */
+ val = read_bhrb(r_index++);
+ if (!val)
+ /* Terminal marker: End of valid BHRB entries */
+ break;
+ else {
+ addr = val & BHRB_EA;
+ pred = val & BHRB_PREDICTION;
+
+ if (!addr)
+ /* invalid entry */
+ continue;
+
+ /* Branches are read most recent first (ie. mfbhrb 0 is
+ * the most recent branch).
+ * There are two types of valid entries:
+ * 1) a target entry which is the to address of a
+ * computed goto like a blr,bctr,btar. The next
+ * entry read from the bhrb will be branch
+ * corresponding to this target (ie. the actual
+ * blr/bctr/btar instruction).
+ * 2) a from address which is an actual branch. If a
+ * target entry proceeds this, then this is the
+ * matching branch for that target. If this is not
+ * following a target entry, then this is a branch
+ * where the target is given as an immediate field
+ * in the instruction (ie. an i or b form branch).
+ * In this case we need to read the instruction from
+ * memory to determine the target/to address.
+ */
+
+ if (val & BHRB_TARGET) {
+ /* Target branches use two entries
+ * (ie. computed gotos/XL form)
+ */
+ cpuhw->bhrb_entries[u_index].to = addr;
+ cpuhw->bhrb_entries[u_index].mispred = pred;
+ cpuhw->bhrb_entries[u_index].predicted = ~pred;
+
+ /* Get from address in next entry */
+ val = read_bhrb(r_index++);
+ addr = val & BHRB_EA;
+ if (val & BHRB_TARGET) {
+ /* Shouldn't have two targets in a
+ row.. Reset index and try again */
+ r_index--;
+ addr = 0;
+ }
+ cpuhw->bhrb_entries[u_index].from = addr;
+ } else {
+ /* Branches to immediate field
+ (ie I or B form) */
+ cpuhw->bhrb_entries[u_index].from = addr;
+ cpuhw->bhrb_entries[u_index].to =
+ power_pmu_bhrb_to(addr);
+ cpuhw->bhrb_entries[u_index].mispred = pred;
+ cpuhw->bhrb_entries[u_index].predicted = ~pred;
+ }
+ u_index++;
+
+ }
+ }
+ cpuhw->bhrb_stack.nr = u_index;
+ return;
+}
+
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -904,47 +1063,6 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}
-/* Reset all possible BHRB entries */
-static void power_pmu_bhrb_reset(void)
-{
- asm volatile(PPC_CLRBHRB);
-}
-
-void power_pmu_bhrb_enable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
-
- if (!ppmu->bhrb_nr)
- return;
-
- /* Clear BHRB if we changed task context to avoid data leaks */
- if (event->ctx->task && cpuhw->bhrb_context != event->ctx) {
- power_pmu_bhrb_reset();
- cpuhw->bhrb_context = event->ctx;
- }
- cpuhw->bhrb_users++;
-}
-
-void power_pmu_bhrb_disable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
-
- if (!ppmu->bhrb_nr)
- return;
-
- cpuhw->bhrb_users--;
- WARN_ON_ONCE(cpuhw->bhrb_users < 0);
-
- if (!cpuhw->disabled && !cpuhw->bhrb_users) {
- /* BHRB cannot be turned off when other
- * events are active on the PMU.
- */
-
- /* avoid stale pointer */
- cpuhw->bhrb_context = NULL;
- }
-}
-
/*
* Add a event to the PMU.
* If all events are not already frozen, then we disable and
@@ -1180,15 +1298,6 @@ int power_pmu_commit_txn(struct pmu *pmu)
return 0;
}
-/* Called from ctxsw to prevent one process's branch entries to
- * mingle with the other process's entries during context switch.
- */
-void power_pmu_flush_branch_stack(void)
-{
- if (ppmu->bhrb_nr)
- power_pmu_bhrb_reset();
-}
-
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
@@ -1458,77 +1567,6 @@ struct pmu power_pmu = {
.flush_branch_stack = power_pmu_flush_branch_stack,
};
-/* Processing BHRB entries */
-void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
-{
- u64 val;
- u64 addr;
- int r_index, u_index, target, pred;
-
- r_index = 0;
- u_index = 0;
- while (r_index < ppmu->bhrb_nr) {
- /* Assembly read function */
- val = read_bhrb(r_index);
-
- /* Terminal marker: End of valid BHRB entries */
- if (val == 0) {
- break;
- } else {
- /* BHRB field break up */
- addr = val & BHRB_EA;
- pred = val & BHRB_PREDICTION;
- target = val & BHRB_TARGET;
-
- /* Probable Missed entry: Not applicable for POWER8 */
- if ((addr == 0) && (target == 0) && (pred == 1)) {
- r_index++;
- continue;
- }
-
- /* Real Missed entry: Power8 based missed entry */
- if ((addr == 0) && (target == 1) && (pred == 1)) {
- r_index++;
- continue;
- }
-
- /* Reserved condition: Not a valid entry */
- if ((addr == 0) && (target == 1) && (pred == 0)) {
- r_index++;
- continue;
- }
-
- /* Is a target address */
- if (val & BHRB_TARGET) {
- /* First address cannot be a target address */
- if (r_index == 0) {
- r_index++;
- continue;
- }
-
- /* Update target address for the previous entry */
- cpuhw->bhrb_entries[u_index - 1].to = addr;
- cpuhw->bhrb_entries[u_index - 1].mispred = pred;
- cpuhw->bhrb_entries[u_index - 1].predicted = ~pred;
-
- /* Dont increment u_index */
- r_index++;
- } else {
- /* Update address, flags for current entry */
- cpuhw->bhrb_entries[u_index].from = addr;
- cpuhw->bhrb_entries[u_index].mispred = pred;
- cpuhw->bhrb_entries[u_index].predicted = ~pred;
-
- /* Successfully popullated one entry */
- u_index++;
- r_index++;
- }
- }
- }
- cpuhw->bhrb_stack.nr = u_index;
- return;
-}
-
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index bd40bbb15e14..6e287f1294fa 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -138,7 +138,6 @@ config PPC4xx_GPIO
bool "PPC4xx GPIO support"
depends on 40x
select ARCH_REQUIRE_GPIOLIB
- select GENERIC_GPIO
help
Enable gpiolib support for ppc40x based boards
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 7be93367d92f..d6c7506ec7d9 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -248,7 +248,6 @@ config PPC4xx_GPIO
bool "PPC4xx GPIO support"
depends on 44x
select ARCH_REQUIRE_GPIOLIB
- select GENERIC_GPIO
help
Enable gpiolib support for ppc440 based boards
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index 8f02b05f4c96..efdd37c775ad 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -203,7 +203,6 @@ config GE_IMP3A
select DEFAULT_UIMAGE
select SWIOTLB
select MMIO_NVRAM
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
select GE_FPGA
help
@@ -328,7 +327,7 @@ config B4_QDS
select PPC_E500MC
select PHYS_64BIT
select SWIOTLB
- select GENERIC_GPIO
+ select GPIOLIB
select ARCH_REQUIRE_GPIOLIB
select HAS_RAPIDIO
select PPC_EPAPR_HV_PIC
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 7a6279e38213..1afd1e4a2dd2 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -37,7 +37,6 @@ config GEF_PPC9A
bool "GE PPC9A"
select DEFAULT_UIMAGE
select MMIO_NVRAM
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
select GE_FPGA
help
@@ -47,7 +46,6 @@ config GEF_SBC310
bool "GE SBC310"
select DEFAULT_UIMAGE
select MMIO_NVRAM
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
select GE_FPGA
help
@@ -57,7 +55,6 @@ config GEF_SBC610
bool "GE SBC610"
select DEFAULT_UIMAGE
select MMIO_NVRAM
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
select GE_FPGA
select HAS_RAPIDIO
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 1fb0b3cddeb3..8dec3c0911ad 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -114,7 +114,6 @@ config 8xx_COPYBACK
config 8xx_GPIO
bool "GPIO API Support"
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
help
Saying Y here will cause the ports on an MPC8xx processor to be used
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 34d224be93ba..b62aab3e22ec 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -128,7 +128,7 @@ config PPC_RTAS_DAEMON
config RTAS_PROC
bool "Proc interface to RTAS"
- depends on PPC_RTAS
+ depends on PPC_RTAS && PROC_FS
default y
config RTAS_FLASH
@@ -302,7 +302,6 @@ config QUICC_ENGINE
config QE_GPIO
bool "QE GPIO support"
depends on QUICC_ENGINE
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
help
Say Y here if you're going to use hardware that connects to the
@@ -315,7 +314,6 @@ config CPM2
select PPC_LIB_RHEAP
select PPC_PCI_CHOICE
select ARCH_REQUIRE_GPIOLIB
- select GENERIC_GPIO
help
The CPM2 (Communications Processor Module) is a coprocessor on
embedded CPUs made by Freescale. Selecting this option means that
@@ -353,7 +351,6 @@ config OF_RTC
config SIMPLE_GPIO
bool "Support for simple, memory-mapped GPIO controllers"
depends on PPC
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
help
Say Y here to support simple, memory-mapped GPIO controllers.
@@ -364,7 +361,6 @@ config SIMPLE_GPIO
config MCU_MPC8349EMITX
bool "MPC8349E-mITX MCU driver"
depends on I2C=y && PPC_83xx
- select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
help
Say Y here to enable soft power-off functionality on the Freescale
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index e56bb651da1a..946306b1bb4e 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -550,7 +550,7 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev)
*/
iommu = cell_iommu_for_node(dev_to_node(dev));
if (iommu == NULL || list_empty(&iommu->windows)) {
- printk(KERN_ERR "iommu: missing iommu for %s (node %d)\n",
+ dev_err(dev, "iommu: missing iommu for %s (node %d)\n",
of_node_full_name(dev->of_node), dev_to_node(dev));
return NULL;
}
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 8b1213993b10..f85db3a69b4a 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -715,7 +715,7 @@ static ssize_t spu_stat_show(struct device *dev,
spu->stats.libassist);
}
-static DEVICE_ATTR(stat, 0644, spu_stat_show, NULL);
+static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL);
#ifdef CONFIG_KEXEC
diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h
index b30a6a3b5bd2..b290b63661f1 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc10x.h
+++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h
@@ -81,17 +81,6 @@
#define MPC10X_MAPB_PCI_MEM_OFFSET (MPC10X_MAPB_ISA_MEM_BASE - \
MPC10X_MAPB_PCI_MEM_START)
-/* Set hose members to values appropriate for the mem map used */
-#define MPC10X_SETUP_HOSE(hose, map) { \
- (hose)->pci_mem_offset = MPC10X_MAP##map##_PCI_MEM_OFFSET; \
- (hose)->io_space.start = MPC10X_MAP##map##_PCI_IO_START; \
- (hose)->io_space.end = MPC10X_MAP##map##_PCI_IO_END; \
- (hose)->mem_space.start = MPC10X_MAP##map##_PCI_MEM_START; \
- (hose)->mem_space.end = MPC10X_MAP##map##_PCI_MEM_END; \
- (hose)->io_base_virt = (void *)MPC10X_MAP##map##_ISA_IO_BASE; \
-}
-
-
/* Miscellaneous Configuration register offsets */
#define MPC10X_CFG_PIR_REG 0x09
#define MPC10X_CFG_PIR_HOST_BRIDGE 0x00
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 2b8af75abc23..cf7009b8c7b6 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -824,6 +824,7 @@ static void __init parse_region_decode(struct pci_controller *hose,
hose->mem_resources[cur].name = hose->dn->full_name;
hose->mem_resources[cur].start = base;
hose->mem_resources[cur].end = end;
+ hose->mem_offset[cur] = 0;
DBG(" %d: 0x%08lx-0x%08lx\n", cur, base, end);
} else {
DBG(" : -0x%08lx\n", end);
@@ -866,7 +867,6 @@ static void __init setup_u3_ht(struct pci_controller* hose)
hose->io_resource.start = 0;
hose->io_resource.end = 0x003fffff;
hose->io_resource.flags = IORESOURCE_IO;
- hose->pci_mem_offset = 0;
hose->first_busno = 0;
hose->last_busno = 0xef;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index aaa0dba49471..628c564ceadb 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
+#include <linux/slab.h>
#include <asm/opal.h>
#include <asm/firmware.h>
@@ -28,13 +29,14 @@ struct opal {
static struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
+static unsigned int *opal_irqs;
+static unsigned int opal_irq_count;
int __init early_init_dt_scan_opal(unsigned long node,
const char *uname, int depth, void *data)
{
const void *basep, *entryp;
unsigned long basesz, entrysz;
- u64 glue;
if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
return 0;
@@ -54,13 +56,27 @@ int __init early_init_dt_scan_opal(unsigned long node,
opal.entry, entryp, entrysz);
powerpc_firmware_features |= FW_FEATURE_OPAL;
- if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
+ if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
+ powerpc_firmware_features |= FW_FEATURE_OPALv2;
+ powerpc_firmware_features |= FW_FEATURE_OPALv3;
+ printk("OPAL V3 detected !\n");
+ } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
powerpc_firmware_features |= FW_FEATURE_OPALv2;
printk("OPAL V2 detected !\n");
} else {
printk("OPAL V1 detected !\n");
}
+ return 1;
+}
+
+static int __init opal_register_exception_handlers(void)
+{
+ u64 glue;
+
+ if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
+ return -ENODEV;
+
/* Hookup some exception handlers. We use the fwnmi area at 0x7000
* to provide the glue space to OPAL
*/
@@ -74,9 +90,11 @@ int __init early_init_dt_scan_opal(unsigned long node,
glue += 128;
opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
- return 1;
+ return 0;
}
+early_initcall(opal_register_exception_handlers);
+
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
s64 len, rc;
@@ -133,6 +151,13 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
len = total_len;
rc = opal_console_write(vtermno, &len, data);
+
+ /* Closed or other error drop */
+ if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
+ rc != OPAL_BUSY_EVENT) {
+ written = total_len;
+ break;
+ }
if (rc == OPAL_SUCCESS) {
total_len -= len;
data += len;
@@ -305,6 +330,8 @@ static int __init opal_init(void)
irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
pr_debug("opal: Found %d interrupts reserved for OPAL\n",
irqs ? (irqlen / 4) : 0);
+ opal_irq_count = irqlen / 4;
+ opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
unsigned int hwirq = be32_to_cpup(irqs);
unsigned int irq = irq_create_mapping(NULL, hwirq);
@@ -316,7 +343,19 @@ static int __init opal_init(void)
if (rc)
pr_warning("opal: Error %d requesting irq %d"
" (0x%x)\n", rc, irq, hwirq);
+ opal_irqs[i] = irq;
}
return 0;
}
subsys_initcall(opal_init);
+
+void opal_shutdown(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < opal_irq_count; i++) {
+ if (opal_irqs[i])
+ free_irq(opal_irqs[i], 0);
+ opal_irqs[i] = 0;
+ }
+}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8c6c9cf91c13..3937aaae5bc4 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -915,11 +915,14 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
index++;
}
} else if (res->flags & IORESOURCE_MEM) {
+ /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
+ * harden that algorithm when we start supporting M64
+ */
region.start = res->start -
- hose->pci_mem_offset -
+ hose->mem_offset[0] -
phb->ioda.m32_pci_base;
region.end = res->end -
- hose->pci_mem_offset -
+ hose->mem_offset[0] -
phb->ioda.m32_pci_base;
index = region.start / phb->ioda.m32_segsize;
@@ -1045,6 +1048,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
+static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
+{
+ opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
+ OPAL_ASSERT_RESET);
+}
+
void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
{
struct pci_controller *hose;
@@ -1089,7 +1098,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
/* Detect specific models for error handling */
if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
phb->model = PNV_PHB_MODEL_P7IOC;
- else if (of_device_is_compatible(np, "ibm,p8-pciex"))
+ else if (of_device_is_compatible(np, "ibm,power8-pciex"))
phb->model = PNV_PHB_MODEL_PHB3;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
@@ -1115,8 +1124,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
phb->ioda.m32_size += 0x10000;
phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
- phb->ioda.m32_pci_base = hose->mem_resources[0].start -
- hose->pci_mem_offset;
+ phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
phb->ioda.io_size = hose->pci_io_size;
phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
@@ -1176,6 +1184,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
+ /* Setup shutdown function for kexec */
+ phb->shutdown = pnv_pci_ioda_shutdown;
+
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 55dfca844ddf..163bd7422f1c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -450,6 +450,18 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
pnv_pci_dma_fallback_setup(hose, pdev);
}
+void pnv_pci_shutdown(void)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ struct pnv_phb *phb = hose->private_data;
+
+ if (phb && phb->shutdown)
+ phb->shutdown(phb);
+ }
+}
+
/* Fixup wrong class code in p7ioc and p8 root complex */
static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 48dc4bb856a1..25d76c4df50b 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -86,6 +86,7 @@ struct pnv_phb {
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
+ void (*shutdown)(struct pnv_phb *phb);
union {
struct {
@@ -158,4 +159,5 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
u64 *startp, u64 *endp);
+
#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 8a9df7f9667e..a1c6f83fc391 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -9,8 +9,10 @@ static inline void pnv_smp_init(void) { }
#ifdef CONFIG_PCI
extern void pnv_pci_init(void);
+extern void pnv_pci_shutdown(void);
#else
static inline void pnv_pci_init(void) { }
+static inline void pnv_pci_shutdown(void) { }
#endif
#endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index db1ad1c8f68f..d4459bfc92f7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -78,7 +78,9 @@ static void pnv_show_cpuinfo(struct seq_file *m)
if (root)
model = of_get_property(root, "model", NULL);
seq_printf(m, "machine\t\t: PowerNV %s\n", model);
- if (firmware_has_feature(FW_FEATURE_OPALv2))
+ if (firmware_has_feature(FW_FEATURE_OPALv3))
+ seq_printf(m, "firmware\t: OPAL v3\n");
+ else if (firmware_has_feature(FW_FEATURE_OPALv2))
seq_printf(m, "firmware\t: OPAL v2\n");
else if (firmware_has_feature(FW_FEATURE_OPAL))
seq_printf(m, "firmware\t: OPAL v1\n");
@@ -126,6 +128,17 @@ static void pnv_progress(char *s, unsigned short hex)
{
}
+static void pnv_shutdown(void)
+{
+ /* Let the PCI code clear up IODA tables */
+ pnv_pci_shutdown();
+
+ /* And unregister all OPAL interrupts so they don't fire
+ * up while we kexec
+ */
+ opal_shutdown();
+}
+
#ifdef CONFIG_KEXEC
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
@@ -187,6 +200,7 @@ define_machine(powernv) {
.init_IRQ = pnv_init_IRQ,
.show_cpuinfo = pnv_show_cpuinfo,
.progress = pnv_progress,
+ .machine_shutdown = pnv_shutdown,
.power_save = power7_idle,
.calibrate_decr = generic_calibrate_decr,
#ifdef CONFIG_KEXEC
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 0bdc735db16f..88c9459c3e07 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -71,16 +71,68 @@ int pnv_smp_kick_cpu(int nr)
BUG_ON(nr < 0 || nr >= NR_CPUS);
- /* On OPAL v2 the CPU are still spinning inside OPAL itself,
- * get them back now
+ /*
+ * If we already started or OPALv2 is not supported, we just
+ * kick the CPU via the PACA
*/
- if (!paca[nr].cpu_start && firmware_has_feature(FW_FEATURE_OPALv2)) {
- pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
- rc = opal_start_cpu(pcpu, start_here);
- if (rc != OPAL_SUCCESS)
- pr_warn("OPAL Error %ld starting CPU %d\n",
+ if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2))
+ goto kick;
+
+ /*
+ * At this point, the CPU can either be spinning on the way in
+ * from kexec or be inside OPAL waiting to be started for the
+ * first time. OPAL v3 allows us to query OPAL to know if it
+ * has the CPUs, so we do that
+ */
+ if (firmware_has_feature(FW_FEATURE_OPALv3)) {
+ uint8_t status;
+
+ rc = opal_query_cpu_status(pcpu, &status);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("OPAL Error %ld querying CPU %d state\n",
rc, nr);
+ return -ENODEV;
+ }
+
+ /*
+ * Already started, just kick it, probably coming from
+ * kexec and spinning
+ */
+ if (status == OPAL_THREAD_STARTED)
+ goto kick;
+
+ /*
+ * Available/inactive, let's kick it
+ */
+ if (status == OPAL_THREAD_INACTIVE) {
+ pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n",
+ nr, pcpu);
+ rc = opal_start_cpu(pcpu, start_here);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("OPAL Error %ld starting CPU %d\n",
+ rc, nr);
+ return -ENODEV;
+ }
+ } else {
+ /*
+ * An unavailable CPU (or any other unknown status)
+ * shouldn't be started. It should also
+ * not be in the possible map but currently it can
+ * happen
+ */
+ pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
+ " (status %d)...\n", nr, pcpu, status);
+ return -ENODEV;
+ }
+ } else {
+ /*
+ * On OPAL v2, we just kick it and hope for the best,
+ * we must not test the error from opal_start_cpu() or
+ * we would fail to get CPUs from kexec.
+ */
+ opal_start_cpu(pcpu, start_here);
}
+ kick:
return smp_generic_kick_cpu(nr);
}
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 9a0941bc4d31..023b288f895b 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -18,6 +18,7 @@ config PPC_PSERIES
select PPC_PCI_CHOICE if EXPERT
select ZLIB_DEFLATE
select PPC_DOORBELL
+ select HAVE_CONTEXT_TRACKING
default y
config PPC_SPLPAR
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index e5b084723131..420524e6f8c9 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -24,6 +24,7 @@ static int query_token, change_token;
#define RTAS_RESET_FN 2
#define RTAS_CHANGE_MSI_FN 3
#define RTAS_CHANGE_MSIX_FN 4
+#define RTAS_CHANGE_32MSI_FN 5
static struct pci_dn *get_pdn(struct pci_dev *pdev)
{
@@ -58,7 +59,8 @@ static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs)
seq_num = 1;
do {
- if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN)
+ if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN ||
+ func == RTAS_CHANGE_32MSI_FN)
rc = rtas_call(change_token, 6, 4, rtas_ret, addr,
BUID_HI(buid), BUID_LO(buid),
func, num_irqs, seq_num);
@@ -426,9 +428,12 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
*/
again:
if (type == PCI_CAP_ID_MSI) {
- rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec);
+ if (pdn->force_32bit_msi)
+ rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
+ else
+ rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec);
- if (rc < 0) {
+ if (rc < 0 && !pdn->force_32bit_msi) {
pr_debug("rtas_msi: trying the old firmware call.\n");
rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec);
}
@@ -512,3 +517,13 @@ static int rtas_msi_init(void)
return 0;
}
arch_initcall(rtas_msi_init);
+
+static void quirk_radeon(struct pci_dev *dev)
+{
+ struct pci_dn *pdn = get_pdn(dev);
+
+ if (pdn)
+ pdn->force_32bit_msi = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon);
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 0b580f413a9a..5f93856cdf47 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -108,3 +108,56 @@ static void fixup_winbond_82c105(struct pci_dev* dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
fixup_winbond_82c105);
+
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+ struct device_node *dn, *pdn;
+ struct pci_bus *bus;
+ const uint32_t *pcie_link_speed_stats;
+
+ bus = bridge->bus;
+
+ dn = pcibios_get_phb_of_node(bus);
+ if (!dn)
+ return 0;
+
+ for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
+ pcie_link_speed_stats = (const uint32_t *) of_get_property(pdn,
+ "ibm,pcie-link-speed-stats", NULL);
+ if (pcie_link_speed_stats)
+ break;
+ }
+
+ of_node_put(pdn);
+
+ if (!pcie_link_speed_stats) {
+ pr_err("no ibm,pcie-link-speed-stats property\n");
+ return 0;
+ }
+
+ switch (pcie_link_speed_stats[0]) {
+ case 0x01:
+ bus->max_bus_speed = PCIE_SPEED_2_5GT;
+ break;
+ case 0x02:
+ bus->max_bus_speed = PCIE_SPEED_5_0GT;
+ break;
+ default:
+ bus->max_bus_speed = PCI_SPEED_UNKNOWN;
+ break;
+ }
+
+ switch (pcie_link_speed_stats[1]) {
+ case 0x01:
+ bus->cur_bus_speed = PCIE_SPEED_2_5GT;
+ break;
+ case 0x02:
+ bus->cur_bus_speed = PCIE_SPEED_5_0GT;
+ break;
+ default:
+ bus->cur_bus_speed = PCI_SPEED_UNKNOWN;
+ break;
+ }
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 8af71e4cc17f..c2a3a258001c 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -63,4 +63,8 @@ extern int dlpar_detach_node(struct device_node *);
/* Snooze Delay, pseries_idle */
DECLARE_PER_CPU(long, smt_snooze_delay);
+/* PCI root bridge prepare function override for pseries */
+struct pci_host_bridge;
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
+
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ac932a9eb440..c11c8238797c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -466,6 +466,8 @@ static void __init pSeries_setup_arch(void)
else
ppc_md.enable_pmcs = power4_enable_pmcs;
+ ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
long rc;
if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) {
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 47226e04126d..5f997e79d570 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -16,6 +16,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/suspend.h>
#include <linux/stat.h>
@@ -126,11 +127,15 @@ static ssize_t store_hibernate(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
+ cpumask_var_t offline_mask;
int rc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
+ return -ENOMEM;
+
stream_id = simple_strtoul(buf, NULL, 16);
do {
@@ -140,15 +145,32 @@ static ssize_t store_hibernate(struct device *dev,
} while (rc == -EAGAIN);
if (!rc) {
+ /* All present CPUs must be online */
+ cpumask_andnot(offline_mask, cpu_present_mask,
+ cpu_online_mask);
+ rc = rtas_online_cpus_mask(offline_mask);
+ if (rc) {
+ pr_err("%s: Could not bring present CPUs online.\n",
+ __func__);
+ goto out;
+ }
+
stop_topology_update();
rc = pm_suspend(PM_SUSPEND_MEM);
start_topology_update();
+
+ /* Take down CPUs not online prior to suspend */
+ if (!rtas_offline_cpus_mask(offline_mask))
+ pr_warn("%s: Could not restore CPUs to offline "
+ "state.\n", __func__);
}
stream_id = 0;
if (!rc)
rc = count;
+out:
+ free_cpumask_var(offline_mask);
return rc;
}
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
index 97fe82ee8633..2d3b1dd9571d 100644
--- a/arch/powerpc/platforms/wsp/ics.c
+++ b/arch/powerpc/platforms/wsp/ics.c
@@ -361,7 +361,7 @@ static int wsp_chip_set_affinity(struct irq_data *d,
xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
wsp_ics_set_xive(ics, hw_irq, xive);
- return 0;
+ return IRQ_SET_MASK_OK;
}
static struct irq_chip wsp_irq_chip = {
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c
index 8e22f561d171..62cb527493e7 100644
--- a/arch/powerpc/platforms/wsp/wsp_pci.c
+++ b/arch/powerpc/platforms/wsp/wsp_pci.c
@@ -502,7 +502,7 @@ static void __init wsp_pcie_configure_hw(struct pci_controller *hose)
(~(hose->mem_resources[0].end -
hose->mem_resources[0].start)) & 0x3ffffff0000ul);
out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR,
- (hose->mem_resources[0].start - hose->pci_mem_offset) | 1);
+ (hose->mem_resources[0].start - hose->mem_offset[0]) | 1);
/* Clear all TVT entries
*
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index b0a518e97599..99464a7bdb3b 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -64,6 +64,8 @@ endif
obj-$(CONFIG_PPC_SCOM) += scom.o
+obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
+
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-$(CONFIG_PPC_XICS) += xics/
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index 6e0e1005227f..9cd0e60716fe 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -81,7 +81,7 @@ int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
ev_int_set_config(src, config, prio, cpuid);
spin_unlock_irqrestore(&ehv_pic_lock, flags);
- return 0;
+ return IRQ_SET_MASK_OK;
}
static unsigned int ehv_pic_type_to_vecpri(unsigned int type)
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index cffe7edac858..028ac1f71b51 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -178,7 +178,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
struct ccsr_pci __iomem *pci = hose->private_data;
int i, j, n, mem_log, win_idx = 3, start_idx = 1, end_idx = 4;
u64 mem, sz, paddr_hi = 0;
- u64 paddr_lo = ULLONG_MAX;
+ u64 offset = 0, paddr_lo = ULLONG_MAX;
u32 pcicsrbar = 0, pcicsrbar_sz;
u32 piwar = PIWAR_EN | PIWAR_PF | PIWAR_TGI_LOCAL |
PIWAR_READ_SNOOP | PIWAR_WRITE_SNOOP;
@@ -208,8 +208,9 @@ static void setup_pci_atmu(struct pci_controller *hose)
paddr_lo = min(paddr_lo, (u64)hose->mem_resources[i].start);
paddr_hi = max(paddr_hi, (u64)hose->mem_resources[i].end);
- n = setup_one_atmu(pci, j, &hose->mem_resources[i],
- hose->pci_mem_offset);
+ /* We assume all memory resources have the same offset */
+ offset = hose->mem_offset[i];
+ n = setup_one_atmu(pci, j, &hose->mem_resources[i], offset);
if (n < 0 || j >= 5) {
pr_err("Ran out of outbound PCI ATMUs for resource %d!\n", i);
@@ -239,8 +240,8 @@ static void setup_pci_atmu(struct pci_controller *hose)
}
/* convert to pci address space */
- paddr_hi -= hose->pci_mem_offset;
- paddr_lo -= hose->pci_mem_offset;
+ paddr_hi -= offset;
+ paddr_lo -= offset;
if (paddr_hi == paddr_lo) {
pr_err("%s: No outbound window space\n", name);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index d30e6a676c89..0a13ecb270c7 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -836,7 +836,7 @@ int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
mpic_physmask(mask));
}
- return 0;
+ return IRQ_SET_MASK_OK;
}
static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
@@ -1001,8 +1001,12 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
if (hw == mpic->spurious_vec)
return -EINVAL;
- if (mpic->protected && test_bit(hw, mpic->protected))
- return -EINVAL;
+ if (mpic->protected && test_bit(hw, mpic->protected)) {
+ pr_warning("mpic: Mapping of source 0x%x failed, "
+ "source protected by firmware !\n",\
+ (unsigned int)hw);
+ return -EPERM;
+ }
#ifdef CONFIG_SMP
else if (hw >= mpic->ipi_vecs[0]) {
@@ -1029,8 +1033,12 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
if (mpic_map_error_int(mpic, virq, hw))
return 0;
- if (hw >= mpic->num_sources)
+ if (hw >= mpic->num_sources) {
+ pr_warning("mpic: Mapping of source 0x%x failed, "
+ "source out of range !\n",\
+ (unsigned int)hw);
return -EINVAL;
+ }
mpic_msi_reserve_hwirq(mpic, hw);
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c
index 56e8b3c3c890..64603a10b863 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/sysdev/ppc4xx_pci.c
@@ -257,6 +257,7 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
/* Setup outbound memory windows */
for (i = j = 0; i < 3; i++) {
struct resource *res = &hose->mem_resources[i];
+ resource_size_t offset = hose->mem_offset[i];
/* we only care about memory windows */
if (!(res->flags & IORESOURCE_MEM))
@@ -270,7 +271,7 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
/* Configure the resource */
if (ppc4xx_setup_one_pci_PMM(hose, reg,
res->start,
- res->start - hose->pci_mem_offset,
+ res->start - offset,
resource_size(res),
res->flags,
j) == 0) {
@@ -279,7 +280,7 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
/* If the resource PCI address is 0 then we have our
* ISA memory hole
*/
- if (res->start == hose->pci_mem_offset)
+ if (res->start == offset)
found_isa_hole = 1;
}
}
@@ -457,6 +458,7 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
/* Setup outbound memory windows */
for (i = j = 0; i < 3; i++) {
struct resource *res = &hose->mem_resources[i];
+ resource_size_t offset = hose->mem_offset[i];
/* we only care about memory windows */
if (!(res->flags & IORESOURCE_MEM))
@@ -470,7 +472,7 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
/* Configure the resource */
if (ppc4xx_setup_one_pcix_POM(hose, reg,
res->start,
- res->start - hose->pci_mem_offset,
+ res->start - offset,
resource_size(res),
res->flags,
j) == 0) {
@@ -479,7 +481,7 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
/* If the resource PCI address is 0 then we have our
* ISA memory hole
*/
- if (res->start == hose->pci_mem_offset)
+ if (res->start == offset)
found_isa_hole = 1;
}
}
@@ -1792,6 +1794,7 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
/* Setup outbound memory windows */
for (i = j = 0; i < 3; i++) {
struct resource *res = &hose->mem_resources[i];
+ resource_size_t offset = hose->mem_offset[i];
/* we only care about memory windows */
if (!(res->flags & IORESOURCE_MEM))
@@ -1805,7 +1808,7 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
/* Configure the resource */
if (ppc4xx_setup_one_pciex_POM(port, hose, mbase,
res->start,
- res->start - hose->pci_mem_offset,
+ res->start - offset,
resource_size(res),
res->flags,
j) == 0) {
@@ -1814,7 +1817,7 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
/* If the resource PCI address is 0 then we have our
* ISA memory hole
*/
- if (res->start == hose->pci_mem_offset)
+ if (res->start == offset)
found_isa_hole = 1;
}
}
diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c
new file mode 100644
index 000000000000..ce5a7b489e4b
--- /dev/null
+++ b/arch/powerpc/sysdev/udbg_memcons.c
@@ -0,0 +1,105 @@
+/*
+ * A udbg backend which logs messages and reads input from in memory
+ * buffers.
+ *
+ * The console output can be read from memcons_output which is a
+ * circular buffer whose next write position is stored in memcons.output_pos.
+ *
+ * Input may be passed by writing into the memcons_input buffer when it is
+ * empty. The input buffer is empty when both input_pos == input_start and
+ * *input_start == '\0'.
+ *
+ * Copyright (C) 2003-2005 Anton Blanchard and Milton Miller, IBM Corp
+ * Copyright (C) 2013 Alistair Popple, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/barrier.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+struct memcons {
+ char *output_start;
+ char *output_pos;
+ char *output_end;
+ char *input_start;
+ char *input_pos;
+ char *input_end;
+};
+
+static char memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE];
+static char memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE];
+
+struct memcons memcons = {
+ .output_start = memcons_output,
+ .output_pos = memcons_output,
+ .output_end = &memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE],
+ .input_start = memcons_input,
+ .input_pos = memcons_input,
+ .input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE],
+};
+
+void memcons_putc(char c)
+{
+ char *new_output_pos;
+
+ *memcons.output_pos = c;
+ wmb();
+ new_output_pos = memcons.output_pos + 1;
+ if (new_output_pos >= memcons.output_end)
+ new_output_pos = memcons.output_start;
+
+ memcons.output_pos = new_output_pos;
+}
+
+int memcons_getc_poll(void)
+{
+ char c;
+ char *new_input_pos;
+
+ if (*memcons.input_pos) {
+ c = *memcons.input_pos;
+
+ new_input_pos = memcons.input_pos + 1;
+ if (new_input_pos >= memcons.input_end)
+ new_input_pos = memcons.input_start;
+ else if (*new_input_pos == '\0')
+ new_input_pos = memcons.input_start;
+
+ *memcons.input_pos = '\0';
+ wmb();
+ memcons.input_pos = new_input_pos;
+ return c;
+ }
+
+ return -1;
+}
+
+int memcons_getc(void)
+{
+ int c;
+
+ while (1) {
+ c = memcons_getc_poll();
+ if (c == -1)
+ cpu_relax();
+ else
+ break;
+ }
+
+ return c;
+}
+
+void udbg_init_memcons(void)
+{
+ udbg_putc = memcons_putc;
+ udbg_getc = memcons_getc;
+ udbg_getc_poll = memcons_getc_poll;
+}
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 89db29d17c25..7cd728b3b5e4 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
static inline unsigned int icp_native_get_xirr(void)
{
int cpu = smp_processor_id();
+ unsigned int xirr;
+
+ /* Handled an interrupt latched by KVM */
+ xirr = kvmppc_get_xics_latch();
+ if (xirr)
+ return xirr;
return in_be32(&icp_native_regs[cpu]->xirr.word);
}
@@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void)
static void icp_native_cause_ipi(int cpu, unsigned long data)
{
+ kvmppc_set_host_ipi(cpu, 1);
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
@@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
+ kvmppc_set_host_ipi(cpu, 0);
icp_native_set_qirr(cpu, 0xff);
return smp_ipi_demux();
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index f7e8609df0d5..39d72212655e 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -148,7 +148,7 @@ static int ics_opal_set_affinity(struct irq_data *d,
__func__, d->irq, hw_irq, server, rc);
return -1;
}
- return 0;
+ return IRQ_SET_MASK_OK;
}
static struct irq_chip ics_opal_irq_chip = {