summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/configs/c2k_defconfig1
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c6
-rw-r--r--arch/powerpc/include/asm/gpio.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h9
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h51
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h1
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/pgtable.h3
-rw-r--r--arch/powerpc/include/asm/smp.h4
-rw-r--r--arch/powerpc/include/asm/xics.h1
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h9
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h2
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c3
-rw-r--r--arch/powerpc/kernel/process.c4
-rw-r--r--arch/powerpc/kernel/rtasd.c9
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c3
-rw-r--r--arch/powerpc/kernel/smp.c30
-rw-r--r--arch/powerpc/kernel/traps.c5
-rw-r--r--arch/powerpc/kvm/Makefile2
-rw-r--r--arch/powerpc/kvm/book3s.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c156
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c330
-rw-r--r--arch/powerpc/kvm/book3s_hv.c215
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c131
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S18
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c35
-rw-r--r--arch/powerpc/kvm/powerpc.c38
-rw-r--r--arch/powerpc/mm/hash_utils_64.c36
-rw-r--r--arch/powerpc/mm/hugetlbpage-book3e.c13
-rw-r--r--arch/powerpc/mm/init_32.c8
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/powerpc/mm/mmap.c4
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c3
-rw-r--r--arch/powerpc/mm/pgtable.c8
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/powerpc/perf/hv-24x7.c8
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c10
-rw-r--r--arch/powerpc/sysdev/ppc4xx_gpio.c2
-rw-r--r--arch/powerpc/sysdev/simple_gpio.c2
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c21
46 files changed, 1037 insertions, 171 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b580ce0c25a8..7cd32c038286 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -159,6 +159,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -847,14 +848,10 @@ config PCI_8260
select PPC_INDIRECT_PCI
default y
-source "drivers/pci/pcie/Kconfig"
-
source "drivers/pci/Kconfig"
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
config HAS_RAPIDIO
bool
default n
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index 91862292cd55..340685caa7b8 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -387,7 +387,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index e5d2c3dc07f1..99ccbebabfd3 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1175,7 +1175,6 @@ CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_XMON=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 6d99ebf2ea15..748fc00c5e19 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -22,6 +22,7 @@
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
+#include <crypto/xts.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -126,6 +127,11 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
key_len >>= 1;
diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/powerpc/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2aa79c864e91..7529aab068f5 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
}
#endif
-#define SPAPR_TCE_SHIFT 12
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9d08d8cbed1a..d7b343170453 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
u64 liobn;
- u32 window_size;
+ struct rcu_head rcu;
+ u32 page_shift;
+ u64 offset; /* in pages */
+ u64 size; /* window size in pages */
struct page *pages[0];
};
@@ -289,7 +292,7 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
struct list_head preempt_list;
spinlock_t lock;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
@@ -629,7 +632,7 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2241d5357129..2544edabe7f3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args);
+ struct kvm_create_spapr_tce_64 *args);
+extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
+ struct kvm_vcpu *vcpu, unsigned long liobn);
+extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages);
+extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
+ unsigned long tce);
+extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ unsigned long *ua, unsigned long **prmap);
+extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
+ unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
+extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages);
+extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
+extern void kvmppc_alloc_host_rm_ops(void);
+extern void kvmppc_free_host_rm_ops(void);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xics_ipi_action(void);
+extern int h_ipi_redirect;
#else
+static inline void kvmppc_alloc_host_rm_ops(void) {};
+static inline void kvmppc_free_host_rm_ops(void) {};
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
+/*
+ * Host-side operations we want to set up while running in real
+ * mode in the guest operating on the xics.
+ * Currently only VCPU wakeup is supported.
+ */
+
+union kvmppc_rm_state {
+ unsigned long raw;
+ struct {
+ u32 in_host;
+ u32 rm_action;
+ };
+};
+
+struct kvmppc_host_rm_core {
+ union kvmppc_rm_state rm_state;
+ void *rm_data;
+ char pad[112];
+};
+
+struct kvmppc_host_rm_ops {
+ struct kvmppc_host_rm_core *rm_core;
+ void (*vcpu_kick)(struct kvm_vcpu *vcpu);
+};
+
+extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+
static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 9f165e8a77bf..f5056e3394b4 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -10,7 +10,6 @@
#include <linux/pci.h>
#include <linux/list.h>
#include <linux/ioport.h>
-#include <asm-generic/pci-bridge.h>
struct device_node;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6f8065a7d487..a6f3ac0d4602 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -20,8 +20,6 @@
#include <asm/prom.h>
#include <asm/pci-bridge.h>
-#include <asm-generic/pci-dma-compat.h>
-
/* Return values for pci_controller_ops.probe_mode function */
#define PCI_PROBE_NONE -1 /* Don't look at this bus at all */
#define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index ac9fb114e25d..47897a30982d 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
}
return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
}
+
+unsigned long vmalloc_to_phys(void *vmalloc_addr);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 174271ef2767..e1afd4c4f695 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -117,6 +117,9 @@ extern int cpu_to_core_id(int cpu);
#define PPC_MSG_TICK_BROADCAST 2
#define PPC_MSG_DEBUGGER_BREAK 3
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION 4
+
/* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[];
@@ -124,6 +127,7 @@ extern const char *smp_ipi_name[];
/* for irq controllers with only a single ipi */
extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_set_message(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void);
void smp_init_pSeries(void);
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 5d61bbced6a1..04ef3ae511da 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -30,6 +30,7 @@
#ifdef CONFIG_PPC_ICP_NATIVE
extern int icp_native_init(void);
extern void icp_native_flush_interrupt(void);
+extern void icp_native_cause_ipi_rm(int cpu);
#else
static inline int icp_native_init(void) { return -ENODEV; }
#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index ab4d4732c492..c93cf35ce379 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce {
__u32 window_size;
};
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+ __u64 liobn;
+ __u32 page_shift;
+ __u32 flags;
+ __u64 offset; /* in pages */
+ __u64 size; /* in pages */
+};
+
/* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma {
__u64 rma_size;
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index dd54f28ecdec..1672e3398270 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -95,4 +95,6 @@
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
+#define SO_CNX_ADVICE 53
+
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804cdecaa..aec9a1b1d25b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -109,8 +109,9 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
* If the breakpoint is unregistered between a hw_breakpoint_handler()
* and the single_step_dabr_instruction(), then cleanup the breakpoint
* restoration variables to prevent dangling pointers.
+ * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
*/
- if (bp->ctx && bp->ctx->task)
+ if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
bp->ctx->task->thread.last_hit_ubp = NULL;
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d7a9df51b974..612df305886b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1902,9 +1902,9 @@ static inline unsigned long brk_rnd(void)
/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
else
- rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
return rnd << PAGE_SHIFT;
}
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 5a2c049c1c61..aa610ce8742f 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -49,7 +49,7 @@ static unsigned int rtas_error_log_buffer_max;
static unsigned int event_scan;
static unsigned int rtas_event_scan_rate;
-static int full_rtas_msgs = 0;
+static bool full_rtas_msgs;
/* Stop logging to nvram after first fatal error */
static int logging_enabled; /* Until we initialize everything,
@@ -592,11 +592,6 @@ __setup("surveillance=", surveillance_setup);
static int __init rtasmsgs_setup(char *str)
{
- if (strcmp(str, "on") == 0)
- full_rtas_msgs = 1;
- else if (strcmp(str, "off") == 0)
- full_rtas_msgs = 0;
-
- return 1;
+ return (kstrtobool(str, &full_rtas_msgs) == 0);
}
__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ad8c9db61237..d544fa311757 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
- lockdep_init();
-
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5c03a6a9b054..f98be8383a39 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr)
setup_paca(&boot_paca);
fixup_boot_paca();
- /* Initialize lockdep early or else spinlocks will blow */
- lockdep_init();
-
/* -------- printk is now safe to use ------- */
/* Enable early debugging if any specified (see udbg.h) */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8575d044775d..8cac1eb41466 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
- int messages; /* current messages */
+ long messages; /* current messages */
unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data;
}
-void smp_muxed_ipi_message_pass(int cpu, int msg)
+void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
*/
smp_mb();
message[msg] = 1;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+ struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+ smp_muxed_ipi_set_message(cpu, msg);
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
}
#ifdef __BIG_ENDIAN__
-#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
-#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
irqreturn_t smp_ipi_demux(void)
{
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
- unsigned int all;
+ unsigned long all;
mb(); /* order any irq clear */
do {
all = xchg(&info->messages, 0);
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ /*
+ * Must check for PPC_MSG_RM_HOST_ACTION messages
+ * before PPC_MSG_CALL_FUNCTION messages because when
+ * a VM is destroyed, we call kick_all_cpus_sync()
+ * to ensure that any pending PPC_MSG_RM_HOST_ACTION
+ * messages have completed before we free any VCPUs.
+ */
+ if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+ kvmppc_xics_ipi_action();
+#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
@@ -732,7 +750,7 @@ void start_secondary(void *unused)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
BUG();
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 88414dde7e35..9229ba63c370 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -203,9 +203,8 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_SMP
printk("SMP NR_CPUS=%d ", NR_CPUS);
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC ");
#ifdef CONFIG_NUMA
printk("NUMA ");
#endif
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 0570eef83fba..7f7b6d86ac73 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
- $(KVM)/eventfd.o
+ $(KVM)/eventfd.o $(KVM)/vfio.o
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 638c6d9be9e0..b34220d2aa42 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
- INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 9c3b76bb69d9..82970042295e 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@@ -36,28 +37,69 @@
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
+{
+ return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
-static long kvmppc_stt_npages(unsigned long window_size)
+static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
- return ALIGN((window_size >> SPAPR_TCE_SHIFT)
- * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+ unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
+ (tce_pages * sizeof(struct page *));
+
+ return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}
-static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
- struct kvm *kvm = stt->kvm;
- int i;
+ long ret = 0;
- mutex_lock(&kvm->lock);
- list_del(&stt->list);
- for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ if (!current || !current->mm)
+ return ret; /* process exited */
+
+ down_write(&current->mm->mmap_sem);
+
+ if (inc) {
+ unsigned long locked, lock_limit;
+
+ locked = current->mm->locked_vm + stt_pages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ ret = -ENOMEM;
+ else
+ current->mm->locked_vm += stt_pages;
+ } else {
+ if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
+ stt_pages = current->mm->locked_vm;
+
+ current->mm->locked_vm -= stt_pages;
+ }
+
+ pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
+ inc ? '+' : '-',
+ stt_pages << PAGE_SHIFT,
+ current->mm->locked_vm << PAGE_SHIFT,
+ rlimit(RLIMIT_MEMLOCK),
+ ret ? " - exceeded" : "");
+
+ up_write(&current->mm->mmap_sem);
+
+ return ret;
+}
+
+static void release_spapr_tce_table(struct rcu_head *head)
+{
+ struct kvmppc_spapr_tce_table *stt = container_of(head,
+ struct kvmppc_spapr_tce_table, rcu);
+ unsigned long i, npages = kvmppc_tce_pages(stt->size);
+
+ for (i = 0; i < npages; i++)
__free_page(stt->pages[i]);
- kfree(stt);
- mutex_unlock(&kvm->lock);
- kvm_put_kvm(kvm);
+ kfree(stt);
}
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
struct page *page;
- if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
page = stt->pages[vmf->pgoff];
@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
- release_spapr_tce_table(stt);
+ list_del_rcu(&stt->list);
+
+ kvm_put_kvm(stt->kvm);
+
+ kvmppc_account_memlimit(
+ kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+ call_rcu(&stt->rcu, release_spapr_tce_table);
+
return 0;
}
@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = {
};
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args)
+ struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
- long npages;
+ unsigned long npages, size;
int ret = -ENOMEM;
int i;
+ if (!args->size)
+ return -EINVAL;
+
/* Check this LIOBN hasn't been previously allocated */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == args->liobn)
return -EBUSY;
}
- npages = kvmppc_stt_npages(args->window_size);
+ size = args->size;
+ npages = kvmppc_tce_pages(size);
+ ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
+ if (ret) {
+ stt = NULL;
+ goto fail;
+ }
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
stt->liobn = args->liobn;
- stt->window_size = args->window_size;
+ stt->page_shift = args->page_shift;
+ stt->offset = args->offset;
+ stt->size = size;
stt->kvm = kvm;
for (i = 0; i < npages; i++) {
@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kvm_get_kvm(kvm);
mutex_lock(&kvm->lock);
- list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+ list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
mutex_unlock(&kvm->lock);
@@ -148,3 +208,59 @@ fail:
}
return ret;
}
+
+long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret = H_SUCCESS, idx;
+ unsigned long entry, ua = 0;
+ u64 __user *tces, tce;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ entry = ioba >> stt->page_shift;
+ /*
+ * SPAPR spec says that the maximum size of the list is 512 TCEs
+ * so the whole table fits in 4K page
+ */
+ if (npages > 512)
+ return H_PARAMETER;
+
+ if (tce_list & (SZ_4K - 1))
+ return H_PARAMETER;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tces = (u64 __user *) ua;
+
+ for (i = 0; i < npages; ++i) {
+ if (get_user(tce, tces + i)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tce = be64_to_cpu(tce);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
+
+ kvmppc_tce_put(stt, entry + i, tce);
+ }
+
+unlock_exit:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 039028d3ccb5..f88b859af53b 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@@ -30,76 +31,321 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
+#include <asm/mmu_context.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/iommu.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-/* WARNING: This will be called in real-mode on HV KVM and virtual
+/*
+ * Finds a TCE table descriptor by LIOBN.
+ *
+ * WARNING: This will be called in real or virtual mode on HV KVM and virtual
* mode on PR KVM
*/
-long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba, unsigned long tce)
+struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
+ unsigned long liobn)
{
struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
+ list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
+ if (stt->liobn == liobn)
+ return stt;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_find_table);
+
+/*
+ * Validates IO address.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages)
+{
+ unsigned long mask = (1ULL << stt->page_shift) - 1;
+ unsigned long idx = ioba >> stt->page_shift;
+
+ if ((ioba & mask) || (idx < stt->offset) ||
+ (idx - stt->offset + npages > stt->size) ||
+ (idx + npages < idx))
+ return H_PARAMETER;
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
+
+/*
+ * Validates TCE address.
+ * At the moment flags and page mask are validated.
+ * As the host kernel does not access those addresses (just puts them
+ * to the table and user space is supposed to process them), we can skip
+ * checking other things (such as TCE is a guest RAM address or the page
+ * was actually allocated).
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
+{
+ unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
+ unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
+
+ if (tce & mask)
+ return H_PARAMETER;
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
+
+/* Note on the use of page_address() in real mode,
+ *
+ * It is safe to use page_address() in real mode on ppc64 because
+ * page_address() is always defined as lowmem_page_address()
+ * which returns __va(PFN_PHYS(page_to_pfn(page))) which is arithmetic
+ * operation and does not access page struct.
+ *
+ * Theoretically page_address() could be defined different
+ * but either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL
+ * would have to be enabled.
+ * WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64,
+ * HASHED_PAGE_VIRTUAL could be enabled for ppc32 only and only
+ * if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP
+ * is not expected to be enabled on ppc32, page_address()
+ * is safe for ppc32 as well.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+static u64 *kvmppc_page_address(struct page *page)
+{
+#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
+#error TODO: fix to avoid page_address() here
+#endif
+ return (u64 *) page_address(page);
+}
+
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Called in both real and virtual modes.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+
+ idx -= stt->offset;
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = kvmppc_page_address(page);
+
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+EXPORT_SYMBOL_GPL(kvmppc_tce_put);
+
+long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ unsigned long *ua, unsigned long **prmap)
+{
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (prmap)
+ *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+{
+ struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ long ret;
+
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
-
- /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
- /* liobn, stt, stt->window_size); */
- if (ioba >= stt->window_size)
- return H_PARAMETER;
-
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
-
- /* FIXME: Need to validate the TCE itself */
- /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
- tbl[idx % TCES_PER_PAGE] = tce;
- return H_SUCCESS;
- }
- }
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+
+ return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
-long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba)
+static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
+ unsigned long ua, unsigned long *phpa)
+{
+ pte_t *ptep, pte;
+ unsigned shift = 0;
+
+ ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
+ if (!ptep || !pte_present(*ptep))
+ return -ENXIO;
+ pte = *ptep;
+
+ if (!shift)
+ shift = PAGE_SHIFT;
+
+ /* Avoid handling anything potentially complicated in realmode */
+ if (shift > PAGE_SHIFT)
+ return -EAGAIN;
+
+ if (!pte_young(pte))
+ return -EAGAIN;
+
+ *phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) |
+ (ua & ~PAGE_MASK);
+
+ return 0;
+}
+
+long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages)
{
- struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
+ long i, ret = H_SUCCESS;
+ unsigned long tces, entry, ua = 0;
+ unsigned long *rmap = NULL;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ entry = ioba >> stt->page_shift;
+ /*
+ * The spec says that the maximum size of the list is 512 TCEs
+ * so the whole table addressed resides in 4K page
+ */
+ if (npages > 512)
+ return H_PARAMETER;
+
+ if (tce_list & (SZ_4K - 1))
+ return H_PARAMETER;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ return H_TOO_HARD;
- if (ioba >= stt->window_size)
- return H_PARAMETER;
+ rmap = (void *) vmalloc_to_phys(rmap);
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
+ /*
+ * Synchronize with the MMU notifier callbacks in
+ * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+ * While we have the rmap lock, code running on other CPUs
+ * cannot finish unmapping the host real page that backs
+ * this guest real page, so we are OK to access the host
+ * real page.
+ */
+ lock_rmap(rmap);
+ if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+
+ for (i = 0; i < npages; ++i) {
+ unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
- vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
- return H_SUCCESS;
- }
+ kvmppc_tce_put(stt, entry + i, tce);
}
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
+unlock_exit:
+ unlock_rmap(rmap);
+
+ return ret;
+}
+
+long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check permission bits only to allow userspace poison TCE for debug */
+ if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
+ return H_PARAMETER;
+
+ for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
+
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba)
+{
+ struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ long ret;
+ unsigned long idx;
+ struct page *page;
+ u64 *tbl;
+
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+ vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
+
+ return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
+
+#endif /* KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index baeddb06811d..84fb4fcfaa41 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,17 @@ static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
+#ifdef CONFIG_KVM_XICS
+static struct kernel_param_ops module_param_ops = {
+ .set = param_set_int,
+ .get = param_get_int,
+};
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+#endif
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,11 +125,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swait_active(wqp)) {
+ swake_up(wqp);
++vcpu->stat.halt_wakeup;
}
@@ -701,8 +712,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
tvcpu->arch.prodded = 1;
smp_mb();
if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swait_active(&vcpu->wq)) {
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req);
break;
- } /* fallthrough */
+ }
+ return RESUME_HOST;
+ case H_PUT_TCE:
+ ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PUT_TCE_INDIRECT:
+ ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_STUFF_TCE:
+ ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
default:
return RESUME_HOST;
}
@@ -1459,7 +1494,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_waitqueue_head(&vcore->wq);
+ init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
@@ -2279,6 +2314,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
}
/*
+ * Clear core from the list of active host cores as we are about to
+ * enter the guest. Only do this if it is the primary thread of the
+ * core (not if a subcore) that is entering the guest.
+ */
+static inline void kvmppc_clear_host_core(int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return;
+ /*
+ * Memory barrier can be omitted here as we will do a smp_wmb()
+ * later in kvmppc_start_thread and we need ensure that state is
+ * visible to other CPUs only after we enter guest.
+ */
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+}
+
+/*
+ * Advertise this core as an active host core since we exited the guest
+ * Only need to do this if it is the primary thread of the core that is
+ * exiting.
+ */
+static inline void kvmppc_set_host_core(int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return;
+
+ /*
+ * Memory barrier can be omitted here because we do a spin_unlock
+ * immediately after this which provides the memory barrier.
+ */
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+}
+
+/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
*/
@@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
+ kvmppc_clear_host_core(pcpu);
+
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i);
}
+ kvmppc_set_host_core(pcpu);
+
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
@@ -2531,10 +2610,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu;
int do_sleep = 1;
+ DECLARE_SWAITQUEUE(wait);
- DEFINE_WAIT(wait);
-
- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
/*
* Check one last time for pending exceptions and ceded state after
@@ -2548,7 +2626,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
if (!do_sleep) {
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
return;
}
@@ -2556,7 +2634,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -2612,7 +2690,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ swake_up(&vc->wq);
}
}
@@ -2984,6 +3062,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
}
+#ifdef CONFIG_KVM_XICS
+static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ kvmppc_set_host_core(cpu);
+ break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ kvmppc_clear_host_core(cpu);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kvmppc_cpu_notifier = {
+ .notifier_call = kvmppc_cpu_notify,
+};
+
+/*
+ * Allocate a per-core structure for managing state about which cores are
+ * running in the host versus the guest and for exchanging data between
+ * real mode KVM and CPU running in the host.
+ * This is only done for the first VM.
+ * The allocated structure stays even if all VMs have stopped.
+ * It is only freed when the kvm-hv module is unloaded.
+ * It's OK for this routine to fail, we just don't support host
+ * core operations like redirecting H_IPI wakeups.
+ */
+void kvmppc_alloc_host_rm_ops(void)
+{
+ struct kvmppc_host_rm_ops *ops;
+ unsigned long l_ops;
+ int cpu, core;
+ int size;
+
+ /* Not the first time here ? */
+ if (kvmppc_host_rm_ops_hv != NULL)
+ return;
+
+ ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
+ if (!ops)
+ return;
+
+ size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
+ ops->rm_core = kzalloc(size, GFP_KERNEL);
+
+ if (!ops->rm_core) {
+ kfree(ops);
+ return;
+ }
+
+ get_online_cpus();
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
+ if (!cpu_online(cpu))
+ continue;
+
+ core = cpu >> threads_shift;
+ ops->rm_core[core].rm_state.in_host = 1;
+ }
+
+ ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
+
+ /*
+ * Make the contents of the kvmppc_host_rm_ops structure visible
+ * to other CPUs before we assign it to the global variable.
+ * Do an atomic assignment (no locks used here), but if someone
+ * beats us to it, just free our copy and return.
+ */
+ smp_wmb();
+ l_ops = (unsigned long) ops;
+
+ if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
+ put_online_cpus();
+ kfree(ops->rm_core);
+ kfree(ops);
+ return;
+ }
+
+ register_cpu_notifier(&kvmppc_cpu_notifier);
+
+ put_online_cpus();
+}
+
+void kvmppc_free_host_rm_ops(void)
+{
+ if (kvmppc_host_rm_ops_hv) {
+ unregister_cpu_notifier(&kvmppc_cpu_notifier);
+ kfree(kvmppc_host_rm_ops_hv->rm_core);
+ kfree(kvmppc_host_rm_ops_hv);
+ kvmppc_host_rm_ops_hv = NULL;
+ }
+}
+#endif
+
static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
@@ -2996,6 +3182,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
return -ENOMEM;
kvm->arch.lpid = lpid;
+ kvmppc_alloc_host_rm_ops();
+
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
@@ -3229,6 +3417,7 @@ static int kvmppc_book3s_init_hv(void)
static void kvmppc_book3s_exit_hv(void)
{
+ kvmppc_free_host_rm_ops();
kvmppc_hv_ops = NULL;
}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fd7006bf6b1a..5f0380db3eab 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap)
kvmhv_interrupt_vcore(vc, ee);
}
}
+
+struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 24f58076d49e..980d8a6f7284 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -17,12 +17,16 @@
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
+#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
+int h_ipi_redirect = 1;
+EXPORT_SYMBOL(h_ipi_redirect);
+
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
/* -- ICP routines -- */
+#ifdef CONFIG_SMP
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+{
+ int hcpu;
+
+ hcpu = hcore << threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
+ smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+ icp_native_cause_ipi_rm(hcpu);
+}
+#else
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
+#endif
+
+/*
+ * We start the search from our current CPU Id in the core map
+ * and go in a circle until we get back to our ID looking for a
+ * core that is running in host context and that hasn't already
+ * been targeted for another rm_host_ops.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1, if no CPU could be found in the host
+ * Else, returns a CPU Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+ struct kvmppc_host_rm_core *rm_core, int max, int action)
+{
+ bool success;
+ int core;
+ union kvmppc_rm_state old, new;
+
+ for (core = start + 1; core < max; core++) {
+ old = new = READ_ONCE(rm_core[core].rm_state);
+
+ if (!old.in_host || old.rm_action)
+ continue;
+
+ /* Try to grab this host core if not taken already. */
+ new.rm_action = action;
+
+ success = cmpxchg64(&rm_core[core].rm_state.raw,
+ old.raw, new.raw) == old.raw;
+ if (success) {
+ /*
+ * Make sure that the store to the rm_action is made
+ * visible before we return to caller (and the
+ * subsequent store to rm_data) to synchronize with
+ * the IPI handler.
+ */
+ smp_wmb();
+ return core;
+ }
+ }
+
+ return -1;
+}
+
+static inline int find_available_hostcore(int action)
+{
+ int core;
+ int my_core = smp_processor_id() >> threads_shift;
+ struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
+
+ core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
+ if (core == -1)
+ core = grab_next_hostcore(core, rm_core, my_core, action);
+
+ return core;
+}
+
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu;
+ int hcore;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
- /* Check if the core is loaded, if not, too hard */
+ /*
+ * Check if the core is loaded,
+ * if not, find an available host core to post to wake the VCPU,
+ * if we can't find one, set up state to eventually return too hard.
+ */
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
- this_icp->rm_action |= XICS_RM_KICK_VCPU;
- this_icp->rm_kick_target = vcpu;
+ hcore = -1;
+ if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
+ hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
+ if (hcore != -1) {
+ icp_send_hcore_msg(hcore, vcpu);
+ } else {
+ this_icp->rm_action |= XICS_RM_KICK_VCPU;
+ this_icp->rm_kick_target = vcpu;
+ }
return;
}
@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
bail:
return check_too_hard(xics, icp);
}
+
+/* --- Non-real mode XICS-related built-in routines --- */
+
+/**
+ * Host Operations poked by RM KVM
+ */
+static void rm_host_ipi_action(int action, void *data)
+{
+ switch (action) {
+ case XICS_RM_KICK_VCPU:
+ kvmppc_host_rm_ops_hv->vcpu_kick(data);
+ break;
+ default:
+ WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
+ break;
+ }
+
+}
+
+void kvmppc_xics_ipi_action(void)
+{
+ int core;
+ unsigned int cpu = smp_processor_id();
+ struct kvmppc_host_rm_core *rm_corep;
+
+ core = cpu >> threads_shift;
+ rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
+
+ if (rm_corep->rm_data) {
+ rm_host_ipi_action(rm_corep->rm_state.rm_action,
+ rm_corep->rm_data);
+ /* Order these stores against the real mode KVM */
+ rm_corep->rm_data = NULL;
+ smp_wmb();
+ rm_corep->rm_state.rm_action = 0;
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c613fee0b9f7..62ea3c6acdee 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_ACOP(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
+ /*
+ * Restore various registers to 0, where non-zero values
+ * set by the guest could disrupt the host.
+ */
+ li r0, 0
+ mtspr SPRN_IAMR, r0
+ mtspr SPRN_CIABR, r0
+ mtspr SPRN_DAWRX, r0
+ mtspr SPRN_TCSCR, r0
+ mtspr SPRN_WORT, r0
+ /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+ li r0, 1
+ sldi r0, r0, 31
+ mtspr SPRN_MMCRS, r0
8:
/* Save and reset AMR and UAMOR before turning on the MMU */
@@ -2006,8 +2020,8 @@ hcall_real_table:
.long 0 /* 0x12c */
.long 0 /* 0x130 */
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
- .long 0 /* 0x138 */
- .long 0 /* 0x13c */
+ .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
.long 0 /* 0x140 */
.long 0 /* 0x144 */
.long 0 /* 0x148 */
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index f2c75a1e0536..02176fd52f84 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
+ tce, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_bulk_remove(vcpu);
case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu);
+ case H_PUT_TCE_INDIRECT:
+ return kvmppc_h_pr_put_tce_indirect(vcpu);
+ case H_STUFF_TCE:
+ return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a3b182dcb823..19aa59b0850c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -33,6 +33,7 @@
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
+#include <asm/iommu.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
unsigned int i;
struct kvm_vcpu *vcpu;
+#ifdef CONFIG_KVM_XICS
+ /*
+ * We call kick_all_cpus_sync() to ensure that all
+ * CPUs have executed any pending IPIs before we
+ * continue and free VCPUs structures below.
+ */
+ if (is_kvmppc_hv_enabled(kvm))
+ kick_all_cpus_sync();
+#endif
+
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
+ case KVM_CAP_SPAPR_TCE_64:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_SMMU_INFO:
r = 1;
break;
+ case KVM_CAP_SPAPR_MULTITCE:
+ r = 1;
+ break;
#endif
default:
r = 0;
@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CREATE_SPAPR_TCE_64: {
+ struct kvm_create_spapr_tce_64 create_tce_64;
+
+ r = -EFAULT;
+ if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
+ goto out;
+ if (create_tce_64.flags) {
+ r = -EINVAL;
+ goto out;
+ }
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
+ goto out;
+ }
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
+ struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
goto out;
- r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+
+ create_tce_64.liobn = create_tce.liobn;
+ create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.offset = 0;
+ create_tce_64.size = create_tce.window_size >>
+ IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.flags = 0;
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_PPC_GET_SMMU_INFO: {
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 90dd9280894f..7635b1c6b5da 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -255,8 +255,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (ret < 0)
break;
+
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
+ if (debug_pagealloc_enabled() &&
+ (paddr >> PAGE_SHIFT) < linear_map_hash_count)
linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
#endif /* CONFIG_DEBUG_PAGEALLOC */
}
@@ -516,17 +518,17 @@ static void __init htab_init_page_sizes(void)
if (mmu_has_feature(MMU_FTR_16M_PAGE))
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
sizeof(mmu_psize_defaults_gp));
- found:
-#ifndef CONFIG_DEBUG_PAGEALLOC
- /*
- * Pick a size for the linear mapping. Currently, we only support
- * 16M, 1M and 4K which is the default
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- mmu_linear_psize = MMU_PAGE_16M;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- mmu_linear_psize = MMU_PAGE_1M;
-#endif /* CONFIG_DEBUG_PAGEALLOC */
+found:
+ if (!debug_pagealloc_enabled()) {
+ /*
+ * Pick a size for the linear mapping. Currently, we only
+ * support 16M, 1M and 4K which is the default
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+ }
#ifdef CONFIG_PPC_64K_PAGES
/*
@@ -742,10 +744,12 @@ static void __init htab_initialize(void)
prot = pgprot_val(PAGE_KERNEL);
#ifdef CONFIG_DEBUG_PAGEALLOC
- linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
- linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
- 1, ppc64_rma_size));
- memset(linear_map_hash_slots, 0, linear_map_hash_count);
+ if (debug_pagealloc_enabled()) {
+ linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ linear_map_hash_slots = __va(memblock_alloc_base(
+ linear_map_hash_count, 1, ppc64_rma_size));
+ memset(linear_map_hash_slots, 0, linear_map_hash_count);
+ }
#endif /* CONFIG_DEBUG_PAGEALLOC */
/* On U3 based machines, we need to reserve the DART area and
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 7e6d0880813f..83a8be791e06 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -8,6 +8,8 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <asm/mmu.h>
+
#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC64
static inline int tlb1_next(void)
@@ -60,6 +62,14 @@ static inline void book3e_tlb_lock(void)
unsigned long tmp;
int token = smp_processor_id() + 1;
+ /*
+ * Besides being unnecessary in the absence of SMT, this
+ * check prevents trying to do lbarx/stbcx. on e5500 which
+ * doesn't implement either feature.
+ */
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
asm volatile("1: lbarx %0, 0, %1;"
"cmpwi %0, 0;"
"bne 2f;"
@@ -80,6 +90,9 @@ static inline void book3e_tlb_unlock(void)
{
struct paca_struct *paca = get_paca();
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
isync();
paca->tcd_ptr->lock = 0;
}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 4eb1b8f5667c..c899fe340bbd 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -112,10 +112,10 @@ void __init MMU_setup(void)
if (strstr(boot_command_line, "noltlbs")) {
__map_without_ltlbs = 1;
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
- __map_without_bats = 1;
- __map_without_ltlbs = 1;
-#endif
+ if (debug_pagealloc_enabled()) {
+ __map_without_bats = 1;
+ __map_without_ltlbs = 1;
+ }
}
/*
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f980da6d7569..ac79dbde1015 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -547,7 +547,7 @@ static int __init add_system_ram_resources(void)
res->name = "System RAM";
res->start = base;
res->end = base + size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
WARN_ON(request_resource(&iomem_resource, res) < 0);
}
}
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e12f6c..4087705ba90f 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void)
/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+ rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
else
- rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+ rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
return rnd << PAGE_SHIFT;
}
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 4e4efbc2658e..9ca6fe16cb29 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -118,8 +118,7 @@ static void destroy_pagetable_page(struct mm_struct *mm)
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
- if (!count) {
+ if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
pgtable_page_dtor(page);
free_hot_cold_page(page, 0);
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfd7925c72..de37ff445362 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* CONFIG_DEBUG_VM */
+unsigned long vmalloc_to_phys(void *va)
+{
+ unsigned long pfn = vmalloc_to_pfn(va);
+
+ BUG_ON(!pfn);
+ return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
+}
+EXPORT_SYMBOL_GPL(vmalloc_to_phys);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 0eb53128ca2a..347106080bb1 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -403,7 +403,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
* count.
*/
if (likely(!mm->context.pte_frag)) {
- atomic_set(&page->_count, PTE_FRAG_NR);
+ set_page_count(page, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 59012e750abe..2da41b78cb6d 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -503,14 +503,6 @@ static int event_data_to_attrs(unsigned ix, struct attribute **attrs,
return 0;
}
-static unsigned long vmalloc_to_phys(void *v)
-{
- struct page *p = vmalloc_to_page(v);
-
- BUG_ON(!p);
- return page_to_phys(p) + offset_in_page(v);
-}
-
/* */
struct event_uniq {
struct rb_node node;
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 711f3d352af7..452da2391153 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -188,7 +188,7 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
static inline void mpc512x_free_bootmem(struct page *page)
{
BUG_ON(PageTail(page));
- BUG_ON(atomic_read(&page->_count) > 1);
+ BUG_ON(page_ref_count(page) > 1);
free_reserved_page(page);
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 32274f72fe3f..282837a1d74b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -47,20 +47,14 @@ static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
-static int cede_offline_enabled __read_mostly = 1;
+static bool cede_offline_enabled __read_mostly = true;
/*
* Enable/disable cede_offline when available.
*/
static int __init setup_cede_offline(char *str)
{
- if (!strcmp(str, "off"))
- cede_offline_enabled = 0;
- else if (!strcmp(str, "on"))
- cede_offline_enabled = 1;
- else
- return 0;
- return 1;
+ return (kstrtobool(str, &cede_offline_enabled) == 0);
}
__setup("cede_offline=", setup_cede_offline);
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/sysdev/ppc4xx_gpio.c
index fc65ad1b3293..d7a7ef135b9f 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/sysdev/ppc4xx_gpio.c
@@ -78,7 +78,7 @@ static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
- return in_be32(&regs->ir) & GPIO_MASK(gpio);
+ return !!(in_be32(&regs->ir) & GPIO_MASK(gpio));
}
static inline void
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index ff5e73230a36..56ce8ca3281b 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -46,7 +46,7 @@ static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- return in_8(mm_gc->regs) & u8_pin2mask(gpio);
+ return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio));
}
static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index eae32654bdf2..afdf62f2a695 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void icp_native_cause_ipi_rm(int cpu)
+{
+ /*
+ * Currently not used to send IPIs to another CPU
+ * on the same core. Only caller is KVM real mode.
+ * Need the physical address of the XICS to be
+ * previously saved in kvm_hstate in the paca.
+ */
+ unsigned long xics_phys;
+
+ /*
+ * Just like the cause_ipi functions, it is required to
+ * include a full barrier (out8 includes a sync) before
+ * causing the IPI.
+ */
+ xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
+}
+#endif
+
/*
* Called when an interrupt is received on an off-line CPU to
* clear the interrupt, so that the CPU can go back to nap mode.