summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/device.h3
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h23
-rw-r--r--arch/powerpc/include/asm/kvm_host.h4
-rw-r--r--arch/powerpc/include/asm/pgtable.h6
-rw-r--r--arch/powerpc/include/asm/qe.h1
-rw-r--r--arch/powerpc/include/asm/socket.h3
-rw-r--r--arch/powerpc/include/asm/spinlock.h20
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c48
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S19
-rw-r--r--arch/powerpc/kernel/perf_callchain.c527
-rw-r--r--arch/powerpc/kvm/44x.c4
-rw-r--r--arch/powerpc/kvm/44x_tlb.c11
-rw-r--r--arch/powerpc/kvm/Kconfig14
-rw-r--r--arch/powerpc/kvm/Makefile4
-rw-r--r--arch/powerpc/kvm/booke.c2
-rw-r--r--arch/powerpc/kvm/e500.c7
-rw-r--r--arch/powerpc/kvm/e500_emulate.c3
-rw-r--r--arch/powerpc/kvm/e500_tlb.c26
-rw-r--r--arch/powerpc/kvm/e500_tlb.h6
-rw-r--r--arch/powerpc/kvm/emulate.c7
-rw-r--r--arch/powerpc/kvm/powerpc.c32
-rw-r--r--arch/powerpc/kvm/trace.h104
-rw-r--r--arch/powerpc/mm/slb.c37
-rw-r--r--arch/powerpc/mm/stab.c11
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe.c13
27 files changed, 800 insertions, 139 deletions
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 7d2277cef09a..e3e06e0f7fc0 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -30,4 +30,7 @@ dev_archdata_get_node(const struct dev_archdata *ad)
return ad->of_node;
}
+struct pdev_archdata {
+};
+
#endif /* _ASM_POWERPC_DEVICE_H */
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index b44aaabdd1a6..0c34371ec49c 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -424,6 +424,29 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
#endif
}
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
+ if (ops->addr_needs_map && ops->addr_needs_map(dev, addr, size))
+ return 0;
+
+ if (!dev->dma_mask)
+ return 0;
+
+ return addr + size <= *dev->dma_mask;
+}
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr + get_dma_direct_offset(dev);
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return daddr - get_dma_direct_offset(dev);
+}
+
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
#ifdef CONFIG_NOT_COHERENT_CACHE
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index fddc3ed715fa..c9c930ed11d7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,7 +34,8 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
/* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE (1UL << 31)
+#define KVM_NR_PAGE_SIZES 1
+#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
struct kvm;
struct kvm_run;
@@ -153,7 +154,6 @@ struct kvm_vcpu_arch {
u32 pid;
u32 swap_pid;
- u32 pvr;
u32 ccr0;
u32 ccr1;
u32 dbcr0;
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index eb17da781128..2a5da069714e 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -104,8 +104,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
else
pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
-#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP)
- /* Second case is 32-bit with 64-bit PTE in SMP mode. In this case, we
+#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+ /* Second case is 32-bit with 64-bit PTE. In this case, we
* can just store as long as we do the two halves in the right order
* with a barrier in between. This is possible because we take care,
* in the hash code, to pre-invalidate if the PTE was already hashed,
@@ -140,7 +140,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
#else
/* Anything else just stores the PTE normally. That covers all 64-bit
- * cases, and 32-bit non-hash with 64-bit PTEs in UP mode
+ * cases, and 32-bit non-hash with 32-bit PTEs.
*/
*ptep = pte;
#endif
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index 157c5ca581c8..f388f0ab193f 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -154,6 +154,7 @@ int qe_get_snum(void);
void qe_put_snum(u8 snum);
unsigned int qe_get_num_of_risc(void);
unsigned int qe_get_num_of_snums(void);
+int qe_alive_during_sleep(void);
/* we actually use cpm_muram implementation, define this for convenience */
#define qe_muram_init cpm_muram_init
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index 1e5cfad0e3f7..3ab8b3e6feb0 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -64,4 +64,7 @@
#define SO_TIMESTAMPING 37
#define SCM_TIMESTAMPING SO_TIMESTAMPING
+#define SO_PROTOCOL 38
+#define SO_DOMAIN 39
+
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index c3b193121f81..198266cf9e2d 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -54,7 +54,7 @@
* This returns the old value in the lock, so we succeeded
* in getting the lock if the return value is 0.
*/
-static inline unsigned long __spin_trylock(raw_spinlock_t *lock)
+static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock)
{
unsigned long tmp, token;
@@ -76,7 +76,7 @@ static inline unsigned long __spin_trylock(raw_spinlock_t *lock)
static inline int __raw_spin_trylock(raw_spinlock_t *lock)
{
CLEAR_IO_SYNC;
- return __spin_trylock(lock) == 0;
+ return arch_spin_trylock(lock) == 0;
}
/*
@@ -108,7 +108,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
CLEAR_IO_SYNC;
while (1) {
- if (likely(__spin_trylock(lock) == 0))
+ if (likely(arch_spin_trylock(lock) == 0))
break;
do {
HMT_low();
@@ -126,7 +126,7 @@ void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
CLEAR_IO_SYNC;
while (1) {
- if (likely(__spin_trylock(lock) == 0))
+ if (likely(arch_spin_trylock(lock) == 0))
break;
local_save_flags(flags_dis);
local_irq_restore(flags);
@@ -181,7 +181,7 @@ extern void __raw_spin_unlock_wait(raw_spinlock_t *lock);
* This returns the old value in the lock + 1,
* so we got a read lock if the return value is > 0.
*/
-static inline long __read_trylock(raw_rwlock_t *rw)
+static inline long arch_read_trylock(raw_rwlock_t *rw)
{
long tmp;
@@ -205,7 +205,7 @@ static inline long __read_trylock(raw_rwlock_t *rw)
* This returns the old value in the lock,
* so we got the write lock if the return value is 0.
*/
-static inline long __write_trylock(raw_rwlock_t *rw)
+static inline long arch_write_trylock(raw_rwlock_t *rw)
{
long tmp, token;
@@ -228,7 +228,7 @@ static inline long __write_trylock(raw_rwlock_t *rw)
static inline void __raw_read_lock(raw_rwlock_t *rw)
{
while (1) {
- if (likely(__read_trylock(rw) > 0))
+ if (likely(arch_read_trylock(rw) > 0))
break;
do {
HMT_low();
@@ -242,7 +242,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
static inline void __raw_write_lock(raw_rwlock_t *rw)
{
while (1) {
- if (likely(__write_trylock(rw) == 0))
+ if (likely(arch_write_trylock(rw) == 0))
break;
do {
HMT_low();
@@ -255,12 +255,12 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
static inline int __raw_read_trylock(raw_rwlock_t *rw)
{
- return __read_trylock(rw) > 0;
+ return arch_read_trylock(rw) > 0;
}
static inline int __raw_write_trylock(raw_rwlock_t *rw)
{
- return __write_trylock(rw) == 0;
+ return arch_write_trylock(rw) == 0;
}
static inline void __raw_read_unlock(raw_rwlock_t *rw)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b73396b93905..9619285f64e8 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o
+obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 561b64652311..197b15646eeb 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -67,6 +67,8 @@ int main(void)
DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id));
#ifdef CONFIG_PPC64
DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
+ DEFINE(SIGSEGV, SIGSEGV);
+ DEFINE(NMI_MASK, NMI_MASK);
#else
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 68ccf11e4f19..e8a57de85bcf 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -24,50 +24,12 @@
int swiotlb __read_mostly;
unsigned int ppc_swiotlb_enable;
-void *swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t addr)
-{
- unsigned long pfn = PFN_DOWN(swiotlb_bus_to_phys(hwdev, addr));
- void *pageaddr = page_address(pfn_to_page(pfn));
-
- if (pageaddr != NULL)
- return pageaddr + (addr % PAGE_SIZE);
- return NULL;
-}
-
-dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
- return paddr + get_dma_direct_offset(hwdev);
-}
-
-phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
-
-{
- return baddr - get_dma_direct_offset(hwdev);
-}
-
-/*
- * Determine if an address needs bounce buffering via swiotlb.
- * Going forward I expect the swiotlb code to generalize on using
- * a dma_ops->addr_needs_map, and this function will move from here to the
- * generic swiotlb code.
- */
-int
-swiotlb_arch_address_needs_mapping(struct device *hwdev, dma_addr_t addr,
- size_t size)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev);
-
- BUG_ON(!dma_ops);
- return dma_ops->addr_needs_map(hwdev, addr, size);
-}
-
/*
* Determine if an address is reachable by a pci device, or if we must bounce.
*/
static int
swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
{
- u64 mask = dma_get_mask(hwdev);
dma_addr_t max;
struct pci_controller *hose;
struct pci_dev *pdev = to_pci_dev(hwdev);
@@ -79,16 +41,9 @@ swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
if ((addr + size > max) | (addr < hose->dma_window_base_cur))
return 1;
- return !is_buffer_dma_capable(mask, addr, size);
-}
-
-static int
-swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
-{
- return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+ return 0;
}
-
/*
* At the moment, all platforms that use this code only require
* swiotlb to be used if we're operating on HIGHMEM. Since
@@ -104,7 +59,6 @@ struct dma_mapping_ops swiotlb_dma_ops = {
.dma_supported = swiotlb_dma_supported,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
- .addr_needs_map = swiotlb_addr_needs_map,
.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eb898112e577..8ac85e08ffae 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -729,6 +729,11 @@ BEGIN_FTR_SECTION
bne- do_ste_alloc /* If so handle it */
END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+ clrrdi r11,r1,THREAD_SHIFT
+ lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
+ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
+ bne 77f /* then don't call hash_page now */
+
/*
* On iSeries, we soft-disable interrupts here, then
* hard-enable interrupts so that the hash_page code can spin on
@@ -833,6 +838,20 @@ handle_page_fault:
bl .low_hash_fault
b .ret_from_except
+/*
+ * We come here as a result of a DSI at a point where we don't want
+ * to call hash_page, such as when we are accessing memory (possibly
+ * user memory) inside a PMU interrupt that occurred while interrupts
+ * were soft-disabled. We want to invoke the exception handler for
+ * the access, or panic if there isn't a handler.
+ */
+77: bl .save_nvgprs
+ mr r4,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ li r5,SIGSEGV
+ bl .bad_page_fault
+ b .ret_from_except
+
/* here we have a segment miss */
do_ste_alloc:
bl .ste_allocate /* try to insert stab entry */
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
new file mode 100644
index 000000000000..f74b62c67511
--- /dev/null
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -0,0 +1,527 @@
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_counter.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/pgtable.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#ifdef CONFIG_PPC64
+#include "ppc32.h"
+#endif
+
+/*
+ * Store another value in a callchain_entry.
+ */
+static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
+{
+ unsigned int nr = entry->nr;
+
+ if (nr < PERF_MAX_STACK_DEPTH) {
+ entry->ip[nr] = ip;
+ entry->nr = nr + 1;
+ }
+}
+
+/*
+ * Is sp valid as the address of the next kernel stack frame after prev_sp?
+ * The next frame may be in a different stack area but should not go
+ * back down in the same stack area.
+ */
+static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
+{
+ if (sp & 0xf)
+ return 0; /* must be 16-byte aligned */
+ if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ return 0;
+ if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
+ return 1;
+ /*
+ * sp could decrease when we jump off an interrupt stack
+ * back to the regular process stack.
+ */
+ if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1)))
+ return 1;
+ return 0;
+}
+
+static void perf_callchain_kernel(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ unsigned long sp, next_sp;
+ unsigned long next_ip;
+ unsigned long lr;
+ long level = 0;
+ unsigned long *fp;
+
+ lr = regs->link;
+ sp = regs->gpr[1];
+ callchain_store(entry, PERF_CONTEXT_KERNEL);
+ callchain_store(entry, regs->nip);
+
+ if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ return;
+
+ for (;;) {
+ fp = (unsigned long *) sp;
+ next_sp = fp[0];
+
+ if (next_sp == sp + STACK_INT_FRAME_SIZE &&
+ fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ /*
+ * This looks like an interrupt frame for an
+ * interrupt that occurred in the kernel
+ */
+ regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
+ next_ip = regs->nip;
+ lr = regs->link;
+ level = 0;
+ callchain_store(entry, PERF_CONTEXT_KERNEL);
+
+ } else {
+ if (level == 0)
+ next_ip = lr;
+ else
+ next_ip = fp[STACK_FRAME_LR_SAVE];
+
+ /*
+ * We can't tell which of the first two addresses
+ * we get are valid, but we can filter out the
+ * obviously bogus ones here. We replace them
+ * with 0 rather than removing them entirely so
+ * that userspace can tell which is which.
+ */
+ if ((level == 1 && next_ip == lr) ||
+ (level <= 1 && !kernel_text_address(next_ip)))
+ next_ip = 0;
+
+ ++level;
+ }
+
+ callchain_store(entry, next_ip);
+ if (!valid_next_sp(next_sp, sp))
+ return;
+ sp = next_sp;
+ }
+}
+
+#ifdef CONFIG_PPC64
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize])
+#else
+#define is_huge_psize(pagesize) 0
+#endif
+
+/*
+ * On 64-bit we don't want to invoke hash_page on user addresses from
+ * interrupt context, so if the access faults, we read the page tables
+ * to find which page (if any) is mapped and access it directly.
+ */
+static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
+{
+ pgd_t *pgdir;
+ pte_t *ptep, pte;
+ int pagesize;
+ unsigned long addr = (unsigned long) ptr;
+ unsigned long offset;
+ unsigned long pfn;
+ void *kaddr;
+
+ pgdir = current->mm->pgd;
+ if (!pgdir)
+ return -EFAULT;
+
+ pagesize = get_slice_psize(current->mm, addr);
+
+ /* align address to page boundary */
+ offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1);
+ addr -= offset;
+
+ if (is_huge_psize(pagesize))
+ ptep = huge_pte_offset(current->mm, addr);
+ else
+ ptep = find_linux_pte(pgdir, addr);
+
+ if (ptep == NULL)
+ return -EFAULT;
+ pte = *ptep;
+ if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+ return -EFAULT;
+ pfn = pte_pfn(pte);
+ if (!page_is_ram(pfn))
+ return -EFAULT;
+
+ /* no highmem to worry about here */
+ kaddr = pfn_to_kaddr(pfn);
+ memcpy(ret, kaddr + offset, nb);
+ return 0;
+}
+
+static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
+{
+ if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
+ ((unsigned long)ptr & 7))
+ return -EFAULT;
+
+ if (!__get_user_inatomic(*ret, ptr))
+ return 0;
+
+ return read_user_stack_slow(ptr, ret, 8);
+}
+
+static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
+{
+ if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
+ ((unsigned long)ptr & 3))
+ return -EFAULT;
+
+ if (!__get_user_inatomic(*ret, ptr))
+ return 0;
+
+ return read_user_stack_slow(ptr, ret, 4);
+}
+
+static inline int valid_user_sp(unsigned long sp, int is_64)
+{
+ if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
+ return 0;
+ return 1;
+}
+
+/*
+ * 64-bit user processes use the same stack frame for RT and non-RT signals.
+ */
+struct signal_frame_64 {
+ char dummy[__SIGNAL_FRAMESIZE];
+ struct ucontext uc;
+ unsigned long unused[2];
+ unsigned int tramp[6];
+ struct siginfo *pinfo;
+ void *puc;
+ struct siginfo info;
+ char abigap[288];
+};
+
+static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_64, tramp))
+ return 1;
+ if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
+ nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
+ return 1;
+ return 0;
+}
+
+/*
+ * Do some sanity checking on the signal frame pointed to by sp.
+ * We check the pinfo and puc pointers in the frame.
+ */
+static int sane_signal_64_frame(unsigned long sp)
+{
+ struct signal_frame_64 __user *sf;
+ unsigned long pinfo, puc;
+
+ sf = (struct signal_frame_64 __user *) sp;
+ if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
+ read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
+ return 0;
+ return pinfo == (unsigned long) &sf->info &&
+ puc == (unsigned long) &sf->uc;
+}
+
+static void perf_callchain_user_64(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ unsigned long sp, next_sp;
+ unsigned long next_ip;
+ unsigned long lr;
+ long level = 0;
+ struct signal_frame_64 __user *sigframe;
+ unsigned long __user *fp, *uregs;
+
+ next_ip = regs->nip;
+ lr = regs->link;
+ sp = regs->gpr[1];
+ callchain_store(entry, PERF_CONTEXT_USER);
+ callchain_store(entry, next_ip);
+
+ for (;;) {
+ fp = (unsigned long __user *) sp;
+ if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
+ return;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, which can happen when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_64) &&
+ (is_sigreturn_64_address(next_ip, sp) ||
+ (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
+ sane_signal_64_frame(sp)) {
+ /*
+ * This looks like an signal frame
+ */
+ sigframe = (struct signal_frame_64 __user *) sp;
+ uregs = sigframe->uc.uc_mcontext.gp_regs;
+ if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_64(&uregs[PT_LNK], &lr) ||
+ read_user_stack_64(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ callchain_store(entry, PERF_CONTEXT_USER);
+ callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
+
+static inline int current_is_64bit(void)
+{
+ /*
+ * We can't use test_thread_flag() here because we may be on an
+ * interrupt stack, and the thread flags don't get copied over
+ * from the thread_info on the main stack to the interrupt stack.
+ */
+ return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
+}
+
+#else /* CONFIG_PPC64 */
+/*
+ * On 32-bit we just access the address and let hash_page create a
+ * HPTE if necessary, so there is no need to fall back to reading
+ * the page tables. Since this is called at interrupt level,
+ * do_page_fault() won't treat a DSI as a page fault.
+ */
+static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
+{
+ if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
+ ((unsigned long)ptr & 3))
+ return -EFAULT;
+
+ return __get_user_inatomic(*ret, ptr);
+}
+
+static inline void perf_callchain_user_64(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+}
+
+static inline int current_is_64bit(void)
+{
+ return 0;
+}
+
+static inline int valid_user_sp(unsigned long sp, int is_64)
+{
+ if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
+ return 0;
+ return 1;
+}
+
+#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
+#define sigcontext32 sigcontext
+#define mcontext32 mcontext
+#define ucontext32 ucontext
+#define compat_siginfo_t struct siginfo
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Layout for non-RT signal frames
+ */
+struct signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32];
+ struct sigcontext32 sctx;
+ struct mcontext32 mctx;
+ int abigap[56];
+};
+
+/*
+ * Layout for RT signal frames
+ */
+struct rt_signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32 + 16];
+ compat_siginfo_t info;
+ struct ucontext32 uc;
+ int abigap[56];
+};
+
+static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
+ return 1;
+ if (vdso32_sigtramp && current->mm->context.vdso_base &&
+ nip == current->mm->context.vdso_base + vdso32_sigtramp)
+ return 1;
+ return 0;
+}
+
+static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct rt_signal_frame_32,
+ uc.uc_mcontext.mc_pad))
+ return 1;
+ if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
+ nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
+ return 1;
+ return 0;
+}
+
+static int sane_signal_32_frame(unsigned int sp)
+{
+ struct signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->mctx;
+}
+
+static int sane_rt_signal_32_frame(unsigned int sp)
+{
+ struct rt_signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->uc.uc_mcontext;
+}
+
+static unsigned int __user *signal_frame_32_regs(unsigned int sp,
+ unsigned int next_sp, unsigned int next_ip)
+{
+ struct mcontext32 __user *mctx = NULL;
+ struct signal_frame_32 __user *sf;
+ struct rt_signal_frame_32 __user *rt_sf;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, for example, when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_32) &&
+ is_sigreturn_32_address(next_ip, sp) &&
+ sane_signal_32_frame(sp)) {
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &sf->mctx;
+ }
+
+ if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
+ is_rt_sigreturn_32_address(next_ip, sp) &&
+ sane_rt_signal_32_frame(sp)) {
+ rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &rt_sf->uc.uc_mcontext;
+ }
+
+ if (!mctx)
+ return NULL;
+ return mctx->mc_gregs;
+}
+
+static void perf_callchain_user_32(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ unsigned int sp, next_sp;
+ unsigned int next_ip;
+ unsigned int lr;
+ long level = 0;
+ unsigned int __user *fp, *uregs;
+
+ next_ip = regs->nip;
+ lr = regs->link;
+ sp = regs->gpr[1];
+ callchain_store(entry, PERF_CONTEXT_USER);
+ callchain_store(entry, next_ip);
+
+ while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ fp = (unsigned int __user *) (unsigned long) sp;
+ if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
+ return;
+
+ uregs = signal_frame_32_regs(sp, next_sp, next_ip);
+ if (!uregs && level <= 1)
+ uregs = signal_frame_32_regs(sp, next_sp, lr);
+ if (uregs) {
+ /*
+ * This looks like an signal frame, so restart
+ * the stack trace with the values in it.
+ */
+ if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_32(&uregs[PT_LNK], &lr) ||
+ read_user_stack_32(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ callchain_store(entry, PERF_CONTEXT_USER);
+ callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
+
+/*
+ * Since we can't get PMU interrupts inside a PMU interrupt handler,
+ * we don't need separate irq and nmi entries here.
+ */
+static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+ struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
+
+ entry->nr = 0;
+
+ if (current->pid == 0) /* idle task? */
+ return entry;
+
+ if (!user_mode(regs)) {
+ perf_callchain_kernel(regs, entry);
+ if (current->mm)
+ regs = task_pt_regs(current);
+ else
+ regs = NULL;
+ }
+
+ if (regs) {
+ if (current_is_64bit())
+ perf_callchain_user_64(regs, entry);
+ else
+ perf_callchain_user_32(regs, entry);
+ }
+
+ return entry;
+}
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 0cef809cec21..f4d1b55aa70b 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -138,7 +138,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
-static int kvmppc_44x_init(void)
+static int __init kvmppc_44x_init(void)
{
int r;
@@ -149,7 +149,7 @@ static int kvmppc_44x_init(void)
return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
}
-static void kvmppc_44x_exit(void)
+static void __exit kvmppc_44x_exit(void)
{
kvmppc_booke_exit();
}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 4a16f472cc18..ff3cb63b8117 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -30,6 +30,7 @@
#include "timing.h"
#include "44x_tlb.h"
+#include "trace.h"
#ifndef PPC44x_TLBE_SIZE
#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
@@ -263,7 +264,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
/* XXX set tlb_44x_index to stlb_index? */
- KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
+ trace_kvm_stlb_inval(stlb_index);
}
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
@@ -365,8 +366,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
/* Insert shadow mapping into hardware TLB. */
kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
kvmppc_44x_tlbwe(victim, &stlbe);
- KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
- stlbe.word2, handler);
+ trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1,
+ stlbe.word2);
}
/* For a particular guest TLB entry, invalidate the corresponding host TLB
@@ -485,8 +486,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
}
- KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
- tlbe->word1, tlbe->word2, handler);
+ trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
+ tlbe->word2);
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 5a152a52796f..c29926846613 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -2,8 +2,7 @@
# KVM configuration
#
-config HAVE_KVM_IRQCHIP
- bool
+source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -59,17 +58,6 @@ config KVM_E500
If unsure, say N.
-config KVM_TRACE
- bool "KVM trace support"
- depends on KVM && MARKERS && SYSFS
- select RELAY
- select DEBUG_FS
- default n
- ---help---
- This option allows reading a trace of kvm-related events through
- relayfs. Note the ABI is not considered stable and will be
- modified in future updates.
-
source drivers/virtio/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 459c7ee580f7..37655fe19f2f 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,7 +8,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
-common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+CFLAGS_44x_tlb.o := -I.
+CFLAGS_e500_tlb.o := -I.
+CFLAGS_emulate.o := -I.
kvm-objs := $(common-objs-y) powerpc.o emulate.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 642e4204cf25..e7bf4d029484 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return kvmppc_core_vcpu_translate(vcpu, tr);
}
-int kvmppc_booke_init(void)
+int __init kvmppc_booke_init(void)
{
unsigned long ivor[16];
unsigned long max_ivor = 0;
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index d8067fd81cdd..64949eef43f1 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -60,9 +60,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
kvmppc_e500_tlb_setup(vcpu_e500);
- /* Use the same core vertion as host's */
- vcpu->arch.pvr = mfspr(SPRN_PVR);
-
return 0;
}
@@ -132,7 +129,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
-static int kvmppc_e500_init(void)
+static int __init kvmppc_e500_init(void)
{
int r, i;
unsigned long ivor[3];
@@ -160,7 +157,7 @@ static int kvmppc_e500_init(void)
return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
}
-static void kvmppc_e500_exit(void)
+static void __init kvmppc_e500_exit(void)
{
kvmppc_booke_exit();
}
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 3f760414b9f8..be95b8d8e3b7 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -180,6 +180,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_MMUCSR0:
vcpu->arch.gpr[rt] = 0; break;
+ case SPRN_MMUCFG:
+ vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
+
/* extra exceptions */
case SPRN_IVOR32:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 0e773fc2d5e4..fb1e1dc11ba5 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -22,6 +22,7 @@
#include "../mm/mmu_decl.h"
#include "e500_tlb.h"
+#include "trace.h"
#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
@@ -224,9 +225,8 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
stlbe->mas1 = 0;
- KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
- stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
- handler);
+ trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+ stlbe->mas3, stlbe->mas7);
}
static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -269,7 +269,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
- tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+ tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
@@ -309,7 +309,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
- stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+ stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
stlbe->mas2 = (gvaddr & MAS2_EPN)
| e500_shadow_mas2_attrib(gtlbe->mas2,
@@ -319,9 +319,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
vcpu_e500->vcpu.arch.msr & MSR_PR);
stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
- KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
- stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
- handler);
+ trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+ stlbe->mas3, stlbe->mas7);
}
/* XXX only map the one-one case, for now use TLB0 */
@@ -535,9 +534,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
gtlbe->mas3 = vcpu_e500->mas3;
gtlbe->mas7 = vcpu_e500->mas7;
- KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
- gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
- handler);
+ trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
+ gtlbe->mas3, gtlbe->mas7);
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
@@ -545,7 +543,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
case 0:
/* TLB0 */
gtlbe->mas1 &= ~MAS1_TSIZE(~0);
- gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+ gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
stlbsel = 0;
sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
@@ -679,14 +677,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
/* Insert large initial mapping for guest. */
tlbe = &vcpu_e500->guest_tlb[1][0];
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
tlbe->mas2 = 0;
tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
tlbe->mas7 = 0;
/* 4K map for serial output. Used by kernel wrapper. */
tlbe = &vcpu_e500->guest_tlb[1][1];
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
tlbe->mas7 = 0;
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 45b064b76906..d28e3010a5e2 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -16,7 +16,7 @@
#define __KVM_E500_TLB_H__
#include <linux/kvm_host.h>
-#include <asm/mmu-fsl-booke.h>
+#include <asm/mmu-book3e.h>
#include <asm/tlb.h>
#include <asm/kvm_e500.h>
@@ -59,7 +59,7 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
/* TLB helper functions */
static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
{
- return (tlbe->mas1 >> 8) & 0xf;
+ return (tlbe->mas1 >> 7) & 0x1f;
}
static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
@@ -70,7 +70,7 @@ static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
{
unsigned int pgsize = get_tlb_size(tlbe);
- return 1ULL << 10 << (pgsize << 1);
+ return 1ULL << 10 << pgsize;
}
static inline gva_t get_tlb_end(const struct tlbe *tlbe)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index a561d6e8da1c..7737146af3fb 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -29,6 +29,7 @@
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
#include "timing.h"
+#include "trace.h"
#define OP_TRAP 3
@@ -187,7 +188,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_SRR1:
vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
case SPRN_PVR:
- vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+ vcpu->arch.gpr[rt] = mfspr(SPRN_PVR); break;
+ case SPRN_PIR:
+ vcpu->arch.gpr[rt] = mfspr(SPRN_PIR); break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
@@ -417,7 +420,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
}
- KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
+ trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated);
if (advance)
vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2cf915e51e7e..2a4551f78f60 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -31,25 +31,17 @@
#include "timing.h"
#include "../mm/mmu_decl.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
return gfn;
}
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
-{
- return !!(v->arch.pending_exceptions);
-}
-
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
- /* do real check here */
- return 1;
-}
-
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !(v->arch.msr & MSR_WE);
+ return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
}
@@ -122,13 +114,17 @@ struct kvm *kvm_arch_create_vm(void)
static void kvmppc_free_vcpus(struct kvm *kvm)
{
unsigned int i;
+ struct kvm_vcpu *vcpu;
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_arch_vcpu_free(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_arch_vcpu_free(vcpu);
+
+ mutex_lock(&kvm->lock);
+ for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+ kvm->vcpus[i] = NULL;
+
+ atomic_set(&kvm->online_vcpus, 0);
+ mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
new file mode 100644
index 000000000000..67f219de0455
--- /dev/null
+++ b/arch/powerpc/kvm/trace.h
@@ -0,0 +1,104 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Tracepoint for guest mode entry.
+ */
+TRACE_EVENT(kvm_ppc_instr,
+ TP_PROTO(unsigned int inst, unsigned long pc, unsigned int emulate),
+ TP_ARGS(inst, pc, emulate),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, inst )
+ __field( unsigned long, pc )
+ __field( unsigned int, emulate )
+ ),
+
+ TP_fast_assign(
+ __entry->inst = inst;
+ __entry->pc = pc;
+ __entry->emulate = emulate;
+ ),
+
+ TP_printk("inst %u pc 0x%lx emulate %u\n",
+ __entry->inst, __entry->pc, __entry->emulate)
+);
+
+TRACE_EVENT(kvm_stlb_inval,
+ TP_PROTO(unsigned int stlb_index),
+ TP_ARGS(stlb_index),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, stlb_index )
+ ),
+
+ TP_fast_assign(
+ __entry->stlb_index = stlb_index;
+ ),
+
+ TP_printk("stlb_index %u", __entry->stlb_index)
+);
+
+TRACE_EVENT(kvm_stlb_write,
+ TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0,
+ unsigned int word1, unsigned int word2),
+ TP_ARGS(victim, tid, word0, word1, word2),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, victim )
+ __field( unsigned int, tid )
+ __field( unsigned int, word0 )
+ __field( unsigned int, word1 )
+ __field( unsigned int, word2 )
+ ),
+
+ TP_fast_assign(
+ __entry->victim = victim;
+ __entry->tid = tid;
+ __entry->word0 = word0;
+ __entry->word1 = word1;
+ __entry->word2 = word2;
+ ),
+
+ TP_printk("victim %u tid %u w0 %u w1 %u w2 %u",
+ __entry->victim, __entry->tid, __entry->word0,
+ __entry->word1, __entry->word2)
+);
+
+TRACE_EVENT(kvm_gtlb_write,
+ TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0,
+ unsigned int word1, unsigned int word2),
+ TP_ARGS(gtlb_index, tid, word0, word1, word2),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, gtlb_index )
+ __field( unsigned int, tid )
+ __field( unsigned int, word0 )
+ __field( unsigned int, word1 )
+ __field( unsigned int, word2 )
+ ),
+
+ TP_fast_assign(
+ __entry->gtlb_index = gtlb_index;
+ __entry->tid = tid;
+ __entry->word0 = word0;
+ __entry->word1 = word1;
+ __entry->word2 = word2;
+ ),
+
+ TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u",
+ __entry->gtlb_index, __entry->tid, __entry->word0,
+ __entry->word1, __entry->word2)
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5b7038f248b6..a685652effeb 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
: "memory" );
}
-void slb_flush_and_rebolt(void)
+static void __slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
* appropriately too. */
unsigned long linear_llp, vmalloc_llp, lflags, vflags;
unsigned long ksp_esid_data, ksp_vsid_data;
- WARN_ON(!irqs_disabled());
-
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
lflags = SLB_VSID_KERNEL | linear_llp;
@@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void)
ksp_vsid_data = get_slb_shadow()->save_area[2].vsid;
}
- /*
- * We can't take a PMU exception in the following code, so hard
- * disable interrupts.
- */
- hard_irq_disable();
-
/* We need to do this all in asm, so we're sure we don't touch
* the stack between the slbia and rebolting it. */
asm volatile("isync\n"
@@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void)
: "memory");
}
+void slb_flush_and_rebolt(void)
+{
+
+ WARN_ON(!irqs_disabled());
+
+ /*
+ * We can't take a PMU exception in the following code, so hard
+ * disable interrupts.
+ */
+ hard_irq_disable();
+
+ __slb_flush_and_rebolt();
+ get_paca()->slb_cache_ptr = 0;
+}
+
void slb_vmalloc_update(void)
{
unsigned long vflags;
@@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
- unsigned long offset = get_paca()->slb_cache_ptr;
+ unsigned long offset;
unsigned long slbie_data = 0;
unsigned long pc = KSTK_EIP(tsk);
unsigned long stack = KSTK_ESP(tsk);
unsigned long unmapped_base;
+ /*
+ * We need interrupts hard-disabled here, not just soft-disabled,
+ * so that a PMU interrupt can't occur, which might try to access
+ * user memory (to get a stack trace) and possible cause an SLB miss
+ * which would update the slb_cache/slb_cache_ptr fields in the PACA.
+ */
+ hard_irq_disable();
+ offset = get_paca()->slb_cache_ptr;
if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
offset <= SLB_CACHE_ENTRIES) {
int i;
@@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
}
asm volatile("isync" : : : "memory");
} else {
- slb_flush_and_rebolt();
+ __slb_flush_and_rebolt();
}
/* Workaround POWER5 < DD2.1 issue */
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 98cd1dc2ae75..ab5fb48b3e90 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
{
struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr;
struct stab_entry *ste;
- unsigned long offset = __get_cpu_var(stab_cache_ptr);
+ unsigned long offset;
unsigned long pc = KSTK_EIP(tsk);
unsigned long stack = KSTK_ESP(tsk);
unsigned long unmapped_base;
@@ -172,6 +172,15 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
/* Force previous translations to complete. DRENG */
asm volatile("isync" : : : "memory");
+ /*
+ * We need interrupts hard-disabled here, not just soft-disabled,
+ * so that a PMU interrupt can't occur, which might try to access
+ * user memory (to get a stack trace) and possible cause an STAB miss
+ * which would update the stab_cache/stab_cache_ptr per-cpu variables.
+ */
+ hard_irq_disable();
+
+ offset = __get_cpu_var(stab_cache_ptr);
if (offset <= NR_STAB_CACHE_ENTRIES) {
int i;
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index 237e3654f48c..464271bea6c9 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -65,6 +65,19 @@ static unsigned int qe_num_of_snum;
static phys_addr_t qebase = -1;
+int qe_alive_during_sleep(void)
+{
+ static int ret = -1;
+
+ if (ret != -1)
+ return ret;
+
+ ret = !of_find_compatible_node(NULL, NULL, "fsl,mpc8569-pmc");
+
+ return ret;
+}
+EXPORT_SYMBOL(qe_alive_during_sleep);
+
phys_addr_t get_qe_base(void)
{
struct device_node *qe;