diff options
Diffstat (limited to 'arch/powerpc')
27 files changed, 800 insertions, 139 deletions
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 7d2277cef09a..e3e06e0f7fc0 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -30,4 +30,7 @@ dev_archdata_get_node(const struct dev_archdata *ad) return ad->of_node; } +struct pdev_archdata { +}; + #endif /* _ASM_POWERPC_DEVICE_H */ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index b44aaabdd1a6..0c34371ec49c 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -424,6 +424,29 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #endif } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + if (ops->addr_needs_map && ops->addr_needs_map(dev, addr, size)) + return 0; + + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr + get_dma_direct_offset(dev); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr - get_dma_direct_offset(dev); +} + #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_NOT_COHERENT_CACHE diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fddc3ed715fa..c9c930ed11d7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -34,7 +34,8 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* We don't currently support large pages. */ -#define KVM_PAGES_PER_HPAGE (1UL << 31) +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) struct kvm; struct kvm_run; @@ -153,7 +154,6 @@ struct kvm_vcpu_arch { u32 pid; u32 swap_pid; - u32 pvr; u32 ccr0; u32 ccr1; u32 dbcr0; diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index eb17da781128..2a5da069714e 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -104,8 +104,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, else pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); -#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) - /* Second case is 32-bit with 64-bit PTE in SMP mode. In this case, we +#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) + /* Second case is 32-bit with 64-bit PTE. In this case, we * can just store as long as we do the two halves in the right order * with a barrier in between. This is possible because we take care, * in the hash code, to pre-invalidate if the PTE was already hashed, @@ -140,7 +140,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, #else /* Anything else just stores the PTE normally. That covers all 64-bit - * cases, and 32-bit non-hash with 64-bit PTEs in UP mode + * cases, and 32-bit non-hash with 32-bit PTEs. */ *ptep = pte; #endif diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h index 157c5ca581c8..f388f0ab193f 100644 --- a/arch/powerpc/include/asm/qe.h +++ b/arch/powerpc/include/asm/qe.h @@ -154,6 +154,7 @@ int qe_get_snum(void); void qe_put_snum(u8 snum); unsigned int qe_get_num_of_risc(void); unsigned int qe_get_num_of_snums(void); +int qe_alive_during_sleep(void); /* we actually use cpm_muram implementation, define this for convenience */ #define qe_muram_init cpm_muram_init diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 1e5cfad0e3f7..3ab8b3e6feb0 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -64,4 +64,7 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 +#define SO_DOMAIN 39 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index c3b193121f81..198266cf9e2d 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -54,7 +54,7 @@ * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. */ -static inline unsigned long __spin_trylock(raw_spinlock_t *lock) +static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock) { unsigned long tmp, token; @@ -76,7 +76,7 @@ static inline unsigned long __spin_trylock(raw_spinlock_t *lock) static inline int __raw_spin_trylock(raw_spinlock_t *lock) { CLEAR_IO_SYNC; - return __spin_trylock(lock) == 0; + return arch_spin_trylock(lock) == 0; } /* @@ -108,7 +108,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) { CLEAR_IO_SYNC; while (1) { - if (likely(__spin_trylock(lock) == 0)) + if (likely(arch_spin_trylock(lock) == 0)) break; do { HMT_low(); @@ -126,7 +126,7 @@ void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) CLEAR_IO_SYNC; while (1) { - if (likely(__spin_trylock(lock) == 0)) + if (likely(arch_spin_trylock(lock) == 0)) break; local_save_flags(flags_dis); local_irq_restore(flags); @@ -181,7 +181,7 @@ extern void __raw_spin_unlock_wait(raw_spinlock_t *lock); * This returns the old value in the lock + 1, * so we got a read lock if the return value is > 0. */ -static inline long __read_trylock(raw_rwlock_t *rw) +static inline long arch_read_trylock(raw_rwlock_t *rw) { long tmp; @@ -205,7 +205,7 @@ static inline long __read_trylock(raw_rwlock_t *rw) * This returns the old value in the lock, * so we got the write lock if the return value is 0. */ -static inline long __write_trylock(raw_rwlock_t *rw) +static inline long arch_write_trylock(raw_rwlock_t *rw) { long tmp, token; @@ -228,7 +228,7 @@ static inline long __write_trylock(raw_rwlock_t *rw) static inline void __raw_read_lock(raw_rwlock_t *rw) { while (1) { - if (likely(__read_trylock(rw) > 0)) + if (likely(arch_read_trylock(rw) > 0)) break; do { HMT_low(); @@ -242,7 +242,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) static inline void __raw_write_lock(raw_rwlock_t *rw) { while (1) { - if (likely(__write_trylock(rw) == 0)) + if (likely(arch_write_trylock(rw) == 0)) break; do { HMT_low(); @@ -255,12 +255,12 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) static inline int __raw_read_trylock(raw_rwlock_t *rw) { - return __read_trylock(rw) > 0; + return arch_read_trylock(rw) > 0; } static inline int __raw_write_trylock(raw_rwlock_t *rw) { - return __write_trylock(rw) == 0; + return arch_write_trylock(rw) == 0; } static inline void __raw_read_unlock(raw_rwlock_t *rw) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b73396b93905..9619285f64e8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 561b64652311..197b15646eeb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -67,6 +67,8 @@ int main(void) DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); + DEFINE(SIGSEGV, SIGSEGV); + DEFINE(NMI_MASK, NMI_MASK); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 68ccf11e4f19..e8a57de85bcf 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -24,50 +24,12 @@ int swiotlb __read_mostly; unsigned int ppc_swiotlb_enable; -void *swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t addr) -{ - unsigned long pfn = PFN_DOWN(swiotlb_bus_to_phys(hwdev, addr)); - void *pageaddr = page_address(pfn_to_page(pfn)); - - if (pageaddr != NULL) - return pageaddr + (addr % PAGE_SIZE); - return NULL; -} - -dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr + get_dma_direct_offset(hwdev); -} - -phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) - -{ - return baddr - get_dma_direct_offset(hwdev); -} - -/* - * Determine if an address needs bounce buffering via swiotlb. - * Going forward I expect the swiotlb code to generalize on using - * a dma_ops->addr_needs_map, and this function will move from here to the - * generic swiotlb code. - */ -int -swiotlb_arch_address_needs_mapping(struct device *hwdev, dma_addr_t addr, - size_t size) -{ - struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev); - - BUG_ON(!dma_ops); - return dma_ops->addr_needs_map(hwdev, addr, size); -} - /* * Determine if an address is reachable by a pci device, or if we must bounce. */ static int swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) { - u64 mask = dma_get_mask(hwdev); dma_addr_t max; struct pci_controller *hose; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -79,16 +41,9 @@ swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) if ((addr + size > max) | (addr < hose->dma_window_base_cur)) return 1; - return !is_buffer_dma_capable(mask, addr, size); -} - -static int -swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) -{ - return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); + return 0; } - /* * At the moment, all platforms that use this code only require * swiotlb to be used if we're operating on HIGHMEM. Since @@ -104,7 +59,6 @@ struct dma_mapping_ops swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, - .addr_needs_map = swiotlb_addr_needs_map, .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eb898112e577..8ac85e08ffae 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -729,6 +729,11 @@ BEGIN_FTR_SECTION bne- do_ste_alloc /* If so handle it */ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + clrrdi r11,r1,THREAD_SHIFT + lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ + andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ + bne 77f /* then don't call hash_page now */ + /* * On iSeries, we soft-disable interrupts here, then * hard-enable interrupts so that the hash_page code can spin on @@ -833,6 +838,20 @@ handle_page_fault: bl .low_hash_fault b .ret_from_except +/* + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ +77: bl .save_nvgprs + mr r4,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + li r5,SIGSEGV + bl .bad_page_fault + b .ret_from_except + /* here we have a segment miss */ do_ste_alloc: bl .ste_allocate /* try to insert stab entry */ diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c new file mode 100644 index 000000000000..f74b62c67511 --- /dev/null +++ b/arch/powerpc/kernel/perf_callchain.c @@ -0,0 +1,527 @@ +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_counter.h> +#include <linux/percpu.h> +#include <linux/uaccess.h> +#include <linux/mm.h> +#include <asm/ptrace.h> +#include <asm/pgtable.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/vdso.h> +#ifdef CONFIG_PPC64 +#include "ppc32.h" +#endif + +/* + * Store another value in a callchain_entry. + */ +static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + unsigned int nr = entry->nr; + + if (nr < PERF_MAX_STACK_DEPTH) { + entry->ip[nr] = ip; + entry->nr = nr + 1; + } +} + +/* + * Is sp valid as the address of the next kernel stack frame after prev_sp? + * The next frame may be in a different stack area but should not go + * back down in the same stack area. + */ +static int valid_next_sp(unsigned long sp, unsigned long prev_sp) +{ + if (sp & 0xf) + return 0; /* must be 16-byte aligned */ + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return 0; + if (sp >= prev_sp + STACK_FRAME_OVERHEAD) + return 1; + /* + * sp could decrease when we jump off an interrupt stack + * back to the regular process stack. + */ + if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) + return 1; + return 0; +} + +static void perf_callchain_kernel(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + unsigned long *fp; + + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->nip); + + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return; + + for (;;) { + fp = (unsigned long *) sp; + next_sp = fp[0]; + + if (next_sp == sp + STACK_INT_FRAME_SIZE && + fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + /* + * This looks like an interrupt frame for an + * interrupt that occurred in the kernel + */ + regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + next_ip = regs->nip; + lr = regs->link; + level = 0; + callchain_store(entry, PERF_CONTEXT_KERNEL); + + } else { + if (level == 0) + next_ip = lr; + else + next_ip = fp[STACK_FRAME_LR_SAVE]; + + /* + * We can't tell which of the first two addresses + * we get are valid, but we can filter out the + * obviously bogus ones here. We replace them + * with 0 rather than removing them entirely so + * that userspace can tell which is which. + */ + if ((level == 1 && next_ip == lr) || + (level <= 1 && !kernel_text_address(next_ip))) + next_ip = 0; + + ++level; + } + + callchain_store(entry, next_ip); + if (!valid_next_sp(next_sp, sp)) + return; + sp = next_sp; + } +} + +#ifdef CONFIG_PPC64 + +#ifdef CONFIG_HUGETLB_PAGE +#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize]) +#else +#define is_huge_psize(pagesize) 0 +#endif + +/* + * On 64-bit we don't want to invoke hash_page on user addresses from + * interrupt context, so if the access faults, we read the page tables + * to find which page (if any) is mapped and access it directly. + */ +static int read_user_stack_slow(void __user *ptr, void *ret, int nb) +{ + pgd_t *pgdir; + pte_t *ptep, pte; + int pagesize; + unsigned long addr = (unsigned long) ptr; + unsigned long offset; + unsigned long pfn; + void *kaddr; + + pgdir = current->mm->pgd; + if (!pgdir) + return -EFAULT; + + pagesize = get_slice_psize(current->mm, addr); + + /* align address to page boundary */ + offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); + addr -= offset; + + if (is_huge_psize(pagesize)) + ptep = huge_pte_offset(current->mm, addr); + else + ptep = find_linux_pte(pgdir, addr); + + if (ptep == NULL) + return -EFAULT; + pte = *ptep; + if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) + return -EFAULT; + pfn = pte_pfn(pte); + if (!page_is_ram(pfn)) + return -EFAULT; + + /* no highmem to worry about here */ + kaddr = pfn_to_kaddr(pfn); + memcpy(ret, kaddr + offset, nb); + return 0; +} + +static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || + ((unsigned long)ptr & 7)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 8); +} + +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 4); +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) + return 0; + return 1; +} + +/* + * 64-bit user processes use the same stack frame for RT and non-RT signals. + */ +struct signal_frame_64 { + char dummy[__SIGNAL_FRAMESIZE]; + struct ucontext uc; + unsigned long unused[2]; + unsigned int tramp[6]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + char abigap[288]; +}; + +static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) +{ + if (nip == fp + offsetof(struct signal_frame_64, tramp)) + return 1; + if (vdso64_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + return 1; + return 0; +} + +/* + * Do some sanity checking on the signal frame pointed to by sp. + * We check the pinfo and puc pointers in the frame. + */ +static int sane_signal_64_frame(unsigned long sp) +{ + struct signal_frame_64 __user *sf; + unsigned long pinfo, puc; + + sf = (struct signal_frame_64 __user *) sp; + if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || + read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) + return 0; + return pinfo == (unsigned long) &sf->info && + puc == (unsigned long) &sf->uc; +} + +static void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + struct signal_frame_64 __user *sigframe; + unsigned long __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + for (;;) { + fp = (unsigned long __user *) sp; + if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) + return; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, which can happen when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_64) && + (is_sigreturn_64_address(next_ip, sp) || + (level <= 1 && is_sigreturn_64_address(lr, sp))) && + sane_signal_64_frame(sp)) { + /* + * This looks like an signal frame + */ + sigframe = (struct signal_frame_64 __user *) sp; + uregs = sigframe->uc.uc_mcontext.gp_regs; + if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || + read_user_stack_64(&uregs[PT_LNK], &lr) || + read_user_stack_64(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +static inline int current_is_64bit(void) +{ + /* + * We can't use test_thread_flag() here because we may be on an + * interrupt stack, and the thread flags don't get copied over + * from the thread_info on the main stack to the interrupt stack. + */ + return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); +} + +#else /* CONFIG_PPC64 */ +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + return __get_user_inatomic(*ret, ptr); +} + +static inline void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ +} + +static inline int current_is_64bit(void) +{ + return 0; +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > TASK_SIZE - 32) + return 0; + return 1; +} + +#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE +#define sigcontext32 sigcontext +#define mcontext32 mcontext +#define ucontext32 ucontext +#define compat_siginfo_t struct siginfo + +#endif /* CONFIG_PPC64 */ + +/* + * Layout for non-RT signal frames + */ +struct signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sctx; + struct mcontext32 mctx; + int abigap[56]; +}; + +/* + * Layout for RT signal frames + */ +struct rt_signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32 + 16]; + compat_siginfo_t info; + struct ucontext32 uc; + int abigap[56]; +}; + +static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) + return 1; + if (vdso32_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_sigtramp) + return 1; + return 0; +} + +static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct rt_signal_frame_32, + uc.uc_mcontext.mc_pad)) + return 1; + if (vdso32_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + return 1; + return 0; +} + +static int sane_signal_32_frame(unsigned int sp) +{ + struct signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) + return 0; + return regs == (unsigned long) &sf->mctx; +} + +static int sane_rt_signal_32_frame(unsigned int sp) +{ + struct rt_signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) + return 0; + return regs == (unsigned long) &sf->uc.uc_mcontext; +} + +static unsigned int __user *signal_frame_32_regs(unsigned int sp, + unsigned int next_sp, unsigned int next_ip) +{ + struct mcontext32 __user *mctx = NULL; + struct signal_frame_32 __user *sf; + struct rt_signal_frame_32 __user *rt_sf; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, for example, when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_32) && + is_sigreturn_32_address(next_ip, sp) && + sane_signal_32_frame(sp)) { + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + mctx = &sf->mctx; + } + + if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && + is_rt_sigreturn_32_address(next_ip, sp) && + sane_rt_signal_32_frame(sp)) { + rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + mctx = &rt_sf->uc.uc_mcontext; + } + + if (!mctx) + return NULL; + return mctx->mc_gregs; +} + +static void perf_callchain_user_32(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned int sp, next_sp; + unsigned int next_ip; + unsigned int lr; + long level = 0; + unsigned int __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + while (entry->nr < PERF_MAX_STACK_DEPTH) { + fp = (unsigned int __user *) (unsigned long) sp; + if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) + return; + + uregs = signal_frame_32_regs(sp, next_sp, next_ip); + if (!uregs && level <= 1) + uregs = signal_frame_32_regs(sp, next_sp, lr); + if (uregs) { + /* + * This looks like an signal frame, so restart + * the stack trace with the values in it. + */ + if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || + read_user_stack_32(&uregs[PT_LNK], &lr) || + read_user_stack_32(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +/* + * Since we can't get PMU interrupts inside a PMU interrupt handler, + * we don't need separate irq and nmi entries here. + */ +static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry = &__get_cpu_var(callchain); + + entry->nr = 0; + + if (current->pid == 0) /* idle task? */ + return entry; + + if (!user_mode(regs)) { + perf_callchain_kernel(regs, entry); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + if (current_is_64bit()) + perf_callchain_user_64(regs, entry); + else + perf_callchain_user_32(regs, entry); + } + + return entry; +} diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 0cef809cec21..f4d1b55aa70b 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -138,7 +138,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_44x); } -static int kvmppc_44x_init(void) +static int __init kvmppc_44x_init(void) { int r; @@ -149,7 +149,7 @@ static int kvmppc_44x_init(void) return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); } -static void kvmppc_44x_exit(void) +static void __exit kvmppc_44x_exit(void) { kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 4a16f472cc18..ff3cb63b8117 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -30,6 +30,7 @@ #include "timing.h" #include "44x_tlb.h" +#include "trace.h" #ifndef PPC44x_TLBE_SIZE #define PPC44x_TLBE_SIZE PPC44x_TLB_4K @@ -263,7 +264,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, /* XXX set tlb_44x_index to stlb_index? */ - KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); + trace_kvm_stlb_inval(stlb_index); } void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) @@ -365,8 +366,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, /* Insert shadow mapping into hardware TLB. */ kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); kvmppc_44x_tlbwe(victim, &stlbe); - KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2, handler); + trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1, + stlbe.word2); } /* For a particular guest TLB entry, invalidate the corresponding host TLB @@ -485,8 +486,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); } - KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, - tlbe->word1, tlbe->word2, handler); + trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); return EMULATE_DONE; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 5a152a52796f..c29926846613 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -2,8 +2,7 @@ # KVM configuration # -config HAVE_KVM_IRQCHIP - bool +source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -59,17 +58,6 @@ config KVM_E500 If unsure, say N. -config KVM_TRACE - bool "KVM trace support" - depends on KVM && MARKERS && SYSFS - select RELAY - select DEBUG_FS - default n - ---help--- - This option allows reading a trace of kvm-related events through - relayfs. Note the ABI is not considered stable and will be - modified in future updates. - source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 459c7ee580f7..37655fe19f2f 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -8,7 +8,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) -common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) +CFLAGS_44x_tlb.o := -I. +CFLAGS_e500_tlb.o := -I. +CFLAGS_emulate.o := -I. kvm-objs := $(common-objs-y) powerpc.o emulate.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 642e4204cf25..e7bf4d029484 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return kvmppc_core_vcpu_translate(vcpu, tr); } -int kvmppc_booke_init(void) +int __init kvmppc_booke_init(void) { unsigned long ivor[16]; unsigned long max_ivor = 0; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index d8067fd81cdd..64949eef43f1 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -60,9 +60,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) kvmppc_e500_tlb_setup(vcpu_e500); - /* Use the same core vertion as host's */ - vcpu->arch.pvr = mfspr(SPRN_PVR); - return 0; } @@ -132,7 +129,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } -static int kvmppc_e500_init(void) +static int __init kvmppc_e500_init(void) { int r, i; unsigned long ivor[3]; @@ -160,7 +157,7 @@ static int kvmppc_e500_init(void) return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE); } -static void kvmppc_e500_exit(void) +static void __init kvmppc_e500_exit(void) { kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 3f760414b9f8..be95b8d8e3b7 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -180,6 +180,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_MMUCSR0: vcpu->arch.gpr[rt] = 0; break; + case SPRN_MMUCFG: + vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break; + /* extra exceptions */ case SPRN_IVOR32: vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 0e773fc2d5e4..fb1e1dc11ba5 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -22,6 +22,7 @@ #include "../mm/mmu_decl.h" #include "e500_tlb.h" +#include "trace.h" #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) @@ -224,9 +225,8 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); stlbe->mas1 = 0; - KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel), - stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7, - handler); + trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, + stlbe->mas3, stlbe->mas7); } static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -269,7 +269,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, tlbsel = (vcpu_e500->mas4 >> 28) & 0x1; victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; pidsel = (vcpu_e500->mas4 >> 16) & 0xf; - tsized = (vcpu_e500->mas4 >> 8) & 0xf; + tsized = (vcpu_e500->mas4 >> 7) & 0x1f; vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); @@ -309,7 +309,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->shadow_pages[tlbsel][esel] = new_page; /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K) + stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K) | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, @@ -319,9 +319,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->vcpu.arch.msr & MSR_PR); stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; - KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel), - stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7, - handler); + trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, + stlbe->mas3, stlbe->mas7); } /* XXX only map the one-one case, for now use TLB0 */ @@ -535,9 +534,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) gtlbe->mas3 = vcpu_e500->mas3; gtlbe->mas7 = vcpu_e500->mas7; - KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0, - gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7, - handler); + trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2, + gtlbe->mas3, gtlbe->mas7); /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { @@ -545,7 +543,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) case 0: /* TLB0 */ gtlbe->mas1 &= ~MAS1_TSIZE(~0); - gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K); + gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); stlbsel = 0; sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel); @@ -679,14 +677,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) /* Insert large initial mapping for guest. */ tlbe = &vcpu_e500->guest_tlb[1][0]; - tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); tlbe->mas2 = 0; tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; tlbe->mas7 = 0; /* 4K map for serial output. Used by kernel wrapper. */ tlbe = &vcpu_e500->guest_tlb[1][1]; - tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; tlbe->mas7 = 0; diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index 45b064b76906..d28e3010a5e2 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -16,7 +16,7 @@ #define __KVM_E500_TLB_H__ #include <linux/kvm_host.h> -#include <asm/mmu-fsl-booke.h> +#include <asm/mmu-book3e.h> #include <asm/tlb.h> #include <asm/kvm_e500.h> @@ -59,7 +59,7 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); /* TLB helper functions */ static inline unsigned int get_tlb_size(const struct tlbe *tlbe) { - return (tlbe->mas1 >> 8) & 0xf; + return (tlbe->mas1 >> 7) & 0x1f; } static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) @@ -70,7 +70,7 @@ static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) static inline u64 get_tlb_bytes(const struct tlbe *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); - return 1ULL << 10 << (pgsize << 1); + return 1ULL << 10 << pgsize; } static inline gva_t get_tlb_end(const struct tlbe *tlbe) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index a561d6e8da1c..7737146af3fb 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -29,6 +29,7 @@ #include <asm/kvm_ppc.h> #include <asm/disassemble.h> #include "timing.h" +#include "trace.h" #define OP_TRAP 3 @@ -187,7 +188,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_SRR1: vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; case SPRN_PVR: - vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; + vcpu->arch.gpr[rt] = mfspr(SPRN_PVR); break; + case SPRN_PIR: + vcpu->arch.gpr[rt] = mfspr(SPRN_PIR); break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. @@ -417,7 +420,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } } - KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit); + trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated); if (advance) vcpu->arch.pc += 4; /* Advance past emulated instruction. */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2cf915e51e7e..2a4551f78f60 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -31,25 +31,17 @@ #include "timing.h" #include "../mm/mmu_decl.h" +#define CREATE_TRACE_POINTS +#include "trace.h" + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; } -int kvm_cpu_has_interrupt(struct kvm_vcpu *v) -{ - return !!(v->arch.pending_exceptions); -} - -int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) -{ - /* do real check here */ - return 1; -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.msr & MSR_WE); + return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); } @@ -122,13 +114,17 @@ struct kvm *kvm_arch_create_vm(void) static void kvmppc_free_vcpus(struct kvm *kvm) { unsigned int i; + struct kvm_vcpu *vcpu; - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - if (kvm->vcpus[i]) { - kvm_arch_vcpu_free(kvm->vcpus[i]); - kvm->vcpus[i] = NULL; - } - } + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_arch_vcpu_free(vcpu); + + mutex_lock(&kvm->lock); + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + + atomic_set(&kvm->online_vcpus, 0); + mutex_unlock(&kvm->lock); } void kvm_arch_sync_events(struct kvm *kvm) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h new file mode 100644 index 000000000000..67f219de0455 --- /dev/null +++ b/arch/powerpc/kvm/trace.h @@ -0,0 +1,104 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + +/* + * Tracepoint for guest mode entry. + */ +TRACE_EVENT(kvm_ppc_instr, + TP_PROTO(unsigned int inst, unsigned long pc, unsigned int emulate), + TP_ARGS(inst, pc, emulate), + + TP_STRUCT__entry( + __field( unsigned int, inst ) + __field( unsigned long, pc ) + __field( unsigned int, emulate ) + ), + + TP_fast_assign( + __entry->inst = inst; + __entry->pc = pc; + __entry->emulate = emulate; + ), + + TP_printk("inst %u pc 0x%lx emulate %u\n", + __entry->inst, __entry->pc, __entry->emulate) +); + +TRACE_EVENT(kvm_stlb_inval, + TP_PROTO(unsigned int stlb_index), + TP_ARGS(stlb_index), + + TP_STRUCT__entry( + __field( unsigned int, stlb_index ) + ), + + TP_fast_assign( + __entry->stlb_index = stlb_index; + ), + + TP_printk("stlb_index %u", __entry->stlb_index) +); + +TRACE_EVENT(kvm_stlb_write, + TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0, + unsigned int word1, unsigned int word2), + TP_ARGS(victim, tid, word0, word1, word2), + + TP_STRUCT__entry( + __field( unsigned int, victim ) + __field( unsigned int, tid ) + __field( unsigned int, word0 ) + __field( unsigned int, word1 ) + __field( unsigned int, word2 ) + ), + + TP_fast_assign( + __entry->victim = victim; + __entry->tid = tid; + __entry->word0 = word0; + __entry->word1 = word1; + __entry->word2 = word2; + ), + + TP_printk("victim %u tid %u w0 %u w1 %u w2 %u", + __entry->victim, __entry->tid, __entry->word0, + __entry->word1, __entry->word2) +); + +TRACE_EVENT(kvm_gtlb_write, + TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0, + unsigned int word1, unsigned int word2), + TP_ARGS(gtlb_index, tid, word0, word1, word2), + + TP_STRUCT__entry( + __field( unsigned int, gtlb_index ) + __field( unsigned int, tid ) + __field( unsigned int, word0 ) + __field( unsigned int, word1 ) + __field( unsigned int, word2 ) + ), + + TP_fast_assign( + __entry->gtlb_index = gtlb_index; + __entry->tid = tid; + __entry->word0 = word0; + __entry->word1 = word1; + __entry->word2 = word2; + ), + + TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u", + __entry->gtlb_index, __entry->tid, __entry->word0, + __entry->word1, __entry->word2) +); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 5b7038f248b6..a685652effeb 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } -void slb_flush_and_rebolt(void) +static void __slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. */ unsigned long linear_llp, vmalloc_llp, lflags, vflags; unsigned long ksp_esid_data, ksp_vsid_data; - WARN_ON(!irqs_disabled()); - linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; lflags = SLB_VSID_KERNEL | linear_llp; @@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void) ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; } - /* - * We can't take a PMU exception in the following code, so hard - * disable interrupts. - */ - hard_irq_disable(); - /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. */ asm volatile("isync\n" @@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void) : "memory"); } +void slb_flush_and_rebolt(void) +{ + + WARN_ON(!irqs_disabled()); + + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); + + __slb_flush_and_rebolt(); + get_paca()->slb_cache_ptr = 0; +} + void slb_vmalloc_update(void) { unsigned long vflags; @@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long offset; unsigned long slbie_data = 0; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an SLB miss + * which would update the slb_cache/slb_cache_ptr fields in the PACA. + */ + hard_irq_disable(); + offset = get_paca()->slb_cache_ptr; if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { int i; @@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) } asm volatile("isync" : : : "memory"); } else { - slb_flush_and_rebolt(); + __slb_flush_and_rebolt(); } /* Workaround POWER5 < DD2.1 issue */ diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 98cd1dc2ae75..ab5fb48b3e90 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) { struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; struct stab_entry *ste; - unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long offset; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; @@ -172,6 +172,15 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an STAB miss + * which would update the stab_cache/stab_cache_ptr per-cpu variables. + */ + hard_irq_disable(); + + offset = __get_cpu_var(stab_cache_ptr); if (offset <= NR_STAB_CACHE_ENTRIES) { int i; diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 237e3654f48c..464271bea6c9 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -65,6 +65,19 @@ static unsigned int qe_num_of_snum; static phys_addr_t qebase = -1; +int qe_alive_during_sleep(void) +{ + static int ret = -1; + + if (ret != -1) + return ret; + + ret = !of_find_compatible_node(NULL, NULL, "fsl,mpc8569-pmc"); + + return ret; +} +EXPORT_SYMBOL(qe_alive_during_sleep); + phys_addr_t get_qe_base(void) { struct device_node *qe; |