diff options
125 files changed, 14175 insertions, 4007 deletions
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index d39c9f206271..460f72e640e6 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -217,7 +217,7 @@ static void _sparc_free_io(struct resource *res) unsigned long plen; plen = res->end - res->start + 1; - if ((plen & (PAGE_SIZE-1)) != 0) BUG(); + BUG_ON((plen & (PAGE_SIZE-1)) != 0); sparc_unmapiorange(res->start, plen); release_resource(res); } @@ -512,8 +512,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t n, void *p, dma_addr_t ba) dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ return virt_to_phys(ptr); } @@ -528,8 +527,7 @@ dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); @@ -542,8 +540,7 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size, dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, unsigned long offset, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. 
*/ return page_to_phys(page) + offset; } @@ -551,8 +548,7 @@ dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* mmu_inval_dma_area XXX */ } @@ -576,11 +572,10 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); sg->dvma_address = virt_to_phys(page_address(sg->page)); sg->dvma_length = sg->length; sg++; @@ -597,11 +592,10 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); @@ -622,8 +616,7 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, */ void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); @@ -632,8 +625,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t si void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + 
PAGE_SIZE-1) & PAGE_MASK); @@ -650,11 +642,10 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); @@ -667,11 +658,10 @@ void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, i { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 4c0a50a76554..c3685b314d71 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -186,6 +186,15 @@ endchoice endmenu +config ARCH_SPARSEMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + +config LARGE_ALLOCS + def_bool y + source "mm/Kconfig" config GENERIC_ISA_DMA @@ -350,6 +359,15 @@ config SOLARIS_EMUL endmenu +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default y + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with UltraSPARC cpus at a cost of slightly increased + overhead in some places. If unsure say N here. 
+ config CMDLINE_BOOL bool "Default bootloader kernel arguments" diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 069d49777b2a..f819a9663a8d 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc2 -# Tue Feb 7 17:47:18 2006 +# Linux kernel version: 2.6.16 +# Mon Mar 20 01:23:21 2006 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -115,14 +115,20 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_HUGETLB_PAGE_SIZE_4MB=y # CONFIG_HUGETLB_PAGE_SIZE_512K is not set # CONFIG_HUGETLB_PAGE_SIZE_64K is not set +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_LARGE_ALLOCS=y CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y +# CONFIG_FLATMEM_MANUAL is not set # CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_MEMORY_HOTPLUG=y CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y CONFIG_GENERIC_ISA_DMA=y CONFIG_SBUS=y CONFIG_SBUSCHAR=y @@ -655,6 +661,7 @@ CONFIG_SERIAL_SUNCORE=y CONFIG_SERIAL_SUNSU=y CONFIG_SERIAL_SUNSU_CONSOLE=y CONFIG_SERIAL_SUNSAB=m +CONFIG_SERIAL_SUNHV=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set @@ -1116,11 +1123,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_INFINIBAND is not set # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index 83d67eb18895..6f6816488b04 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -11,10 +11,12 @@ obj-y := process.o setup.o cpu.o idprom.o \ traps.o devices.o auxio.o una_asm.o \ irq.o ptrace.o time.o sys_sparc.o signal.o \ unaligned.o central.o pci.o starfire.o 
semaphore.o \ - power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o + power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \ + visemul.o obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ - pci_psycho.o pci_sabre.o pci_schizo.o + pci_psycho.o pci_sabre.o pci_schizo.o \ + pci_sun4v.o pci_sun4v_asm.o obj-$(CONFIG_SMP) += smp.o trampoline.o obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o @@ -38,5 +40,5 @@ else CMODEL_CFLAG := -m64 -mcmodel=medlow endif -head.o: head.S ttable.S itlb_base.S dtlb_base.S dtlb_backend.S dtlb_prot.S \ +head.o: head.S ttable.S itlb_miss.S dtlb_miss.S ktlb.S tsb.S \ etrap.S rtrap.S winfixup.S entry.S diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index 202a80c24b6f..d7caa60a0074 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -31,6 +31,7 @@ #include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> +#include <asm/mmu_context.h> static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout32_library(struct file*); @@ -238,6 +239,8 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs) (current->mm->start_data = N_DATADDR(ex)); current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); + current->mm->free_area_cache = current->mm->mmap_base; + current->mm->cached_hole_size = 0; current->mm->mmap = NULL; compute_creds(bprm); @@ -329,15 +332,8 @@ beyond_if: current->mm->start_stack = (unsigned long) create_aout32_tables((char __user *)bprm->p, bprm); - if (!(orig_thr_flags & _TIF_32BIT)) { - unsigned long pgd_cache = get_pgd_cache(current->mm->pgd); - - __asm__ __volatile__("stxa\t%0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (pgd_cache), - "r" (TSB_REG), "i" (ASI_DMMU)); - } + tsb_context_switch(current->mm); + start_thread32(regs, ex.a_entry, current->mm->start_stack); if 
(current->ptrace & PT_PTRACED) send_sig(SIGTRAP, current, 0); diff --git a/arch/sparc64/kernel/binfmt_elf32.c b/arch/sparc64/kernel/binfmt_elf32.c index a1a12d2aa353..8a2abcce2737 100644 --- a/arch/sparc64/kernel/binfmt_elf32.c +++ b/arch/sparc64/kernel/binfmt_elf32.c @@ -153,7 +153,9 @@ MODULE_AUTHOR("Eric Youngdale, David S. Miller, Jakub Jelinek"); #undef MODULE_DESCRIPTION #undef MODULE_AUTHOR +#include <asm/a.out.h> + #undef TASK_SIZE -#define TASK_SIZE 0xf0000000 +#define TASK_SIZE STACK_TOP32 #include "../../../fs/binfmt_elf.c" diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c index 00eed88ef2e8..11cc0caef592 100644 --- a/arch/sparc64/kernel/cpu.c +++ b/arch/sparc64/kernel/cpu.c @@ -13,6 +13,7 @@ #include <asm/system.h> #include <asm/fpumacro.h> #include <asm/cpudata.h> +#include <asm/spitfire.h> DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; @@ -71,6 +72,12 @@ void __init cpu_probe(void) unsigned long ver, fpu_vers, manuf, impl, fprs; int i; + if (tlb_type == hypervisor) { + sparc_cpu_type = "UltraSparc T1 (Niagara)"; + sparc_fpu_type = "UltraSparc T1 integrated FPU"; + return; + } + fprs = fprs_read(); fprs_write(FPRS_FEF); __asm__ __volatile__ ("rdpr %%ver, %0; stx %%fsr, [%1]" diff --git a/arch/sparc64/kernel/devices.c b/arch/sparc64/kernel/devices.c index df9a1ca8fd77..007e8922cd16 100644 --- a/arch/sparc64/kernel/devices.c +++ b/arch/sparc64/kernel/devices.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/spinlock.h> #include <linux/errno.h> +#include <linux/bootmem.h> #include <asm/page.h> #include <asm/oplib.h> @@ -20,6 +21,8 @@ #include <asm/spitfire.h> #include <asm/timer.h> #include <asm/cpudata.h> +#include <asm/vdev.h> +#include <asm/irq.h> /* Used to synchronize acceses to NatSemi SUPER I/O chip configure * operations in asm/ns87303.h @@ -29,13 +32,158 @@ DEFINE_SPINLOCK(ns87303_lock); extern void cpu_probe(void); extern void central_probe(void); -static char *cpu_mid_prop(void) +u32 sun4v_vdev_devhandle; +int 
sun4v_vdev_root; + +struct vdev_intmap { + unsigned int phys; + unsigned int irq; + unsigned int cnode; + unsigned int cinterrupt; +}; + +struct vdev_intmask { + unsigned int phys; + unsigned int interrupt; + unsigned int __unused; +}; + +static struct vdev_intmap *vdev_intmap; +static int vdev_num_intmap; +static struct vdev_intmask vdev_intmask; + +static void __init sun4v_virtual_device_probe(void) +{ + struct linux_prom64_registers regs; + struct vdev_intmap *ip; + int node, sz, err; + + if (tlb_type != hypervisor) + return; + + node = prom_getchild(prom_root_node); + node = prom_searchsiblings(node, "virtual-devices"); + if (!node) { + prom_printf("SUN4V: Fatal error, no virtual-devices node.\n"); + prom_halt(); + } + + sun4v_vdev_root = node; + + prom_getproperty(node, "reg", (char *)&regs, sizeof(regs)); + sun4v_vdev_devhandle = (regs.phys_addr >> 32UL) & 0x0fffffff; + + sz = prom_getproplen(node, "interrupt-map"); + if (sz <= 0) { + prom_printf("SUN4V: Error, no vdev interrupt-map.\n"); + prom_halt(); + } + + if ((sz % sizeof(*ip)) != 0) { + prom_printf("SUN4V: Bogus interrupt-map property size %d\n", + sz); + prom_halt(); + } + + vdev_intmap = ip = alloc_bootmem_low_pages(sz); + if (!vdev_intmap) { + prom_printf("SUN4V: Error, cannot allocate vdev_intmap.\n"); + prom_halt(); + } + + err = prom_getproperty(node, "interrupt-map", (char *) ip, sz); + if (err == -1) { + prom_printf("SUN4V: Fatal error, no vdev interrupt-map.\n"); + prom_halt(); + } + if (err != sz) { + prom_printf("SUN4V: Inconsistent interrupt-map size, " + "proplen(%d) vs getprop(%d).\n", sz,err); + prom_halt(); + } + + vdev_num_intmap = err / sizeof(*ip); + + err = prom_getproperty(node, "interrupt-map-mask", + (char *) &vdev_intmask, + sizeof(vdev_intmask)); + if (err <= 0) { + prom_printf("SUN4V: Fatal error, no vdev " + "interrupt-map-mask.\n"); + prom_halt(); + } + if (err % sizeof(vdev_intmask)) { + prom_printf("SUN4V: Bogus interrupt-map-mask " + "property size %d\n", err); + 
prom_halt(); + } + + printk("SUN4V: virtual-devices devhandle[%x]\n", + sun4v_vdev_devhandle); +} + +unsigned int sun4v_vdev_device_interrupt(unsigned int dev_node) +{ + unsigned int irq, reg; + int err, i; + + err = prom_getproperty(dev_node, "interrupts", + (char *) &irq, sizeof(irq)); + if (err <= 0) { + printk("VDEV: Cannot get \"interrupts\" " + "property for OBP node %x\n", dev_node); + return 0; + } + + err = prom_getproperty(dev_node, "reg", + (char *) &reg, sizeof(reg)); + if (err <= 0) { + printk("VDEV: Cannot get \"reg\" " + "property for OBP node %x\n", dev_node); + return 0; + } + + for (i = 0; i < vdev_num_intmap; i++) { + if (vdev_intmap[i].phys == (reg & vdev_intmask.phys) && + vdev_intmap[i].irq == (irq & vdev_intmask.interrupt)) { + irq = vdev_intmap[i].cinterrupt; + break; + } + } + + if (i == vdev_num_intmap) { + printk("VDEV: No matching interrupt map entry " + "for OBP node %x\n", dev_node); + return 0; + } + + return sun4v_build_irq(sun4v_vdev_devhandle, irq, 5, 0); +} + +static const char *cpu_mid_prop(void) { if (tlb_type == spitfire) return "upa-portid"; return "portid"; } +static int get_cpu_mid(int prom_node) +{ + if (tlb_type == hypervisor) { + struct linux_prom64_registers reg; + + if (prom_getproplen(prom_node, "cpuid") == 4) + return prom_getintdefault(prom_node, "cpuid", 0); + + prom_getproperty(prom_node, "reg", (char *) &reg, sizeof(reg)); + return (reg.phys_addr >> 32) & 0x0fffffffUL; + } else { + const char *prop_name = cpu_mid_prop(); + + return prom_getintdefault(prom_node, prop_name, 0); + } +} + static int check_cpu_node(int nd, int *cur_inst, int (*compare)(int, int, void *), void *compare_arg, int *prom_node, int *mid) @@ -50,7 +198,7 @@ static int check_cpu_node(int nd, int *cur_inst, if (prom_node) *prom_node = nd; if (mid) - *mid = prom_getintdefault(nd, cpu_mid_prop(), 0); + *mid = get_cpu_mid(nd); return 0; } @@ -105,7 +253,7 @@ static int cpu_mid_compare(int nd, int instance, void *_arg) int desired_mid = (int) (long) 
_arg; int this_mid; - this_mid = prom_getintdefault(nd, cpu_mid_prop(), 0); + this_mid = get_cpu_mid(nd); if (this_mid == desired_mid) return 0; return -ENODEV; @@ -126,7 +274,8 @@ void __init device_scan(void) #ifndef CONFIG_SMP { - int err, cpu_node; + int err, cpu_node, def; + err = cpu_find_by_instance(0, &cpu_node, NULL); if (err) { prom_printf("No cpu nodes, cannot continue\n"); @@ -135,21 +284,40 @@ void __init device_scan(void) cpu_data(0).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); + + def = ((tlb_type == hypervisor) ? + (8 * 1024) : + (16 * 1024)); cpu_data(0).dcache_size = prom_getintdefault(cpu_node, "dcache-size", - 16 * 1024); + def); + + def = 32; cpu_data(0).dcache_line_size = - prom_getintdefault(cpu_node, "dcache-line-size", 32); + prom_getintdefault(cpu_node, "dcache-line-size", + def); + + def = 16 * 1024; cpu_data(0).icache_size = prom_getintdefault(cpu_node, "icache-size", - 16 * 1024); + def); + + def = 32; cpu_data(0).icache_line_size = - prom_getintdefault(cpu_node, "icache-line-size", 32); + prom_getintdefault(cpu_node, "icache-line-size", + def); + + def = ((tlb_type == hypervisor) ? + (3 * 1024 * 1024) : + (4 * 1024 * 1024)); cpu_data(0).ecache_size = prom_getintdefault(cpu_node, "ecache-size", - 4 * 1024 * 1024); + def); + + def = 64; cpu_data(0).ecache_line_size = - prom_getintdefault(cpu_node, "ecache-line-size", 64); + prom_getintdefault(cpu_node, "ecache-line-size", + def); printk("CPU[0]: Caches " "D[sz(%d):line_sz(%d)] " "I[sz(%d):line_sz(%d)] " @@ -160,6 +328,7 @@ void __init device_scan(void) } #endif + sun4v_virtual_device_probe(); central_probe(); cpu_probe(); diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S deleted file mode 100644 index acc889a7f9c1..000000000000 --- a/arch/sparc64/kernel/dtlb_backend.S +++ /dev/null @@ -1,170 +0,0 @@ -/* $Id: dtlb_backend.S,v 1.16 2001/10/09 04:02:11 davem Exp $ - * dtlb_backend.S: Back end to DTLB miss replacement strategy. 
- * This is included directly into the trap table. - * - * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include <asm/pgtable.h> -#include <asm/mmu.h> - -#define VALID_SZ_BITS (_PAGE_VALID | _PAGE_SZBITS) - -#define VPTE_BITS (_PAGE_CP | _PAGE_CV | _PAGE_P ) -#define VPTE_SHIFT (PAGE_SHIFT - 3) - -/* Ways we can get here: - * - * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1. - * 2) Nucleus loads and stores to/from user/kernel window save areas. - * 3) VPTE misses from dtlb_base and itlb_base. - * - * We need to extract out the PMD and PGDIR indexes from the - * linear virtual page table access address. The PTE index - * is at the bottom, but we are not concerned with it. Bits - * 0 to 2 are clear since each PTE is 8 bytes in size. Each - * PMD and PGDIR entry are 4 bytes in size. Thus, this - * address looks something like: - * - * |---------------------------------------------------------------| - * | ... | PGDIR index | PMD index | PTE index | | - * |---------------------------------------------------------------| - * 63 F E D C B A 3 2 0 <- bit nr - * - * The variable bits above are defined as: - * A --> 3 + (PAGE_SHIFT - log2(8)) - * --> 3 + (PAGE_SHIFT - 3) - 1 - * (ie. this is "bit 3" + PAGE_SIZE - size of PTE entry in bits - 1) - * B --> A + 1 - * C --> B + (PAGE_SHIFT - log2(4)) - * --> B + (PAGE_SHIFT - 2) - 1 - * (ie. this is "bit B" + PAGE_SIZE - size of PMD entry in bits - 1) - * D --> C + 1 - * E --> D + (PAGE_SHIFT - log2(4)) - * --> D + (PAGE_SHIFT - 2) - 1 - * (ie. this is "bit D" + PAGE_SIZE - size of PGDIR entry in bits - 1) - * F --> E + 1 - * - * (Note how "B" always evalutes to PAGE_SHIFT, all the other constants - * cancel out.) 
- * - * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are: - * A --> 12 - * B --> 13 - * C --> 23 - * D --> 24 - * E --> 34 - * F --> 35 - * - * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are: - * A --> 15 - * B --> 16 - * C --> 29 - * D --> 30 - * E --> 43 - * F --> 44 - * - * Because bits both above and below each PGDIR and PMD index need to - * be masked out, and the index can be as long as 14 bits (when using a - * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions - * to extract each index out. - * - * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so - * we try to avoid using them for the entire operation. We could setup - * a mask anywhere from bit 31 down to bit 10 using the sethi instruction. - * - * We need a mask covering bits B --> C and one covering D --> E. - * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000. - * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000. - * The second in each set cannot be loaded with a single sethi - * instruction, because the upper bits are past bit 32. We would - * need to use a sethi + a shift. - * - * For the time being, we use 2 shifts and a simple "and" mask. - * We shift left to clear the bits above the index, we shift down - * to clear the bits below the index (sans the log2(4 or 8) bits) - * and a mask to clear the log2(4 or 8) bits. We need therefore - * define 4 shift counts, all of which are relative to PAGE_SHIFT. - * - * Although unsupportable for other reasons, this does mean that - * 512K and 4MB page sizes would be generaally supported by the - * kernel. (ELF binaries would break with > 64K PAGE_SIZE since - * the sections are only aligned that strongly). 
- * - * The operations performed for extraction are thus: - * - * ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3 - * - */ - -#define A (3 + (PAGE_SHIFT - 3) - 1) -#define B (A + 1) -#define C (B + (PAGE_SHIFT - 2) - 1) -#define D (C + 1) -#define E (D + (PAGE_SHIFT - 2) - 1) -#define F (E + 1) - -#define PMD_SHIFT_LEFT (64 - D) -#define PMD_SHIFT_RIGHT (64 - (D - B) - 2) -#define PGDIR_SHIFT_LEFT (64 - F) -#define PGDIR_SHIFT_RIGHT (64 - (F - D) - 2) -#define LOW_MASK_BITS 0x3 - -/* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss */ - ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS - add %g3, %g3, %g5 ! Compute VPTE base - cmp %g4, %g5 ! VPTE miss? - bgeu,pt %xcc, 1f ! Continue here - andcc %g4, TAG_CONTEXT_BITS, %g5 ! tl0 miss Nucleus test - ba,a,pt %xcc, from_tl1_trap ! Fall to tl0 miss -1: sllx %g6, VPTE_SHIFT, %g4 ! Position TAG_ACCESS - or %g4, %g5, %g4 ! Prepare TAG_ACCESS - -/* TLB1 ** ICACHE line 2: Quick VPTE miss */ - mov TSB_REG, %g1 ! Grab TSB reg - ldxa [%g1] ASI_DMMU, %g5 ! Doing PGD caching? - sllx %g6, PMD_SHIFT_LEFT, %g1 ! Position PMD offset - be,pn %xcc, sparc64_vpte_nucleus ! Is it from Nucleus? - srlx %g1, PMD_SHIFT_RIGHT, %g1 ! Mask PMD offset bits - brnz,pt %g5, sparc64_vpte_continue ! Yep, go like smoke - andn %g1, LOW_MASK_BITS, %g1 ! Final PMD mask - sllx %g6, PGDIR_SHIFT_LEFT, %g5 ! Position PGD offset - -/* TLB1 ** ICACHE line 3: Quick VPTE miss */ - srlx %g5, PGDIR_SHIFT_RIGHT, %g5 ! Mask PGD offset bits - andn %g5, LOW_MASK_BITS, %g5 ! Final PGD mask - lduwa [%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD - brz,pn %g5, vpte_noent ! Valid? -sparc64_kpte_continue: - sllx %g5, 11, %g5 ! Shift into place -sparc64_vpte_continue: - lduwa [%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load PMD - sllx %g5, 11, %g5 ! Shift into place - brz,pn %g5, vpte_noent ! Valid? - -/* TLB1 ** ICACHE line 4: Quick VPTE miss */ - mov (VALID_SZ_BITS >> 61), %g1 ! upper vpte into %g1 - sllx %g1, 61, %g1 ! finish calc - or %g5, VPTE_BITS, %g5 ! 
Prepare VPTE data - or %g5, %g1, %g5 ! ... - mov TLB_SFSR, %g1 ! Restore %g1 value - stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load VPTE into TLB - stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS - retry ! Load PTE once again - -#undef VALID_SZ_BITS -#undef VPTE_SHIFT -#undef VPTE_BITS -#undef A -#undef B -#undef C -#undef D -#undef E -#undef F -#undef PMD_SHIFT_LEFT -#undef PMD_SHIFT_RIGHT -#undef PGDIR_SHIFT_LEFT -#undef PGDIR_SHIFT_RIGHT -#undef LOW_MASK_BITS - diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S deleted file mode 100644 index 6528786840c0..000000000000 --- a/arch/sparc64/kernel/dtlb_base.S +++ /dev/null @@ -1,109 +0,0 @@ -/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $ - * dtlb_base.S: Front end to DTLB miss replacement strategy. - * This is included directly into the trap table. - * - * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include <asm/pgtable.h> -#include <asm/mmu.h> - -/* %g1 TLB_SFSR (%g1 + %g1 == TLB_TAG_ACCESS) - * %g2 (KERN_HIGHBITS | KERN_LOWBITS) - * %g3 VPTE base (0xfffffffe00000000) Spitfire/Blackbird (44-bit VA space) - * (0xffe0000000000000) Cheetah (64-bit VA space) - * %g7 __pa(current->mm->pgd) - * - * The VPTE base value is completely magic, but note that - * few places in the kernel other than these TLB miss - * handlers know anything about the VPTE mechanism or - * how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD). 
- * Consider the 44-bit VADDR Ultra-I/II case as an example: - * - * VA[0 : (1<<43)] produce VPTE index [%g3 : 0] - * VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3] - * - * For Cheetah's 64-bit VADDR space this is: - * - * VA[0 : (1<<63)] produce VPTE index [%g3 : 0] - * VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3] - * - * If you're paying attention you'll notice that this means half of - * the VPTE table is above %g3 and half is below, low VA addresses - * map progressively upwards from %g3, and high VA addresses map - * progressively upwards towards %g3. This trick was needed to make - * the same 8 instruction handler work both for Spitfire/Blackbird's - * peculiar VA space hole configuration and the full 64-bit VA space - * one of Cheetah at the same time. - */ - -/* Ways we can get here: - * - * 1) Nucleus loads and stores to/from PA-->VA direct mappings. - * 2) Nucleus loads and stores to/from vmalloc() areas. - * 3) User loads and stores. - * 4) User space accesses by nucleus at tl0 - */ - -#if PAGE_SHIFT == 13 -/* - * To compute vpte offset, we need to do ((addr >> 13) << 3), - * which can be optimized to (addr >> 10) if bits 10/11/12 can - * be guaranteed to be 0 ... mmu_context.h does guarantee this - * by only using 10 bits in the hwcontext value. - */ -#define CREATE_VPTE_OFFSET1(r1, r2) nop -#define CREATE_VPTE_OFFSET2(r1, r2) \ - srax r1, 10, r2 -#else -#define CREATE_VPTE_OFFSET1(r1, r2) \ - srax r1, PAGE_SHIFT, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) \ - sllx r2, 3, r2 -#endif - -/* DTLB ** ICACHE line 1: Quick user TLB misses */ - mov TLB_SFSR, %g1 - ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS - andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus? -from_tl1_trap: - rdpr %tl, %g5 ! For TL==3 test - CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset - be,pn %xcc, kvmap ! Yep, special processing - CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset - cmp %g5, 4 ! Last trap level? 
- -/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */ - be,pn %xcc, longpath ! Yep, cannot risk VPTE miss - nop ! delay slot - ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE -1: brgez,pn %g5, longpath ! Invalid, branch out - nop ! Delay-slot -9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB - retry ! Trap return - nop - -/* DTLB ** ICACHE line 3: winfixups+real_faults */ -longpath: - rdpr %pstate, %g5 ! Move into alternate globals - wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate - rdpr %tl, %g4 ! See where we came from. - cmp %g4, 1 ! Is etrap/rtrap window fault? - mov TLB_TAG_ACCESS, %g4 ! Prepare for fault processing - ldxa [%g4] ASI_DMMU, %g5 ! Load faulting VA page - be,pt %xcc, sparc64_realfault_common ! Jump to normal fault handling - mov FAULT_CODE_DTLB, %g4 ! It was read from DTLB - -/* DTLB ** ICACHE line 4: Unused... */ - ba,a,pt %xcc, winfix_trampoline ! Call window fixup code - nop - nop - nop - nop - nop - nop - nop - -#undef CREATE_VPTE_OFFSET1 -#undef CREATE_VPTE_OFFSET2 diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S new file mode 100644 index 000000000000..09a6a15a7105 --- /dev/null +++ b/arch/sparc64/kernel/dtlb_miss.S @@ -0,0 +1,39 @@ +/* DTLB ** ICACHE line 1: Context 0 check and TSB load */ + ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer + ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET + srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context + brz,pn %g5, kvmap_dtlb ! Context 0 processing + srlx %g6, 22, %g6 ! Delay slot + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry + cmp %g4, %g6 ! Compare TAG + +/* DTLB ** ICACHE line 2: TSB compare and TLB load */ + bne,pn %xcc, tsb_miss_dtlb ! Miss + mov FAULT_CODE_DTLB, %g3 + stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load TLB + retry ! 
Trap done + nop + nop + nop + nop + +/* DTLB ** ICACHE line 3: */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* DTLB ** ICACHE line 4: */ + nop + nop + nop + nop + nop + nop + nop + nop diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c index 7991e919d8ab..c69504aa638f 100644 --- a/arch/sparc64/kernel/ebus.c +++ b/arch/sparc64/kernel/ebus.c @@ -277,10 +277,9 @@ static inline void *ebus_alloc(size_t size) { void *mem; - mem = kmalloc(size, GFP_ATOMIC); + mem = kzalloc(size, GFP_ATOMIC); if (!mem) panic("ebus_alloc: out of memory"); - memset((char *)mem, 0, size); return mem; } diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a73553ae7e53..6d0b3ed77a02 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -50,7 +50,8 @@ do_fpdis: add %g0, %g0, %g0 ba,a,pt %xcc, rtrap_clr_l6 -1: ldub [%g6 + TI_FPSAVED], %g5 +1: TRAP_LOAD_THREAD_REG(%g6, %g1) + ldub [%g6 + TI_FPSAVED], %g5 wr %g0, FPRS_FEF, %fprs andcc %g5, FPRS_FEF, %g0 be,a,pt %icc, 1f @@ -96,10 +97,22 @@ do_fpdis: add %g6, TI_FPREGS + 0x80, %g1 faddd %f0, %f2, %f4 fmuld %f0, %f2, %f6 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync add %g6, TI_FPREGS + 0xc0, %g2 faddd %f0, %f2, %f8 @@ -125,11 +138,23 @@ do_fpdis: fzero %f32 mov SECONDARY_CONTEXT, %g3 fzero %f34 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + add %g6, TI_FPREGS, %g1 sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section 
.sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync add %g6, TI_FPREGS + 0x40, %g2 faddd %f32, %f34, %f36 @@ -154,10 +179,22 @@ do_fpdis: nop 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync mov 0x40, %g2 membar #Sync @@ -168,7 +205,13 @@ do_fpdis: ldda [%g1 + %g2] ASI_BLK_S, %f48 membar #Sync fpdis_exit: - stxa %g5, [%g3] ASI_DMMU + +661: stxa %g5, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g5, [%g3] ASI_MMU + .previous + membar #Sync fpdis_exit2: wr %g7, 0, %gsr @@ -189,6 +232,7 @@ fp_other_bounce: .globl do_fpother_check_fitos .align 32 do_fpother_check_fitos: + TRAP_LOAD_THREAD_REG(%g6, %g1) sethi %hi(fp_other_bounce - 4), %g7 or %g7, %lo(fp_other_bounce - 4), %g7 @@ -312,6 +356,7 @@ fitos_emul_fini: .globl do_fptrap .align 32 do_fptrap: + TRAP_LOAD_THREAD_REG(%g6, %g1) stx %fsr, [%g6 + TI_XFSR] do_fptrap_after_fsr: ldub [%g6 + TI_FPSAVED], %g3 @@ -321,10 +366,22 @@ do_fptrap_after_fsr: rd %gsr, %g3 stx %g3, [%g6 + TI_GSR] mov SECONDARY_CONTEXT, %g3 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync add %g6, TI_FPREGS, %g2 andcc %g1, FPRS_DL, %g0 @@ -339,7 +396,13 @@ do_fptrap_after_fsr: stda %f48, [%g2 + %g3] ASI_BLK_S 5: mov SECONDARY_CONTEXT, %g1 membar 
#Sync - stxa %g5, [%g1] ASI_DMMU + +661: stxa %g5, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g5, [%g1] ASI_MMU + .previous + membar #Sync ba,pt %xcc, etrap wr %g0, 0, %fprs @@ -353,8 +416,6 @@ do_fptrap_after_fsr: * * With this method we can do most of the cross-call tlb/cache * flushing very quickly. - * - * Current CPU's IRQ worklist table is locked into %g6, don't touch. */ .text .align 32 @@ -378,6 +439,8 @@ do_ivec: sllx %g2, %g4, %g2 sllx %g4, 2, %g4 + TRAP_LOAD_IRQ_WORK(%g6, %g1) + lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */ stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */ stw %g3, [%g6 + %g4] /* irq_work(cpu, pil) = bucket */ @@ -399,76 +462,6 @@ do_ivec_xcall: 1: jmpl %g3, %g0 nop - .globl save_alternate_globals -save_alternate_globals: /* %o0 = save_area */ - rdpr %pstate, %o5 - andn %o5, PSTATE_IE, %o1 - wrpr %o1, PSTATE_AG, %pstate - stx %g0, [%o0 + 0x00] - stx %g1, [%o0 + 0x08] - stx %g2, [%o0 + 0x10] - stx %g3, [%o0 + 0x18] - stx %g4, [%o0 + 0x20] - stx %g5, [%o0 + 0x28] - stx %g6, [%o0 + 0x30] - stx %g7, [%o0 + 0x38] - wrpr %o1, PSTATE_IG, %pstate - stx %g0, [%o0 + 0x40] - stx %g1, [%o0 + 0x48] - stx %g2, [%o0 + 0x50] - stx %g3, [%o0 + 0x58] - stx %g4, [%o0 + 0x60] - stx %g5, [%o0 + 0x68] - stx %g6, [%o0 + 0x70] - stx %g7, [%o0 + 0x78] - wrpr %o1, PSTATE_MG, %pstate - stx %g0, [%o0 + 0x80] - stx %g1, [%o0 + 0x88] - stx %g2, [%o0 + 0x90] - stx %g3, [%o0 + 0x98] - stx %g4, [%o0 + 0xa0] - stx %g5, [%o0 + 0xa8] - stx %g6, [%o0 + 0xb0] - stx %g7, [%o0 + 0xb8] - wrpr %o5, 0x0, %pstate - retl - nop - - .globl restore_alternate_globals -restore_alternate_globals: /* %o0 = save_area */ - rdpr %pstate, %o5 - andn %o5, PSTATE_IE, %o1 - wrpr %o1, PSTATE_AG, %pstate - ldx [%o0 + 0x00], %g0 - ldx [%o0 + 0x08], %g1 - ldx [%o0 + 0x10], %g2 - ldx [%o0 + 0x18], %g3 - ldx [%o0 + 0x20], %g4 - ldx [%o0 + 0x28], %g5 - ldx [%o0 + 0x30], %g6 - ldx [%o0 + 0x38], %g7 - wrpr %o1, PSTATE_IG, %pstate - ldx [%o0 + 0x40], %g0 - ldx [%o0 + 
0x48], %g1 - ldx [%o0 + 0x50], %g2 - ldx [%o0 + 0x58], %g3 - ldx [%o0 + 0x60], %g4 - ldx [%o0 + 0x68], %g5 - ldx [%o0 + 0x70], %g6 - ldx [%o0 + 0x78], %g7 - wrpr %o1, PSTATE_MG, %pstate - ldx [%o0 + 0x80], %g0 - ldx [%o0 + 0x88], %g1 - ldx [%o0 + 0x90], %g2 - ldx [%o0 + 0x98], %g3 - ldx [%o0 + 0xa0], %g4 - ldx [%o0 + 0xa8], %g5 - ldx [%o0 + 0xb0], %g6 - ldx [%o0 + 0xb8], %g7 - wrpr %o5, 0x0, %pstate - retl - nop - .globl getcc, setcc getcc: ldx [%o0 + PT_V9_TSTATE], %o1 @@ -488,9 +481,24 @@ setcc: retl stx %o1, [%o0 + PT_V9_TSTATE] - .globl utrap, utrap_ill -utrap: brz,pn %g1, etrap + .globl utrap_trap +utrap_trap: /* %g3=handler,%g4=level */ + TRAP_LOAD_THREAD_REG(%g6, %g1) + ldx [%g6 + TI_UTRAPS], %g1 + brnz,pt %g1, invoke_utrap nop + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + call bad_trap + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 + +invoke_utrap: + sllx %g3, 3, %g3 + ldx [%g1 + %g3], %g1 save %sp, -128, %sp rdpr %tstate, %l6 rdpr %cwp, %l7 @@ -500,17 +508,6 @@ utrap: brz,pn %g1, etrap rdpr %tnpc, %l7 wrpr %g1, 0, %tnpc done -utrap_ill: - call bad_trap - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 - - /* XXX Here is stuff we still need to write... 
-DaveM XXX */ - .globl netbsd_syscall -netbsd_syscall: - retl - nop /* We need to carefully read the error status, ACK * the errors, prevent recursive traps, and pass the @@ -1001,7 +998,7 @@ dcpe_icpe_tl1_common: * %g3: scratch * %g4: AFSR * %g5: AFAR - * %g6: current thread ptr + * %g6: unused, will have current thread ptr after etrap * %g7: scratch */ __cheetah_log_error: @@ -1539,13 +1536,14 @@ ret_from_syscall: 1: b,pt %xcc, ret_sys_call ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 -sparc_exit: wrpr %g0, (PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV), %pstate +sparc_exit: rdpr %pstate, %g2 + wrpr %g2, PSTATE_IE, %pstate rdpr %otherwin, %g1 rdpr %cansave, %g3 add %g3, %g1, %g3 wrpr %g3, 0x0, %cansave wrpr %g0, 0x0, %otherwin - wrpr %g0, (PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE), %pstate + wrpr %g2, 0x0, %pstate ba,pt %xcc, sys_exit stb %g0, [%g6 + TI_WSAVED] @@ -1690,3 +1688,138 @@ __flushw_user: restore %g0, %g0, %g0 2: retl nop + +#ifdef CONFIG_SMP + .globl hard_smp_processor_id +hard_smp_processor_id: +#endif + .globl real_hard_smp_processor_id +real_hard_smp_processor_id: + __GET_CPUID(%o0) + retl + nop + + /* %o0: devhandle + * %o1: devino + * + * returns %o0: sysino + */ + .globl sun4v_devino_to_sysino +sun4v_devino_to_sysino: + mov HV_FAST_INTR_DEVINO2SYSINO, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * + * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + */ + .globl sun4v_intr_getenabled +sun4v_intr_getenabled: + mov HV_FAST_INTR_GETENABLED, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + */ + .globl sun4v_intr_setenabled +sun4v_intr_setenabled: + mov HV_FAST_INTR_SETENABLED, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: sysino + * + * returns %o0: intr_state (HV_INTR_STATE_*) + */ + .globl sun4v_intr_getstate +sun4v_intr_getstate: + mov HV_FAST_INTR_GETSTATE, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * %o1: intr_state (HV_INTR_STATE_*) 
+ */ + .globl sun4v_intr_setstate +sun4v_intr_setstate: + mov HV_FAST_INTR_SETSTATE, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: sysino + * + * returns %o0: cpuid + */ + .globl sun4v_intr_gettarget +sun4v_intr_gettarget: + mov HV_FAST_INTR_GETTARGET, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * %o1: cpuid + */ + .globl sun4v_intr_settarget +sun4v_intr_settarget: + mov HV_FAST_INTR_SETTARGET, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: type + * %o1: queue paddr + * %o2: num queue entries + * + * returns %o0: status + */ + .globl sun4v_cpu_qconf +sun4v_cpu_qconf: + mov HV_FAST_CPU_QCONF, %o5 + ta HV_FAST_TRAP + retl + nop + + /* returns %o0: status + */ + .globl sun4v_cpu_yield +sun4v_cpu_yield: + mov HV_FAST_CPU_YIELD, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: num cpus in cpu list + * %o1: cpu list paddr + * %o2: mondo block paddr + * + * returns %o0: status + */ + .globl sun4v_cpu_mondo_send +sun4v_cpu_mondo_send: + mov HV_FAST_CPU_MONDO_SEND, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: CPU ID + * + * returns %o0: -status if status non-zero, else + * %o0: cpu state as HV_CPU_STATE_* + */ + .globl sun4v_cpu_state +sun4v_cpu_state: + mov HV_FAST_CPU_STATE, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 1f + sub %g0, %o0, %o0 + mov %o1, %o0 +1: retl + nop diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index 0d8eba21111b..149383835c25 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -31,6 +31,7 @@ .globl etrap, etrap_irq, etraptl1 etrap: rdpr %pil, %g2 etrap_irq: + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -54,7 +55,31 @@ etrap_irq: rd %y, %g3 stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC] st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y] - save %g2, -STACK_BIAS, %sp ! 
Ordering here is critical + + rdpr %cansave, %g1 + brnz,pt %g1, etrap_save + nop + + rdpr %cwp, %g1 + add %g1, 2, %g1 + wrpr %g1, %cwp + be,pt %xcc, etrap_user_spill + mov ASI_AIUP, %g3 + + rdpr %otherwin, %g3 + brz %g3, etrap_kernel_spill + mov ASI_AIUS, %g3 + +etrap_user_spill: + + wr %g3, 0x0, %asi + ldx [%g6 + TI_FLAGS], %g3 + and %g3, _TIF_32BIT, %g3 + brnz,pt %g3, etrap_user_spill_32bit + nop + ba,a,pt %xcc, etrap_user_spill_64bit + +etrap_save: save %g2, -STACK_BIAS, %sp mov %g6, %l6 bne,pn %xcc, 3f @@ -70,42 +95,56 @@ etrap_irq: wrpr %g2, 0, %wstate sethi %hi(sparc64_kern_pri_context), %g2 ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3 - stxa %g3, [%l4] ASI_DMMU - flush %l6 - wr %g0, ASI_AIUS, %asi -2: wrpr %g0, 0x0, %tl - mov %g4, %l4 + +661: stxa %g3, [%l4] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g3, [%l4] ASI_MMU + .previous + + sethi %hi(KERNBASE), %l4 + flush %l4 + mov ASI_AIUS, %l7 +2: mov %g4, %l4 mov %g5, %l5 + add %g7, 4, %l2 + + /* Go to trap time globals so we can save them. 
*/ +661: wrpr %g0, ETRAP_PSTATE1, %pstate + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous - mov %g7, %l2 - wrpr %g0, ETRAP_PSTATE1, %pstate stx %g1, [%sp + PTREGS_OFF + PT_V9_G1] stx %g2, [%sp + PTREGS_OFF + PT_V9_G2] + sllx %l7, 24, %l7 stx %g3, [%sp + PTREGS_OFF + PT_V9_G3] + rdpr %cwp, %l0 stx %g4, [%sp + PTREGS_OFF + PT_V9_G4] stx %g5, [%sp + PTREGS_OFF + PT_V9_G5] stx %g6, [%sp + PTREGS_OFF + PT_V9_G6] - stx %g7, [%sp + PTREGS_OFF + PT_V9_G7] + or %l7, %l0, %l7 + sethi %hi(TSTATE_RMO | TSTATE_PEF), %l0 + or %l7, %l0, %l7 + wrpr %l2, %tnpc + wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate stx %i0, [%sp + PTREGS_OFF + PT_V9_I0] stx %i1, [%sp + PTREGS_OFF + PT_V9_I1] stx %i2, [%sp + PTREGS_OFF + PT_V9_I2] stx %i3, [%sp + PTREGS_OFF + PT_V9_I3] stx %i4, [%sp + PTREGS_OFF + PT_V9_I4] stx %i5, [%sp + PTREGS_OFF + PT_V9_I5] - stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] - stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] - wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 -#ifdef CONFIG_SMP - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif - jmpl %l2 + 0x4, %g0 - ldx [%g6 + TI_TASK], %g4 + stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1) + ldx [%g6 + TI_TASK], %g4 + done -3: ldub [%l6 + TI_FPDEPTH], %l5 +3: mov ASI_P, %l7 + ldub [%l6 + TI_FPDEPTH], %l5 add %l6, TI_FPSAVED + 1, %l4 srl %l5, 1, %l3 add %l5, 2, %l5 @@ -125,6 +164,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. * 0x58 TL4's TT * 0x60 TL */ + TRAP_LOAD_THREAD_REG(%g6, %g1) sub %sp, ((4 * 8) * 4) + 8, %g2 rdpr %tl, %g1 @@ -148,6 +188,11 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. rdpr %tt, %g3 stx %g3, [%g2 + STACK_BIAS + 0x38] + sethi %hi(is_sun4v), %g3 + lduw [%g3 + %lo(is_sun4v)], %g3 + brnz,pn %g3, finish_tl1_capture + nop + wrpr %g0, 3, %tl rdpr %tstate, %g3 stx %g3, [%g2 + STACK_BIAS + 0x40] @@ -168,91 +213,20 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. 
rdpr %tt, %g3 stx %g3, [%g2 + STACK_BIAS + 0x78] - wrpr %g1, %tl stx %g1, [%g2 + STACK_BIAS + 0x80] +finish_tl1_capture: + wrpr %g0, 1, %tl +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(1) + .previous + rdpr %tstate, %g1 sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2 ba,pt %xcc, 1b andcc %g1, TSTATE_PRIV, %g0 - .align 64 - .globl scetrap -scetrap: rdpr %pil, %g2 - rdpr %tstate, %g1 - sllx %g2, 20, %g3 - andcc %g1, TSTATE_PRIV, %g0 - or %g1, %g3, %g1 - bne,pn %xcc, 1f - sub %sp, (STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS), %g2 - wrpr %g0, 7, %cleanwin - - sllx %g1, 51, %g3 - sethi %hi(TASK_REGOFF), %g2 - or %g2, %lo(TASK_REGOFF), %g2 - brlz,pn %g3, 1f - add %g6, %g2, %g2 - wr %g0, 0, %fprs -1: rdpr %tpc, %g3 - stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE] - - rdpr %tnpc, %g1 - stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC] - stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC] - save %g2, -STACK_BIAS, %sp ! Ordering here is critical - mov %g6, %l6 - bne,pn %xcc, 2f - mov ASI_P, %l7 - rdpr %canrestore, %g3 - - rdpr %wstate, %g2 - wrpr %g0, 0, %canrestore - sll %g2, 3, %g2 - mov PRIMARY_CONTEXT, %l4 - wrpr %g3, 0, %otherwin - wrpr %g2, 0, %wstate - sethi %hi(sparc64_kern_pri_context), %g2 - ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3 - stxa %g3, [%l4] ASI_DMMU - flush %l6 - - mov ASI_AIUS, %l7 -2: mov %g4, %l4 - mov %g5, %l5 - add %g7, 0x4, %l2 - wrpr %g0, ETRAP_PSTATE1, %pstate - stx %g1, [%sp + PTREGS_OFF + PT_V9_G1] - stx %g2, [%sp + PTREGS_OFF + PT_V9_G2] - sllx %l7, 24, %l7 - - stx %g3, [%sp + PTREGS_OFF + PT_V9_G3] - rdpr %cwp, %l0 - stx %g4, [%sp + PTREGS_OFF + PT_V9_G4] - stx %g5, [%sp + PTREGS_OFF + PT_V9_G5] - stx %g6, [%sp + PTREGS_OFF + PT_V9_G6] - stx %g7, [%sp + PTREGS_OFF + PT_V9_G7] - or %l7, %l0, %l7 - sethi %hi(TSTATE_RMO | TSTATE_PEF), %l0 - - or %l7, %l0, %l7 - wrpr %l2, %tnpc - wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate - stx %i0, [%sp + PTREGS_OFF + PT_V9_I0] - stx %i1, [%sp + PTREGS_OFF + PT_V9_I1] - stx %i2, [%sp + PTREGS_OFF + 
PT_V9_I2] - stx %i3, [%sp + PTREGS_OFF + PT_V9_I3] - stx %i4, [%sp + PTREGS_OFF + PT_V9_I4] - - stx %i5, [%sp + PTREGS_OFF + PT_V9_I5] - stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] - mov %l6, %g6 - stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] -#ifdef CONFIG_SMP - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif - ldx [%g6 + TI_TASK], %g4 - done - #undef TASK_REGOFF #undef ETRAP_PSTATE1 diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index b49dcd4504b0..3eadac5e171e 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -26,6 +26,7 @@ #include <asm/head.h> #include <asm/ttable.h> #include <asm/mmu.h> +#include <asm/cpudata.h> /* This section from from _start to sparc64_boot_end should fit into * 0x0000000000404000 to 0x0000000000408000. @@ -94,12 +95,17 @@ sparc64_boot: wrpr %g1, 0x0, %pstate ba,a,pt %xcc, 1f - .globl prom_finddev_name, prom_chosen_path - .globl prom_getprop_name, prom_mmu_name - .globl prom_callmethod_name, prom_translate_name + .globl prom_finddev_name, prom_chosen_path, prom_root_node + .globl prom_getprop_name, prom_mmu_name, prom_peer_name + .globl prom_callmethod_name, prom_translate_name, prom_root_compatible .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache .globl prom_boot_mapped_pc, prom_boot_mapping_mode .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low + .globl is_sun4v +prom_peer_name: + .asciz "peer" +prom_compatible_name: + .asciz "compatible" prom_finddev_name: .asciz "finddevice" prom_chosen_path: @@ -116,7 +122,13 @@ prom_map_name: .asciz "map" prom_unmap_name: .asciz "unmap" +prom_sun4v_name: + .asciz "sun4v" .align 4 +prom_root_compatible: + .skip 64 +prom_root_node: + .word 0 prom_mmu_ihandle_cache: .word 0 prom_boot_mapped_pc: @@ -128,8 +140,54 @@ prom_boot_mapping_phys_high: .xword 0 prom_boot_mapping_phys_low: .xword 0 +is_sun4v: + .word 0 1: rd %pc, %l0 + + mov (1b - prom_peer_name), %l1 + sub %l0, %l1, %l1 + mov 0, %l2 + + /* prom_root_node = prom_peer(0) */ + stx 
%l1, [%sp + 2047 + 128 + 0x00] ! service, "peer" + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, 0 + stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x20], %l4 ! prom root node + mov (1b - prom_root_node), %l1 + sub %l0, %l1, %l1 + stw %l4, [%l1] + + mov (1b - prom_getprop_name), %l1 + mov (1b - prom_compatible_name), %l2 + mov (1b - prom_root_compatible), %l5 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l5, %l5 + + /* prom_getproperty(prom_root_node, "compatible", + * &prom_root_compatible, 64) + */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, prom_root_node + stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible" + stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_root_compatible + mov 64, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size + stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! 
argument array + mov (1b - prom_finddev_name), %l1 mov (1b - prom_chosen_path), %l2 mov (1b - prom_boot_mapped_pc), %l3 @@ -238,6 +296,27 @@ prom_boot_mapping_phys_low: add %sp, (192 + 128), %sp sparc64_boot_after_remap: + sethi %hi(prom_root_compatible), %g1 + or %g1, %lo(prom_root_compatible), %g1 + sethi %hi(prom_sun4v_name), %g7 + or %g7, %lo(prom_sun4v_name), %g7 + mov 5, %g3 +1: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 + bne,pn %icc, 2f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 1b + add %g1, 1, %g1 + + sethi %hi(is_sun4v), %g1 + or %g1, %lo(is_sun4v), %g1 + mov 1, %g7 + stw %g7, [%g1] + +2: + BRANCH_IF_SUN4V(g1, jump_to_sun4u_init) BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) ba,pt %xcc, spitfire_boot @@ -301,20 +380,58 @@ jump_to_sun4u_init: nop sun4u_init: + BRANCH_IF_SUN4V(g1, sun4v_init) + /* Set ctx 0 */ - mov PRIMARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU - membar #Sync + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_DMMU + membar #Sync - mov SECONDARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_DMMU membar #Sync - BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup) + ba,pt %xcc, sun4u_continue + nop + +sun4v_init: + /* Set ctx 0 */ + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + ba,pt %xcc, niagara_tlb_fixup + nop + +sun4u_continue: + BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup) ba,pt %xcc, spitfire_tlb_fixup nop +niagara_tlb_fixup: + mov 3, %g2 /* Set TLB type to hypervisor. */ + sethi %hi(tlb_type), %g1 + stw %g2, [%g1 + %lo(tlb_type)] + + /* Patch copy/clear ops. */ + call niagara_patch_copyops + nop + call niagara_patch_bzero + nop + call niagara_patch_pageops + nop + + /* Patch TLB/cache ops. */ + call hypervisor_patch_cachetlbops + nop + + ba,pt %xcc, tlb_fixup_done + nop + cheetah_tlb_fixup: mov 2, %g2 /* Set TLB type to cheetah+. 
*/ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f) @@ -411,85 +528,55 @@ setup_trap_table: wrpr %g0, 15, %pil /* Make the firmware call to jump over to the Linux trap table. */ - call prom_set_trap_table - sethi %hi(sparc64_ttable_tl0), %o0 + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 1f + nop - /* Start using proper page size encodings in ctx register. */ - sethi %hi(sparc64_kern_pri_context), %g3 - ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 - mov PRIMARY_CONTEXT, %g1 - stxa %g2, [%g1] ASI_DMMU - membar #Sync + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD - /* The Linux trap handlers expect various trap global registers - * to be setup with some fixed values. So here we set these - * up very carefully. These globals are: - * - * Alternate Globals (PSTATE_AG): - * - * %g6 --> current_thread_info() - * - * MMU Globals (PSTATE_MG): - * - * %g1 --> TLB_SFSR - * %g2 --> ((_PAGE_VALID | _PAGE_SZ4MB | - * _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) - * ^ 0xfffff80000000000) - * (this %g2 value is used for computing the PAGE_OFFSET kernel - * TLB entries quickly, the virtual address of the fault XOR'd - * with this %g2 value is the PTE to load into the TLB) - * %g3 --> VPTE_BASE_CHEETAH or VPTE_BASE_SPITFIRE + /* Compute physical address: * - * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()): - * - * %g6 --> __irq_work[smp_processor_id()] + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate - mov %o2, %g6 - -#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000) -#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) - wrpr %o1, PSTATE_MG, %pstate - mov TSB_REG, %g1 - stxa %g0, [%g1] ASI_DMMU - membar #Sync - stxa %g0, [%g1] ASI_IMMU - membar #Sync - mov 
TLB_SFSR, %g1 - sethi %uhi(KERN_HIGHBITS), %g2 - or %g2, %ulo(KERN_HIGHBITS), %g2 - sllx %g2, 32, %g2 - or %g2, KERN_LOWBITS, %g2 - - BRANCH_IF_ANY_CHEETAH(g3,g7,8f) - ba,pt %xcc, 9f + call prom_set_trap_table_sun4v + sethi %hi(sparc64_ttable_tl0), %o0 + + ba,pt %xcc, 2f nop -8: - sethi %uhi(VPTE_BASE_CHEETAH), %g3 - or %g3, %ulo(VPTE_BASE_CHEETAH), %g3 - ba,pt %xcc, 2f - sllx %g3, 32, %g3 +1: call prom_set_trap_table + sethi %hi(sparc64_ttable_tl0), %o0 -9: - sethi %uhi(VPTE_BASE_SPITFIRE), %g3 - or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3 - sllx %g3, 32, %g3 + /* Start using proper page size encodings in ctx register. */ +2: sethi %hi(sparc64_kern_pri_context), %g3 + ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 -2: - clr %g7 -#undef KERN_HIGHBITS -#undef KERN_LOWBITS + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + membar #Sync /* Kill PROM timer */ sethi %hi(0x80000000), %o2 sllx %o2, 32, %o2 wr %o2, 0, %tick_cmpr - BRANCH_IF_ANY_CHEETAH(o2,o3,1f) + BRANCH_IF_SUN4V(o2, 1f) + BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) ba,pt %xcc, 2f nop @@ -502,7 +589,6 @@ setup_trap_table: 2: wrpr %g0, %g0, %wstate - wrpr %o1, 0x0, %pstate call init_irqwork_curcpu nop @@ -517,7 +603,7 @@ setup_trap_table: restore .globl setup_tba -setup_tba: /* i0 = is_starfire */ +setup_tba: save %sp, -192, %sp /* The boot processor is the only cpu which invokes this @@ -536,31 +622,35 @@ setup_tba: /* i0 = is_starfire */ restore sparc64_boot_end: -#include "systbls.S" #include "ktlb.S" +#include "tsb.S" #include "etrap.S" #include "rtrap.S" #include "winfixup.S" #include "entry.S" +#include "sun4v_tlb_miss.S" +#include "sun4v_ivec.S" /* * The following skip makes sure the trap table in ttable.S is aligned * on a 32K boundary as required by the v9 specs for TBA register. + * + * We align to a 32K boundary, then we have the 32K kernel TSB, + * then the 32K aligned trap table. 
*/ 1: .skip 0x4000 + _start - 1b -#ifdef CONFIG_SBUS -/* This is just a hack to fool make depend config.h discovering - strategy: As the .S files below need config.h, but - make depend does not find it for them, we include config.h - in head.S */ -#endif + .globl swapper_tsb +swapper_tsb: + .skip (32 * 1024) ! 0x0000000000408000 #include "ttable.S" +#include "systbls.S" + .data .align 8 .globl prom_tba, tlb_type diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 233526ba3abe..8c93ba655b33 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -21,6 +21,7 @@ #include <linux/delay.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/bootmem.h> #include <asm/ptrace.h> #include <asm/processor.h> @@ -39,6 +40,7 @@ #include <asm/cache.h> #include <asm/cpudata.h> #include <asm/auxio.h> +#include <asm/head.h> #ifdef CONFIG_SMP static void distribute_irqs(void); @@ -136,12 +138,48 @@ out_unlock: return 0; } +extern unsigned long real_hard_smp_processor_id(void); + +static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) +{ + unsigned int tid; + + if (this_is_starfire) { + tid = starfire_translate(imap, cpuid); + tid <<= IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } else { + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + unsigned long ver; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_JBUS; + } else { + unsigned int a = cpuid & 0x1f; + unsigned int n = (cpuid >> 5) & 0x1f; + + tid = ((a << IMAP_AID_SHIFT) | + (n << IMAP_NID_SHIFT)); + tid &= (IMAP_AID_SAFARI | + IMAP_NID_SAFARI);; + } + } else { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } + } + + return tid; +} + /* Now these are always passed a true fully specified sun4u INO. 
*/ void enable_irq(unsigned int irq) { struct ino_bucket *bucket = __bucket(irq); - unsigned long imap; - unsigned long tid; + unsigned long imap, cpuid; imap = bucket->imap; if (imap == 0UL) @@ -149,47 +187,38 @@ void enable_irq(unsigned int irq) preempt_disable(); - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - unsigned long ver; - - __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { - /* We set it to our JBUS ID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_JBUS_CONFIG)); - tid = ((tid & (0x1fUL<<17)) << 9); - tid &= IMAP_TID_JBUS; - } else { - /* We set it to our Safari AID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_SAFARI_CONFIG)); - tid = ((tid & (0x3ffUL<<17)) << 9); - tid &= IMAP_AID_SAFARI; - } - } else if (this_is_starfire == 0) { - /* We set it to our UPA MID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_UPA_CONFIG)); - tid = ((tid & UPA_CONFIG_MID) << 9); - tid &= IMAP_TID_UPA; + /* This gets the physical processor ID, even on uniprocessor, + * so we can always program the interrupt target correctly. + */ + cpuid = real_hard_smp_processor_id(); + + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(irq); + int err; + + err = sun4v_intr_settarget(ino, cpuid); + if (err != HV_EOK) + printk("sun4v_intr_settarget(%x,%lu): err(%d)\n", + ino, cpuid, err); + err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED); + if (err != HV_EOK) + printk("sun4v_intr_setenabled(%x): err(%d)\n", + ino, err); } else { - tid = (starfire_translate(imap, smp_processor_id()) << 26); - tid &= IMAP_TID_UPA; + unsigned int tid = sun4u_compute_tid(imap, cpuid); + + /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product + * of this SYSIO's preconfigured IGN in the SYSIO Control + * Register, the hardware just mirrors that value here. + * However for Graphics and UPA Slave devices the full + * IMAP_INR field can be set by the programmer here. 
+ * + * Things like FFB can now be handled via the new IRQ + * mechanism. + */ + upa_writel(tid | IMAP_VALID, imap); } - /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product - * of this SYSIO's preconfigured IGN in the SYSIO Control - * Register, the hardware just mirrors that value here. - * However for Graphics and UPA Slave devices the full - * IMAP_INR field can be set by the programmer here. - * - * Things like FFB can now be handled via the new IRQ mechanism. - */ - upa_writel(tid | IMAP_VALID, imap); - preempt_enable(); } @@ -201,16 +230,26 @@ void disable_irq(unsigned int irq) imap = bucket->imap; if (imap != 0UL) { - u32 tmp; + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(irq); + int err; + + err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); + if (err != HV_EOK) + printk("sun4v_intr_setenabled(%x): " + "err(%d)\n", ino, err); + } else { + u32 tmp; - /* NOTE: We do not want to futz with the IRQ clear registers - * and move the state to IDLE, the SCSI code does call - * disable_irq() to assure atomicity in the queue cmd - * SCSI adapter driver code. Thus we'd lose interrupts. - */ - tmp = upa_readl(imap); - tmp &= ~IMAP_VALID; - upa_writel(tmp, imap); + /* NOTE: We do not want to futz with the IRQ clear registers + * and move the state to IDLE, the SCSI code does call + * disable_irq() to assure atomicity in the queue cmd + * SCSI adapter driver code. Thus we'd lose interrupts. + */ + tmp = upa_readl(imap); + tmp &= ~IMAP_VALID; + upa_writel(tmp, imap); + } } } @@ -248,6 +287,8 @@ unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long return __irq(&pil0_dummy_bucket); } + BUG_ON(tlb_type == hypervisor); + /* RULE: Both must be specified in all other cases. 
*/ if (iclr == 0UL || imap == 0UL) { prom_printf("Invalid build_irq %d %d %016lx %016lx\n", @@ -275,12 +316,11 @@ unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long goto out; } - bucket->irq_info = kmalloc(sizeof(struct irq_desc), GFP_ATOMIC); + bucket->irq_info = kzalloc(sizeof(struct irq_desc), GFP_ATOMIC); if (!bucket->irq_info) { prom_printf("IRQ: Error, kmalloc(irq_desc) failed.\n"); prom_halt(); } - memset(bucket->irq_info, 0, sizeof(struct irq_desc)); /* Ok, looks good, set it up. Don't touch the irq_chain or * the pending flag. @@ -294,6 +334,37 @@ out: return __irq(bucket); } +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino, int pil, unsigned char flags) +{ + struct ino_bucket *bucket; + unsigned long sysino; + + sysino = sun4v_devino_to_sysino(devhandle, devino); + + bucket = &ivector_table[sysino]; + + /* Catch accidental accesses to these things. IMAP/ICLR handling + * is done by hypervisor calls on sun4v platforms, not by direct + * register accesses. + * + * But we need to make them look unique for the disable_irq() logic + * in free_irq(). + */ + bucket->imap = ~0UL - sysino; + bucket->iclr = ~0UL - sysino; + + bucket->pil = pil; + bucket->flags = flags; + + bucket->irq_info = kzalloc(sizeof(struct irq_desc), GFP_ATOMIC); + if (!bucket->irq_info) { + prom_printf("IRQ: Error, kmalloc(irq_desc) failed.\n"); + prom_halt(); + } + + return __irq(bucket); +} + static void atomic_bucket_insert(struct ino_bucket *bucket) { unsigned long pstate; @@ -482,7 +553,6 @@ void free_irq(unsigned int irq, void *dev_id) bucket = __bucket(irq); if (bucket != &pil0_dummy_bucket) { struct irq_desc *desc = bucket->irq_info; - unsigned long imap = bucket->imap; int ent, i; for (i = 0; i < MAX_IRQ_DESC_ACTION; i++) { @@ -495,6 +565,8 @@ void free_irq(unsigned int irq, void *dev_id) } if (!desc->action_active_mask) { + unsigned long imap = bucket->imap; + /* This unique interrupt source is now inactive. 
*/ bucket->flags &= ~IBF_ACTIVE; @@ -592,7 +664,18 @@ static void process_bucket(int irq, struct ino_bucket *bp, struct pt_regs *regs) break; } if (bp->pil != 0) { - upa_writel(ICLR_IDLE, bp->iclr); + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(bp); + int err; + + err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk("sun4v_intr_setstate(%x): " + "err(%d)\n", ino, err); + } else { + upa_writel(ICLR_IDLE, bp->iclr); + } + /* Test and add entropy */ if (random & SA_SAMPLE_RANDOM) add_interrupt_randomness(irq); @@ -694,7 +777,7 @@ irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) val = readb(auxio_register); val |= AUXIO_AUX1_FTCNT; writeb(val, auxio_register); - val &= AUXIO_AUX1_FTCNT; + val &= ~AUXIO_AUX1_FTCNT; writeb(val, auxio_register); doing_pdma = 0; @@ -727,25 +810,23 @@ EXPORT_SYMBOL(probe_irq_off); static int retarget_one_irq(struct irqaction *p, int goal_cpu) { struct ino_bucket *bucket = get_ino_in_irqaction(p) + ivector_table; - unsigned long imap = bucket->imap; - unsigned int tid; while (!cpu_online(goal_cpu)) { if (++goal_cpu >= NR_CPUS) goal_cpu = 0; } - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - tid = goal_cpu << 26; - tid &= IMAP_AID_SAFARI; - } else if (this_is_starfire == 0) { - tid = goal_cpu << 26; - tid &= IMAP_TID_UPA; + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(bucket); + + sun4v_intr_settarget(ino, goal_cpu); + sun4v_intr_setenabled(ino, HV_INTR_ENABLED); } else { - tid = (starfire_translate(imap, goal_cpu) << 26); - tid &= IMAP_TID_UPA; + unsigned long imap = bucket->imap; + unsigned int tid = sun4u_compute_tid(imap, goal_cpu); + + upa_writel(tid | IMAP_VALID, imap); } - upa_writel(tid | IMAP_VALID, imap); do { if (++goal_cpu >= NR_CPUS) @@ -848,33 +929,114 @@ static void kill_prom_timer(void) void init_irqwork_curcpu(void) { - register struct irq_work_struct *workp asm("o2"); - register unsigned long tmp asm("o3"); int cpu = 
hard_smp_processor_id(); - memset(__irq_work + cpu, 0, sizeof(*workp)); - - /* Make sure we are called with PSTATE_IE disabled. */ - __asm__ __volatile__("rdpr %%pstate, %0\n\t" - : "=r" (tmp)); - if (tmp & PSTATE_IE) { - prom_printf("BUG: init_irqwork_curcpu() called with " - "PSTATE_IE enabled, bailing.\n"); - __asm__ __volatile__("mov %%i7, %0\n\t" - : "=r" (tmp)); - prom_printf("BUG: Called from %lx\n", tmp); + memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct)); +} + +static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type) +{ + unsigned long num_entries = 128; + unsigned long status; + + status = sun4v_cpu_qconf(type, paddr, num_entries); + if (status != HV_EOK) { + prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, " + "err %lu\n", type, paddr, num_entries, status); prom_halt(); } +} - /* Set interrupt globals. */ - workp = &__irq_work[cpu]; - __asm__ __volatile__( - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate\n\t" - "mov %2, %%g6\n\t" - "wrpr %0, 0x0, %%pstate\n\t" - : "=&r" (tmp) - : "i" (PSTATE_IG), "r" (workp)); +static void __cpuinit sun4v_register_mondo_queues(int this_cpu) +{ + struct trap_per_cpu *tb = &trap_block[this_cpu]; + + register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO); + register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO); + register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR); + register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR); +} + +static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, int use_bootmem) +{ + void *page; + + if (use_bootmem) + page = alloc_bootmem_low_pages(PAGE_SIZE); + else + page = (void *) get_zeroed_page(GFP_ATOMIC); + + if (!page) { + prom_printf("SUN4V: Error, cannot allocate mondo queue.\n"); + prom_halt(); + } + + *pa_ptr = __pa(page); +} + +static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, int use_bootmem) +{ + void *page; + + if (use_bootmem) + page = alloc_bootmem_low_pages(PAGE_SIZE); + else + page 
= (void *) get_zeroed_page(GFP_ATOMIC); + + if (!page) { + prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); + prom_halt(); + } + + *pa_ptr = __pa(page); +} + +static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem) +{ +#ifdef CONFIG_SMP + void *page; + + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + + if (use_bootmem) + page = alloc_bootmem_low_pages(PAGE_SIZE); + else + page = (void *) get_zeroed_page(GFP_ATOMIC); + + if (!page) { + prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_halt(); + } + + tb->cpu_mondo_block_pa = __pa(page); + tb->cpu_list_pa = __pa(page + 64); +#endif +} + +/* Allocate and register the mondo and error queues for this cpu. */ +void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load) +{ + struct trap_per_cpu *tb = &trap_block[cpu]; + + if (alloc) { + alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem); + alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem); + alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem); + alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem); + alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem); + alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem); + + init_cpu_send_mondo_info(tb, use_bootmem); + } + + if (load) { + if (cpu != hard_smp_processor_id()) { + prom_printf("SUN4V: init mondo on cpu %d not %d\n", + cpu, hard_smp_processor_id()); + prom_halt(); + } + sun4v_register_mondo_queues(cpu); + } } /* Only invoked on boot processor. */ @@ -884,6 +1046,9 @@ void __init init_IRQ(void) kill_prom_timer(); memset(&ivector_table[0], 0, sizeof(ivector_table)); + if (tlb_type == hypervisor) + sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1); + /* We need to clear any IRQ's pending in the soft interrupt * registers, a spurious one could be left around from the * PROM timer which we just disabled. 
diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S deleted file mode 100644 index 4951ff8f6877..000000000000 --- a/arch/sparc64/kernel/itlb_base.S +++ /dev/null @@ -1,79 +0,0 @@ -/* $Id: itlb_base.S,v 1.12 2002/02/09 19:49:30 davem Exp $ - * itlb_base.S: Front end to ITLB miss replacement strategy. - * This is included directly into the trap table. - * - * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#if PAGE_SHIFT == 13 -/* - * To compute vpte offset, we need to do ((addr >> 13) << 3), - * which can be optimized to (addr >> 10) if bits 10/11/12 can - * be guaranteed to be 0 ... mmu_context.h does guarantee this - * by only using 10 bits in the hwcontext value. - */ -#define CREATE_VPTE_OFFSET1(r1, r2) \ - srax r1, 10, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) nop -#else /* PAGE_SHIFT */ -#define CREATE_VPTE_OFFSET1(r1, r2) \ - srax r1, PAGE_SHIFT, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) \ - sllx r2, 3, r2 -#endif /* PAGE_SHIFT */ - - -/* Ways we can get here: - * - * 1) Nucleus instruction misses from module code. - * 2) All user instruction misses. - * - * All real page faults merge their code paths to the - * sparc64_realfault_common label below. - */ - -/* ITLB ** ICACHE line 1: Quick user TLB misses */ - mov TLB_SFSR, %g1 - ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS - CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset - CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset - ldxa [%g3 + %g6] ASI_P, %g5 ! Load VPTE -1: brgez,pn %g5, 3f ! Not valid, branch out - sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot - andcc %g5, %g4, %g0 ! Executable? - -/* ITLB ** ICACHE line 2: Real faults */ - be,pn %xcc, 3f ! Nope, branch. - nop ! Delay-slot -2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB - retry ! Trap return -3: rdpr %pstate, %g4 ! Move into alt-globals - wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate - rdpr %tpc, %g5 ! 
And load faulting VA - mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB - -/* ITLB ** ICACHE line 3: Finish faults */ -sparc64_realfault_common: ! Called by dtlb_miss - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] - ba,pt %xcc, etrap ! Save state -1: rd %pc, %g7 ! ... - call do_sparc64_fault ! Call fault handler - add %sp, PTREGS_OFF, %o0! Compute pt_regs arg - ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state - nop - -/* ITLB ** ICACHE line 4: Window fixups */ -winfix_trampoline: - rdpr %tpc, %g3 ! Prepare winfixup TNPC - or %g3, 0x7c, %g3 ! Compute branch offset - wrpr %g3, %tnpc ! Write it into TNPC - done ! Do it to it - nop - nop - nop - nop - -#undef CREATE_VPTE_OFFSET1 -#undef CREATE_VPTE_OFFSET2 diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S new file mode 100644 index 000000000000..ad46e2024f4b --- /dev/null +++ b/arch/sparc64/kernel/itlb_miss.S @@ -0,0 +1,39 @@ +/* ITLB ** ICACHE line 1: Context 0 check and TSB load */ + ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer + ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET + srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context + brz,pn %g5, kvmap_itlb ! Context 0 processing + srlx %g6, 22, %g6 ! Delay slot + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry + cmp %g4, %g6 ! Compare TAG + +/* ITLB ** ICACHE line 2: TSB compare and TLB load */ + bne,pn %xcc, tsb_miss_itlb ! Miss + mov FAULT_CODE_ITLB, %g3 + andcc %g5, _PAGE_EXEC_4U, %g0 ! Executable? + be,pn %xcc, tsb_do_fault + nop ! Delay slot, fill me + stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB + retry ! Trap done + nop + +/* ITLB ** ICACHE line 3: */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* ITLB ** ICACHE line 4: */ + nop + nop + nop + nop + nop + nop + nop + nop diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index d9244d3c9f73..31da1e564c95 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -4,191 +4,276 @@ * Copyright (C) 1996 Eddie C. 
Dost (ecd@brainaid.de) * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx) * Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz) -*/ + */ #include <linux/config.h> #include <asm/head.h> #include <asm/asi.h> #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/tsb.h> .text .align 32 -/* - * On a second level vpte miss, check whether the original fault is to the OBP - * range (note that this is only possible for instruction miss, data misses to - * obp range do not use vpte). If so, go back directly to the faulting address. - * This is because we want to read the tpc, otherwise we have no way of knowing - * the 8k aligned faulting address if we are using >8k kernel pagesize. This - * also ensures no vpte range addresses are dropped into tlb while obp is - * executing (see inherit_locked_prom_mappings() rant). - */ -sparc64_vpte_nucleus: - /* Note that kvmap below has verified that the address is - * in the range MODULES_VADDR --> VMALLOC_END already. So - * here we need only check if it is an OBP address or not. +kvmap_itlb: + /* g6: TAG TARGET */ + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_IMMU, %g4 + + /* sun4v_itlb_miss branches here with the missing virtual + * address already loaded into %g4 */ +kvmap_itlb_4v: + +kvmap_itlb_nonlinear: + /* Catch kernel NULL pointer calls. */ + sethi %hi(PAGE_SIZE), %g5 + cmp %g4, %g5 + bleu,pn %xcc, kvmap_dtlb_longpath + nop + + KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load) + +kvmap_itlb_tsb_miss: sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 - blu,pn %xcc, kern_vpte + blu,pn %xcc, kvmap_itlb_vmalloc_addr mov 0x1, %g5 sllx %g5, 32, %g5 cmp %g4, %g5 - blu,pn %xcc, vpte_insn_obp + blu,pn %xcc, kvmap_itlb_obp nop - /* These two instructions are patched by paginig_init(). 
*/ -kern_vpte: - sethi %hi(swapper_pgd_zero), %g5 - lduw [%g5 + %lo(swapper_pgd_zero)], %g5 +kvmap_itlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) - /* With kernel PGD in %g5, branch back into dtlb_backend. */ - ba,pt %xcc, sparc64_kpte_continue - andn %g1, 0x3, %g1 /* Finish PMD offset adjustment. */ + KTSB_LOCK_TAG(%g1, %g2, %g7) -vpte_noent: - /* Restore previous TAG_ACCESS, %g5 is zero, and we will - * skip over the trap instruction so that the top level - * TLB miss handler will thing this %g5 value is just an - * invalid PTE, thus branching to full fault processing. - */ - mov TLB_SFSR, %g1 - stxa %g4, [%g1 + %g1] ASI_DMMU - done - -vpte_insn_obp: - /* Behave as if we are at TL0. */ - wrpr %g0, 1, %tl - rdpr %tpc, %g4 /* Find original faulting iaddr */ - srlx %g4, 13, %g4 /* Throw out context bits */ - sllx %g4, 13, %g4 /* g4 has vpn + ctx0 now */ - - /* Restore previous TAG_ACCESS. */ - mov TLB_SFSR, %g1 - stxa %g4, [%g1 + %g1] ASI_IMMU - - sethi %hi(prom_trans), %g5 - or %g5, %lo(prom_trans), %g5 - -1: ldx [%g5 + 0x00], %g6 ! base - brz,a,pn %g6, longpath ! no more entries, fail - mov TLB_SFSR, %g1 ! and restore %g1 - ldx [%g5 + 0x08], %g1 ! len - add %g6, %g1, %g1 ! end - cmp %g6, %g4 - bgu,pt %xcc, 2f - cmp %g4, %g1 - bgeu,pt %xcc, 2f - ldx [%g5 + 0x10], %g1 ! PTE - - /* TLB load, restore %g1, and return from trap. */ - sub %g4, %g6, %g6 - add %g1, %g6, %g5 - mov TLB_SFSR, %g1 - stxa %g5, [%g0] ASI_ITLB_DATA_IN - retry + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_itlb_longpath + KTSB_STORE(%g1, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ -2: ba,pt %xcc, 1b - add %g5, (3 * 8), %g5 ! next entry - -kvmap_do_obp: - sethi %hi(prom_trans), %g5 - or %g5, %lo(prom_trans), %g5 - srlx %g4, 13, %g4 - sllx %g4, 13, %g4 - -1: ldx [%g5 + 0x00], %g6 ! base - brz,a,pn %g6, longpath ! no more entries, fail - mov TLB_SFSR, %g1 ! 
and restore %g1 - ldx [%g5 + 0x08], %g1 ! len - add %g6, %g1, %g1 ! end - cmp %g6, %g4 - bgu,pt %xcc, 2f - cmp %g4, %g1 - bgeu,pt %xcc, 2f - ldx [%g5 + 0x10], %g1 ! PTE - - /* TLB load, restore %g1, and return from trap. */ - sub %g4, %g6, %g6 - add %g1, %g6, %g5 - mov TLB_SFSR, %g1 - stxa %g5, [%g0] ASI_DTLB_DATA_IN +kvmap_itlb_load: + +661: stxa %g5, [%g0] ASI_ITLB_DATA_IN retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_ITLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_itlb_load + mov %g5, %g3 -2: ba,pt %xcc, 1b - add %g5, (3 * 8), %g5 ! next entry +kvmap_itlb_longpath: + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + nop + .previous + + rdpr %tpc, %g5 + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_ITLB, %g4 + +kvmap_itlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_itlb_load + nop + +kvmap_dtlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_dtlb_load + nop -/* - * On a first level data miss, check whether this is to the OBP range (note - * that such accesses can be made by prom, as well as by kernel using - * prom_getproperty on "address"), and if so, do not use vpte access ... - * rather, use information saved during inherit_prom_mappings() using 8k - * pagesize. 
- */ .align 32 -kvmap: - brgez,pn %g4, kvmap_nonlinear +kvmap_dtlb_tsb4m_load: + KTSB_LOCK_TAG(%g1, %g2, %g7) + KTSB_WRITE(%g1, %g5, %g6) + ba,pt %xcc, kvmap_dtlb_load nop -#ifdef CONFIG_DEBUG_PAGEALLOC +kvmap_dtlb: + /* %g6: TAG TARGET */ + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g4 + + /* sun4v_dtlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +kvmap_dtlb_4v: + brgez,pn %g4, kvmap_dtlb_nonlinear + nop + + /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */ + KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) + + /* TSB entry address left in %g1, lookup linear PTE. + * Must preserve %g1 and %g6 (TAG). + */ +kvmap_dtlb_tsb4m_miss: + sethi %hi(kpte_linear_bitmap), %g2 + or %g2, %lo(kpte_linear_bitmap), %g2 + + /* Clear the PAGE_OFFSET top virtual bits, then shift + * down to get a 256MB physical address index. + */ + sllx %g4, 21, %g5 + mov 1, %g7 + srlx %g5, 21 + 28, %g5 + + /* Don't try this at home kids... this depends upon srlx + * only taking the low 6 bits of the shift count in %g5. + */ + sllx %g7, %g5, %g7 + + /* Divide by 64 to get the offset into the bitmask. */ + srlx %g5, 6, %g5 + sllx %g5, 3, %g5 + + /* kern_linear_pte_xor[((mask & bit) ? 
1 : 0)] */ + ldx [%g2 + %g5], %g2 + andcc %g2, %g7, %g0 + sethi %hi(kern_linear_pte_xor), %g5 + or %g5, %lo(kern_linear_pte_xor), %g5 + bne,a,pt %xcc, 1f + add %g5, 8, %g5 + +1: ldx [%g5], %g2 + .globl kvmap_linear_patch kvmap_linear_patch: -#endif - ba,pt %xcc, kvmap_load + ba,pt %xcc, kvmap_dtlb_tsb4m_load xor %g2, %g4, %g5 -#ifdef CONFIG_DEBUG_PAGEALLOC - sethi %hi(swapper_pg_dir), %g5 - or %g5, %lo(swapper_pg_dir), %g5 - sllx %g4, 64 - (PGDIR_SHIFT + PGDIR_BITS), %g6 - srlx %g6, 64 - PAGE_SHIFT, %g6 - andn %g6, 0x3, %g6 - lduw [%g5 + %g6], %g5 - brz,pn %g5, longpath - sllx %g4, 64 - (PMD_SHIFT + PMD_BITS), %g6 - srlx %g6, 64 - PAGE_SHIFT, %g6 - sllx %g5, 11, %g5 - andn %g6, 0x3, %g6 - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g4, 64 - PMD_SHIFT, %g6 - srlx %g6, 64 - PAGE_SHIFT, %g6 - sllx %g5, 11, %g5 - andn %g6, 0x7, %g6 - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath +kvmap_dtlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_dtlb_longpath + KTSB_STORE(%g1, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ + +kvmap_dtlb_load: + +661: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_DTLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_dtlb_load + mov %g5, %g3 + +kvmap_dtlb_nonlinear: + /* Catch kernel NULL pointer derefs. 
*/ + sethi %hi(PAGE_SIZE), %g5 + cmp %g4, %g5 + bleu,pn %xcc, kvmap_dtlb_longpath nop - ba,a,pt %xcc, kvmap_load -#endif -kvmap_nonlinear: + KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) + +kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 - blu,pn %xcc, longpath + blu,pn %xcc, kvmap_dtlb_longpath mov (VMALLOC_END >> 24), %g5 sllx %g5, 24, %g5 cmp %g4, %g5 - bgeu,pn %xcc, longpath + bgeu,pn %xcc, kvmap_dtlb_longpath nop kvmap_check_obp: sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 - blu,pn %xcc, kvmap_vmalloc_addr + blu,pn %xcc, kvmap_dtlb_vmalloc_addr mov 0x1, %g5 sllx %g5, 32, %g5 cmp %g4, %g5 - blu,pn %xcc, kvmap_do_obp + blu,pn %xcc, kvmap_dtlb_obp nop - -kvmap_vmalloc_addr: - /* If we get here, a vmalloc addr was accessed, load kernel VPTE. */ - ldxa [%g3 + %g6] ASI_N, %g5 - brgez,pn %g5, longpath + ba,pt %xcc, kvmap_dtlb_vmalloc_addr nop -kvmap_load: - /* PTE is valid, load into TLB and return from trap. */ - stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB - retry +kvmap_dtlb_longpath: + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g5 + .previous + + rdpr %tl, %g3 + cmp %g3, 1 + +661: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g5 + .section .sun4v_2insn_patch, "ax" + .word 661b + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 + nop + .previous + + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline + nop diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 2ff7c32ab0ce..95ffa9418620 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -188,6 +188,7 @@ extern void psycho_init(int, char *); extern void schizo_init(int, char *); extern void schizo_plus_init(int, char *); extern void tomatillo_init(int, char *); +extern void sun4v_pci_init(int, char *); static struct { char *model_name; @@ -204,6 +205,7 @@ static struct { { "pci108e,8002", 
schizo_plus_init }, { "SUNW,tomatillo", tomatillo_init }, { "pci108e,a801", tomatillo_init }, + { "SUNW,sun4v-pci", sun4v_pci_init }, }; #define PCI_NUM_CONTROLLER_TYPES (sizeof(pci_controller_table) / \ sizeof(pci_controller_table[0])) @@ -283,6 +285,12 @@ int __init pcic_present(void) return pci_controller_scan(pci_is_controller); } +struct pci_iommu_ops *pci_iommu_ops; +EXPORT_SYMBOL(pci_iommu_ops); + +extern struct pci_iommu_ops pci_sun4u_iommu_ops, + pci_sun4v_iommu_ops; + /* Find each controller in the system, attach and initialize * software state structure for each and link into the * pci_controller_root. Setup the controller enough such @@ -290,6 +298,11 @@ int __init pcic_present(void) */ static void __init pci_controller_probe(void) { + if (tlb_type == hypervisor) + pci_iommu_ops = &pci_sun4v_iommu_ops; + else + pci_iommu_ops = &pci_sun4u_iommu_ops; + printk("PCI: Probing for controllers.\n"); pci_controller_scan(pci_controller_init); diff --git a/arch/sparc64/kernel/pci_common.c b/arch/sparc64/kernel/pci_common.c index 58310aacea28..33dedb1aacd4 100644 --- a/arch/sparc64/kernel/pci_common.c +++ b/arch/sparc64/kernel/pci_common.c @@ -39,6 +39,8 @@ static int __init find_device_prom_node(struct pci_pbm_info *pbm, { int node; + *nregs = 0; + /* * Return the PBM's PROM node in case we are it's PCI device, * as the PBM's reg property is different to standard PCI reg @@ -51,10 +53,8 @@ static int __init find_device_prom_node(struct pci_pbm_info *pbm, pdev->device == PCI_DEVICE_ID_SUN_SCHIZO || pdev->device == PCI_DEVICE_ID_SUN_TOMATILLO || pdev->device == PCI_DEVICE_ID_SUN_SABRE || - pdev->device == PCI_DEVICE_ID_SUN_HUMMINGBIRD)) { - *nregs = 0; + pdev->device == PCI_DEVICE_ID_SUN_HUMMINGBIRD)) return bus_prom_node; - } node = prom_getchild(bus_prom_node); while (node != 0) { @@ -541,135 +541,183 @@ void __init pci_assign_unassigned(struct pci_pbm_info *pbm, pci_assign_unassigned(pbm, bus); } -static int __init pci_intmap_match(struct pci_dev *pdev, unsigned 
int *interrupt) +static inline unsigned int pci_slot_swivel(struct pci_pbm_info *pbm, + struct pci_dev *toplevel_pdev, + struct pci_dev *pdev, + unsigned int interrupt) { - struct linux_prom_pci_intmap bridge_local_intmap[PROM_PCIIMAP_MAX], *intmap; - struct linux_prom_pci_intmask bridge_local_intmask, *intmask; - struct pcidev_cookie *dev_pcp = pdev->sysdata; - struct pci_pbm_info *pbm = dev_pcp->pbm; - struct linux_prom_pci_registers *pregs = dev_pcp->prom_regs; - unsigned int hi, mid, lo, irq; - int i, num_intmap, map_slot; + unsigned int ret; - intmap = &pbm->pbm_intmap[0]; - intmask = &pbm->pbm_intmask; - num_intmap = pbm->num_pbm_intmap; - map_slot = 0; + if (unlikely(interrupt < 1 || interrupt > 4)) { + printk("%s: Device %s interrupt value of %u is strange.\n", + pbm->name, pci_name(pdev), interrupt); + return interrupt; + } - /* If we are underneath a PCI bridge, use PROM register - * property of the parent bridge which is closest to - * the PBM. - * - * However if that parent bridge has interrupt map/mask - * properties of its own we use the PROM register property - * of the next child device on the path to PDEV. - * - * In detail the two cases are (note that the 'X' below is the - * 'next child on the path to PDEV' mentioned above): - * - * 1) PBM --> PCI bus lacking int{map,mask} --> X ... PDEV - * - * Here we use regs of 'PCI bus' device. - * - * 2) PBM --> PCI bus with int{map,mask} --> X ... PDEV - * - * Here we use regs of 'X'. Note that X can be PDEV. 
- */ - if (pdev->bus->number != pbm->pci_first_busno) { - struct pcidev_cookie *bus_pcp, *regs_pcp; - struct pci_dev *bus_dev, *regs_dev; - int plen; + ret = ((interrupt - 1 + (PCI_SLOT(pdev->devfn) & 3)) & 3) + 1; + + printk("%s: %s IRQ Swivel %s [%x:%x] -> [%x]\n", + pbm->name, pci_name(toplevel_pdev), pci_name(pdev), + interrupt, PCI_SLOT(pdev->devfn), ret); + + return ret; +} + +static inline unsigned int pci_apply_intmap(struct pci_pbm_info *pbm, + struct pci_dev *toplevel_pdev, + struct pci_dev *pbus, + struct pci_dev *pdev, + unsigned int interrupt, + unsigned int *cnode) +{ + struct linux_prom_pci_intmap imap[PROM_PCIIMAP_MAX]; + struct linux_prom_pci_intmask imask; + struct pcidev_cookie *pbus_pcp = pbus->sysdata; + struct pcidev_cookie *pdev_pcp = pdev->sysdata; + struct linux_prom_pci_registers *pregs = pdev_pcp->prom_regs; + int plen, num_imap, i; + unsigned int hi, mid, lo, irq, orig_interrupt; + + *cnode = pbus_pcp->prom_node; + + plen = prom_getproperty(pbus_pcp->prom_node, "interrupt-map", + (char *) &imap[0], sizeof(imap)); + if (plen <= 0 || + (plen % sizeof(struct linux_prom_pci_intmap)) != 0) { + printk("%s: Device %s interrupt-map has bad len %d\n", + pbm->name, pci_name(pbus), plen); + goto no_intmap; + } + num_imap = plen / sizeof(struct linux_prom_pci_intmap); + + plen = prom_getproperty(pbus_pcp->prom_node, "interrupt-map-mask", + (char *) &imask, sizeof(imask)); + if (plen <= 0 || + (plen % sizeof(struct linux_prom_pci_intmask)) != 0) { + printk("%s: Device %s interrupt-map-mask has bad len %d\n", + pbm->name, pci_name(pbus), plen); + goto no_intmap; + } + + orig_interrupt = interrupt; - bus_dev = pdev->bus->self; - regs_dev = pdev; + hi = pregs->phys_hi & imask.phys_hi; + mid = pregs->phys_mid & imask.phys_mid; + lo = pregs->phys_lo & imask.phys_lo; + irq = interrupt & imask.interrupt; - while (bus_dev->bus && - bus_dev->bus->number != pbm->pci_first_busno) { - regs_dev = bus_dev; - bus_dev = bus_dev->bus->self; + for (i = 0; i < 
num_imap; i++) { + if (imap[i].phys_hi == hi && + imap[i].phys_mid == mid && + imap[i].phys_lo == lo && + imap[i].interrupt == irq) { + *cnode = imap[i].cnode; + interrupt = imap[i].cinterrupt; } + } - regs_pcp = regs_dev->sysdata; - pregs = regs_pcp->prom_regs; + printk("%s: %s MAP BUS %s DEV %s [%x] -> [%x]\n", + pbm->name, pci_name(toplevel_pdev), + pci_name(pbus), pci_name(pdev), + orig_interrupt, interrupt); - bus_pcp = bus_dev->sysdata; +no_intmap: + return interrupt; +} - /* But if the PCI bridge has it's own interrupt map - * and mask properties, use that and the regs of the - * PCI entity at the next level down on the path to the - * device. - */ - plen = prom_getproperty(bus_pcp->prom_node, "interrupt-map", - (char *) &bridge_local_intmap[0], - sizeof(bridge_local_intmap)); - if (plen != -1) { - intmap = &bridge_local_intmap[0]; - num_intmap = plen / sizeof(struct linux_prom_pci_intmap); - plen = prom_getproperty(bus_pcp->prom_node, - "interrupt-map-mask", - (char *) &bridge_local_intmask, - sizeof(bridge_local_intmask)); - if (plen == -1) { - printk("pci_intmap_match: Warning! Bridge has intmap " - "but no intmask.\n"); - printk("pci_intmap_match: Trying to recover.\n"); - return 0; - } +/* For each PCI bus on the way to the root: + * 1) If it has an interrupt-map property, apply it. + * 2) Else, swivel the interrupt number based upon the PCI device number. + * + * Return the "IRQ controller" node. If this is the PBM's device node, + * all interrupt translations are complete, else we should use that node's + * "reg" property to apply the PBM's "interrupt-{map,mask}" to the interrupt. 
+ */ +static unsigned int __init pci_intmap_match_to_root(struct pci_pbm_info *pbm, + struct pci_dev *pdev, + unsigned int *interrupt) +{ + struct pci_dev *toplevel_pdev = pdev; + struct pcidev_cookie *toplevel_pcp = toplevel_pdev->sysdata; + unsigned int cnode = toplevel_pcp->prom_node; + + while (pdev->bus->number != pbm->pci_first_busno) { + struct pci_dev *pbus = pdev->bus->self; + struct pcidev_cookie *pcp = pbus->sysdata; + int plen; - if (pdev->bus->self != bus_dev) - map_slot = 1; + plen = prom_getproplen(pcp->prom_node, "interrupt-map"); + if (plen <= 0) { + *interrupt = pci_slot_swivel(pbm, toplevel_pdev, + pdev, *interrupt); + cnode = pcp->prom_node; } else { - pregs = bus_pcp->prom_regs; - map_slot = 1; + *interrupt = pci_apply_intmap(pbm, toplevel_pdev, + pbus, pdev, + *interrupt, &cnode); + + while (pcp->prom_node != cnode && + pbus->bus->number != pbm->pci_first_busno) { + pbus = pbus->bus->self; + pcp = pbus->sysdata; + } } - } + pdev = pbus; - if (map_slot) { - *interrupt = ((*interrupt - - 1 - + PCI_SLOT(pdev->devfn)) & 0x3) + 1; + if (cnode == pbm->prom_node) + break; } - hi = pregs->phys_hi & intmask->phys_hi; - mid = pregs->phys_mid & intmask->phys_mid; - lo = pregs->phys_lo & intmask->phys_lo; - irq = *interrupt & intmask->interrupt; - - for (i = 0; i < num_intmap; i++) { - if (intmap[i].phys_hi == hi && - intmap[i].phys_mid == mid && - intmap[i].phys_lo == lo && - intmap[i].interrupt == irq) { - *interrupt = intmap[i].cinterrupt; - printk("PCI-IRQ: Routing bus[%2x] slot[%2x] map[%d] to INO[%02x]\n", - pdev->bus->number, PCI_SLOT(pdev->devfn), - map_slot, *interrupt); - return 1; - } + return cnode; +} + +static int __init pci_intmap_match(struct pci_dev *pdev, unsigned int *interrupt) +{ + struct pcidev_cookie *dev_pcp = pdev->sysdata; + struct pci_pbm_info *pbm = dev_pcp->pbm; + struct linux_prom_pci_registers reg[PROMREG_MAX]; + unsigned int hi, mid, lo, irq; + int i, cnode, plen; + + cnode = pci_intmap_match_to_root(pbm, pdev, interrupt); 
+ if (cnode == pbm->prom_node) + goto success; + + plen = prom_getproperty(cnode, "reg", (char *) reg, sizeof(reg)); + if (plen <= 0 || + (plen % sizeof(struct linux_prom_pci_registers)) != 0) { + printk("%s: OBP node %x reg property has bad len %d\n", + pbm->name, cnode, plen); + goto fail; } - /* We will run this code even if pbm->num_pbm_intmap is zero, just so - * we can apply the slot mapping to the PROM interrupt property value. - * So do not spit out these warnings in that case. - */ - if (num_intmap != 0) { - /* Print it both to OBP console and kernel one so that if bootup - * hangs here the user has the information to report. - */ - prom_printf("pci_intmap_match: bus %02x, devfn %02x: ", - pdev->bus->number, pdev->devfn); - prom_printf("IRQ [%08x.%08x.%08x.%08x] not found in interrupt-map\n", - pregs->phys_hi, pregs->phys_mid, pregs->phys_lo, *interrupt); - prom_printf("Please email this information to davem@redhat.com\n"); - - printk("pci_intmap_match: bus %02x, devfn %02x: ", - pdev->bus->number, pdev->devfn); - printk("IRQ [%08x.%08x.%08x.%08x] not found in interrupt-map\n", - pregs->phys_hi, pregs->phys_mid, pregs->phys_lo, *interrupt); - printk("Please email this information to davem@redhat.com\n"); + hi = reg[0].phys_hi & pbm->pbm_intmask.phys_hi; + mid = reg[0].phys_mid & pbm->pbm_intmask.phys_mid; + lo = reg[0].phys_lo & pbm->pbm_intmask.phys_lo; + irq = *interrupt & pbm->pbm_intmask.interrupt; + + for (i = 0; i < pbm->num_pbm_intmap; i++) { + struct linux_prom_pci_intmap *intmap; + + intmap = &pbm->pbm_intmap[i]; + + if (intmap->phys_hi == hi && + intmap->phys_mid == mid && + intmap->phys_lo == lo && + intmap->interrupt == irq) { + *interrupt = intmap->cinterrupt; + goto success; + } } +fail: return 0; + +success: + printk("PCI-IRQ: Routing bus[%2x] slot[%2x] to INO[%02x]\n", + pdev->bus->number, PCI_SLOT(pdev->devfn), + *interrupt); + return 1; } static void __init pdev_fixup_irq(struct pci_dev *pdev) @@ -703,16 +751,18 @@ static void __init 
pdev_fixup_irq(struct pci_dev *pdev) return; } - /* Fully specified already? */ - if (((prom_irq & PCI_IRQ_IGN) >> 6) == portid) { - pdev->irq = p->irq_build(pbm, pdev, prom_irq); - goto have_irq; - } + if (tlb_type != hypervisor) { + /* Fully specified already? */ + if (((prom_irq & PCI_IRQ_IGN) >> 6) == portid) { + pdev->irq = p->irq_build(pbm, pdev, prom_irq); + goto have_irq; + } - /* An onboard device? (bit 5 set) */ - if ((prom_irq & PCI_IRQ_INO) & 0x20) { - pdev->irq = p->irq_build(pbm, pdev, (portid << 6 | prom_irq)); - goto have_irq; + /* An onboard device? (bit 5 set) */ + if ((prom_irq & PCI_IRQ_INO) & 0x20) { + pdev->irq = p->irq_build(pbm, pdev, (portid << 6 | prom_irq)); + goto have_irq; + } } /* Can we find a matching entry in the interrupt-map? */ @@ -927,33 +977,30 @@ void pci_register_legacy_regions(struct resource *io_res, struct resource *p; /* VGA Video RAM. */ - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return; - memset(p, 0, sizeof(*p)); p->name = "Video RAM area"; p->start = mem_res->start + 0xa0000UL; p->end = p->start + 0x1ffffUL; p->flags = IORESOURCE_BUSY; request_resource(mem_res, p); - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return; - memset(p, 0, sizeof(*p)); p->name = "System ROM"; p->start = mem_res->start + 0xf0000UL; p->end = p->start + 0xffffUL; p->flags = IORESOURCE_BUSY; request_resource(mem_res, p); - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return; - memset(p, 0, sizeof(*p)); p->name = "Video ROM"; p->start = mem_res->start + 0xc0000UL; p->end = p->start + 0x7fffUL; diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index a11910be1013..8efbc139769d 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -139,12 +139,11 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, /* Allocate and initialize the free area 
map. */ sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kmalloc(sz, GFP_KERNEL); + iommu->arena.map = kzalloc(sz, GFP_KERNEL); if (!iommu->arena.map) { prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); prom_halt(); } - memset(iommu->arena.map, 0, sz); iommu->arena.limit = num_tsb_entries; /* Allocate and initialize the dummy page which we @@ -219,7 +218,7 @@ static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx) * DMA for PCI device PDEV. Return non-NULL cpu-side address if * successful and set *DMA_ADDRP to the PCI side dma address. */ -void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) +static void *pci_4u_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -267,7 +266,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad } /* Free and unmap a consistent DMA translation. */ -void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma) +static void pci_4u_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -294,7 +293,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ /* Map a single buffer at PTR of SZ bytes for PCI DMA * in streaming mode. */ -dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction) +static dma_addr_t pci_4u_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -415,7 +414,7 @@ do_flush_sync: } /* Unmap a single streaming mode DMA translation. 
*/ -void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +static void pci_4u_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -548,7 +547,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, * When making changes here, inspect the assembly output. I was having * hard time to kepp this routine out of using stack slots for holding variables. */ -int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +static int pci_4u_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -562,9 +561,9 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int /* Fast path single entry scatterlists. */ if (nelems == 1) { sglist->dma_address = - pci_map_single(pdev, - (page_address(sglist->page) + sglist->offset), - sglist->length, direction); + pci_4u_map_single(pdev, + (page_address(sglist->page) + sglist->offset), + sglist->length, direction); if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) return 0; sglist->dma_length = sglist->length; @@ -635,7 +634,7 @@ bad_no_ctx: } /* Unmap a set of streaming mode DMA translations. */ -void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +static void pci_4u_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -695,7 +694,7 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. 
*/ -void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +static void pci_4u_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -735,7 +734,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size /* Make physical memory consistent for a set of streaming * mode DMA translations after a transfer. */ -void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +static void pci_4u_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -776,6 +775,17 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i spin_unlock_irqrestore(&iommu->lock, flags); } +struct pci_iommu_ops pci_sun4u_iommu_ops = { + .alloc_consistent = pci_4u_alloc_consistent, + .free_consistent = pci_4u_free_consistent, + .map_single = pci_4u_map_single, + .unmap_single = pci_4u_unmap_single, + .map_sg = pci_4u_map_sg, + .unmap_sg = pci_4u_unmap_sg, + .dma_sync_single_for_cpu = pci_4u_dma_sync_single_for_cpu, + .dma_sync_sg_for_cpu = pci_4u_dma_sync_sg_for_cpu, +}; + static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit) { struct pci_dev *ali_isa_bridge; diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index c03ed5f49d31..d17878b145c2 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -286,17 +286,17 @@ static unsigned char psycho_pil_table[] = { /*0x14*/0, 0, 0, 0, /* PCI B slot 1 Int A, B, C, D */ /*0x18*/0, 0, 0, 0, /* PCI B slot 2 Int A, B, C, D */ /*0x1c*/0, 0, 0, 0, /* PCI B slot 3 Int A, B, C, D */ -/*0x20*/4, /* SCSI */ +/*0x20*/5, /* SCSI */ /*0x21*/5, /* Ethernet */ /*0x22*/8, /* Parallel Port */ /*0x23*/13, /* Audio Record */ /*0x24*/14, /* Audio Playback */ /*0x25*/15, 
/* PowerFail */ -/*0x26*/4, /* second SCSI */ +/*0x26*/5, /* second SCSI */ /*0x27*/11, /* Floppy */ -/*0x28*/4, /* Spare Hardware */ +/*0x28*/5, /* Spare Hardware */ /*0x29*/9, /* Keyboard */ -/*0x2a*/4, /* Mouse */ +/*0x2a*/5, /* Mouse */ /*0x2b*/12, /* Serial */ /*0x2c*/10, /* Timer 0 */ /*0x2d*/11, /* Timer 1 */ @@ -313,11 +313,11 @@ static int psycho_ino_to_pil(struct pci_dev *pdev, unsigned int ino) ret = psycho_pil_table[ino]; if (ret == 0 && pdev == NULL) { - ret = 4; + ret = 5; } else if (ret == 0) { switch ((pdev->class >> 16) & 0xff) { case PCI_BASE_CLASS_STORAGE: - ret = 4; + ret = 5; break; case PCI_BASE_CLASS_NETWORK: @@ -336,7 +336,7 @@ static int psycho_ino_to_pil(struct pci_dev *pdev, unsigned int ino) break; default: - ret = 4; + ret = 5; break; }; } @@ -1164,7 +1164,7 @@ static void pbm_config_busmastering(struct pci_pbm_info *pbm) static void pbm_scan_bus(struct pci_controller_info *p, struct pci_pbm_info *pbm) { - struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) { prom_printf("PSYCHO: Critical allocation failure.\n"); @@ -1172,7 +1172,6 @@ static void pbm_scan_bus(struct pci_controller_info *p, } /* All we care about is the PBM. 
*/ - memset(cookie, 0, sizeof(*cookie)); cookie->pbm = pbm; pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, @@ -1465,18 +1464,16 @@ void psycho_init(int node, char *model_name) } } - p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); + p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); if (!p) { prom_printf("PSYCHO: Fatal memory allocation error.\n"); prom_halt(); } - memset(p, 0, sizeof(*p)); - iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC); if (!iommu) { prom_printf("PSYCHO: Fatal memory allocation error.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_A.iommu = p->pbm_B.iommu = iommu; p->next = pci_controller_root; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index da8e1364194f..f67bb7f078cf 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -533,17 +533,17 @@ static unsigned char sabre_pil_table[] = { /*0x14*/0, 0, 0, 0, /* PCI B slot 1 Int A, B, C, D */ /*0x18*/0, 0, 0, 0, /* PCI B slot 2 Int A, B, C, D */ /*0x1c*/0, 0, 0, 0, /* PCI B slot 3 Int A, B, C, D */ -/*0x20*/4, /* SCSI */ +/*0x20*/5, /* SCSI */ /*0x21*/5, /* Ethernet */ /*0x22*/8, /* Parallel Port */ /*0x23*/13, /* Audio Record */ /*0x24*/14, /* Audio Playback */ /*0x25*/15, /* PowerFail */ -/*0x26*/4, /* second SCSI */ +/*0x26*/5, /* second SCSI */ /*0x27*/11, /* Floppy */ -/*0x28*/4, /* Spare Hardware */ +/*0x28*/5, /* Spare Hardware */ /*0x29*/9, /* Keyboard */ -/*0x2a*/4, /* Mouse */ +/*0x2a*/5, /* Mouse */ /*0x2b*/12, /* Serial */ /*0x2c*/10, /* Timer 0 */ /*0x2d*/11, /* Timer 1 */ @@ -565,11 +565,11 @@ static int sabre_ino_to_pil(struct pci_dev *pdev, unsigned int ino) ret = sabre_pil_table[ino]; if (ret == 0 && pdev == NULL) { - ret = 4; + ret = 5; } else if (ret == 0) { switch ((pdev->class >> 16) & 0xff) { case PCI_BASE_CLASS_STORAGE: - ret = 4; + ret = 5; break; case PCI_BASE_CLASS_NETWORK: @@ -588,7 +588,7 @@ static int 
sabre_ino_to_pil(struct pci_dev *pdev, unsigned int ino) break; default: - ret = 4; + ret = 5; break; }; } @@ -1167,7 +1167,7 @@ static void apb_init(struct pci_controller_info *p, struct pci_bus *sabre_bus) static struct pcidev_cookie *alloc_bridge_cookie(struct pci_pbm_info *pbm) { - struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) { prom_printf("SABRE: Critical allocation failure.\n"); @@ -1175,7 +1175,6 @@ static struct pcidev_cookie *alloc_bridge_cookie(struct pci_pbm_info *pbm) } /* All we care about is the PBM. */ - memset(cookie, 0, sizeof(*cookie)); cookie->pbm = pbm; return cookie; @@ -1556,19 +1555,17 @@ void sabre_init(int pnode, char *model_name) } } - p = kmalloc(sizeof(*p), GFP_ATOMIC); + p = kzalloc(sizeof(*p), GFP_ATOMIC); if (!p) { prom_printf("SABRE: Error, kmalloc(pci_controller_info) failed.\n"); prom_halt(); } - memset(p, 0, sizeof(*p)); - iommu = kmalloc(sizeof(*iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC); if (!iommu) { prom_printf("SABRE: Error, kmalloc(pci_iommu) failed.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_A.iommu = p->pbm_B.iommu = iommu; upa_portid = prom_getintdefault(pnode, "upa-portid", 0xff); diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index d8c4e0919b4e..7fe4de03ac2e 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -243,8 +243,8 @@ static unsigned char schizo_pil_table[] = { /*0x0c*/0, 0, 0, 0, /* PCI slot 3 Int A, B, C, D */ /*0x10*/0, 0, 0, 0, /* PCI slot 4 Int A, B, C, D */ /*0x14*/0, 0, 0, 0, /* PCI slot 5 Int A, B, C, D */ -/*0x18*/4, /* SCSI */ -/*0x19*/4, /* second SCSI */ +/*0x18*/5, /* SCSI */ +/*0x19*/5, /* second SCSI */ /*0x1a*/0, /* UNKNOWN */ /*0x1b*/0, /* UNKNOWN */ /*0x1c*/8, /* Parallel */ @@ -254,7 +254,7 @@ static unsigned char schizo_pil_table[] = { /*0x20*/13, /* Audio Record */ 
/*0x21*/14, /* Audio Playback */ /*0x22*/12, /* Serial */ -/*0x23*/4, /* EBUS I2C */ +/*0x23*/5, /* EBUS I2C */ /*0x24*/10, /* RTC Clock */ /*0x25*/11, /* Floppy */ /*0x26*/0, /* UNKNOWN */ @@ -296,11 +296,11 @@ static int schizo_ino_to_pil(struct pci_dev *pdev, unsigned int ino) ret = schizo_pil_table[ino]; if (ret == 0 && pdev == NULL) { - ret = 4; + ret = 5; } else if (ret == 0) { switch ((pdev->class >> 16) & 0xff) { case PCI_BASE_CLASS_STORAGE: - ret = 4; + ret = 5; break; case PCI_BASE_CLASS_NETWORK: @@ -319,7 +319,7 @@ static int schizo_ino_to_pil(struct pci_dev *pdev, unsigned int ino) break; default: - ret = 4; + ret = 5; break; }; } @@ -1525,7 +1525,7 @@ static void pbm_config_busmastering(struct pci_pbm_info *pbm) static void pbm_scan_bus(struct pci_controller_info *p, struct pci_pbm_info *pbm) { - struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) { prom_printf("%s: Critical allocation failure.\n", pbm->name); @@ -1533,7 +1533,6 @@ static void pbm_scan_bus(struct pci_controller_info *p, } /* All we care about is the PBM. 
*/ - memset(cookie, 0, sizeof(*cookie)); cookie->pbm = pbm; pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, @@ -2120,27 +2119,24 @@ static void __schizo_init(int node, char *model_name, int chip_type) } } - p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); + p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); if (!p) { prom_printf("SCHIZO: Fatal memory allocation error.\n"); prom_halt(); } - memset(p, 0, sizeof(*p)); - iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC); if (!iommu) { prom_printf("SCHIZO: Fatal memory allocation error.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_A.iommu = iommu; - iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC); if (!iommu) { prom_printf("SCHIZO: Fatal memory allocation error.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_B.iommu = iommu; p->next = pci_controller_root; diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c new file mode 100644 index 000000000000..9372d4f376d5 --- /dev/null +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -0,0 +1,1147 @@ +/* pci_sun4v.c: SUN4V specific PCI controller support. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> + +#include <asm/pbm.h> +#include <asm/iommu.h> +#include <asm/irq.h> +#include <asm/upa.h> +#include <asm/pstate.h> +#include <asm/oplib.h> +#include <asm/hypervisor.h> + +#include "pci_impl.h" +#include "iommu_common.h" + +#include "pci_sun4v.h" + +#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) + +struct pci_iommu_batch { + struct pci_dev *pdev; /* Device mapping is for. */ + unsigned long prot; /* IOMMU page protections */ + unsigned long entry; /* Index into IOTSB. 
*/ + u64 *pglist; /* List of physical pages */ + unsigned long npages; /* Number of pages in list. */ +}; + +static DEFINE_PER_CPU(struct pci_iommu_batch, pci_iommu_batch); + +/* Interrupts must be disabled. */ +static inline void pci_iommu_batch_start(struct pci_dev *pdev, unsigned long prot, unsigned long entry) +{ + struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch); + + p->pdev = pdev; + p->prot = prot; + p->entry = entry; + p->npages = 0; +} + +/* Interrupts must be disabled. */ +static long pci_iommu_batch_flush(struct pci_iommu_batch *p) +{ + struct pcidev_cookie *pcp = p->pdev->sysdata; + unsigned long devhandle = pcp->pbm->devhandle; + unsigned long prot = p->prot; + unsigned long entry = p->entry; + u64 *pglist = p->pglist; + unsigned long npages = p->npages; + + while (npages != 0) { + long num; + + num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist)); + if (unlikely(num < 0)) { + if (printk_ratelimit()) + printk("pci_iommu_batch_flush: IOMMU map of " + "[%08lx:%08lx:%lx:%lx:%lx] failed with " + "status %ld\n", + devhandle, HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist), num); + return -1; + } + + entry += num; + npages -= num; + pglist += num; + } + + p->entry = entry; + p->npages = 0; + + return 0; +} + +/* Interrupts must be disabled. */ +static inline long pci_iommu_batch_add(u64 phys_page) +{ + struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch); + + BUG_ON(p->npages >= PGLIST_NENTS); + + p->pglist[p->npages++] = phys_page; + if (p->npages == PGLIST_NENTS) + return pci_iommu_batch_flush(p); + + return 0; +} + +/* Interrupts must be disabled. 
*/ +static inline long pci_iommu_batch_end(void) +{ + struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch); + + BUG_ON(p->npages >= PGLIST_NENTS); + + return pci_iommu_batch_flush(p); +} + +static long pci_arena_alloc(struct pci_iommu_arena *arena, unsigned long npages) +{ + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. */ + return -1; + } + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; + } + } + + for (i = n; i < end; i++) + __set_bit(i, arena->map); + + arena->hint = end; + + return n; +} + +static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages) +{ + unsigned long i; + + for (i = base; i < (base + npages); i++) + __clear_bit(i, arena->map); +} + +static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, order, first_page, npages, n; + void *ret; + long entry; + + size = IO_PAGE_ALIGN(size); + order = get_order(size); + if (unlikely(order >= MAX_ORDER)) + return NULL; + + npages = size >> IO_PAGE_SHIFT; + + first_page = __get_free_pages(GFP_ATOMIC, order); + if (unlikely(first_page == 0UL)) + return NULL; + + memset((char *)first_page, 0, PAGE_SIZE << order); + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + + spin_lock_irqsave(&iommu->lock, flags); + entry = pci_arena_alloc(&iommu->arena, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry < 0L)) + goto arena_alloc_fail; + + *dma_addrp = (iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT)); + ret = (void *) first_page; + first_page = __pa(first_page); + + 
local_irq_save(flags); + + pci_iommu_batch_start(pdev, + (HV_PCI_MAP_ATTR_READ | + HV_PCI_MAP_ATTR_WRITE), + entry); + + for (n = 0; n < npages; n++) { + long err = pci_iommu_batch_add(first_page + (n * PAGE_SIZE)); + if (unlikely(err < 0L)) + goto iommu_map_fail; + } + + if (unlikely(pci_iommu_batch_end() < 0L)) + goto iommu_map_fail; + + local_irq_restore(flags); + + return ret; + +iommu_map_fail: + /* Interrupts are disabled. */ + spin_lock(&iommu->lock); + pci_arena_free(&iommu->arena, entry, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + +arena_alloc_fail: + free_pages(first_page, order); + return NULL; +} + +static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, order, npages, entry; + u32 devhandle; + + npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + devhandle = pcp->pbm->devhandle; + entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + spin_lock_irqsave(&iommu->lock, flags); + + pci_arena_free(&iommu->arena, entry, npages); + + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); + + order = get_order(size); + if (order < 10) + free_pages((unsigned long)cpu, order); +} + +static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, npages, oaddr; + unsigned long i, base_paddr; + u32 bus_addr, ret; + unsigned long prot; + long entry; + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + + if (unlikely(direction == PCI_DMA_NONE)) + goto bad; + + oaddr = (unsigned long)ptr; + npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + + 
spin_lock_irqsave(&iommu->lock, flags); + entry = pci_arena_alloc(&iommu->arena, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry < 0L)) + goto bad; + + bus_addr = (iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT)); + ret = bus_addr | (oaddr & ~IO_PAGE_MASK); + base_paddr = __pa(oaddr & IO_PAGE_MASK); + prot = HV_PCI_MAP_ATTR_READ; + if (direction != PCI_DMA_TODEVICE) + prot |= HV_PCI_MAP_ATTR_WRITE; + + local_irq_save(flags); + + pci_iommu_batch_start(pdev, prot, entry); + + for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) { + long err = pci_iommu_batch_add(base_paddr); + if (unlikely(err < 0L)) + goto iommu_map_fail; + } + if (unlikely(pci_iommu_batch_end() < 0L)) + goto iommu_map_fail; + + local_irq_restore(flags); + + return ret; + +bad: + if (printk_ratelimit()) + WARN_ON(1); + return PCI_DMA_ERROR_CODE; + +iommu_map_fail: + /* Interrupts are disabled. */ + spin_lock(&iommu->lock); + pci_arena_free(&iommu->arena, entry, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + return PCI_DMA_ERROR_CODE; +} + +static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, npages; + long entry; + u32 devhandle; + + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + devhandle = pcp->pbm->devhandle; + + npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + bus_addr &= IO_PAGE_MASK; + + spin_lock_irqsave(&iommu->lock, flags); + + entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; + pci_arena_free(&iommu->arena, entry, npages); + + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + 
+#define SG_ENT_PHYS_ADDRESS(SG) \ + (__pa(page_address((SG)->page)) + (SG)->offset) + +static inline long fill_sg(long entry, struct pci_dev *pdev, + struct scatterlist *sg, + int nused, int nelems, unsigned long prot) +{ + struct scatterlist *dma_sg = sg; + struct scatterlist *sg_end = sg + nelems; + unsigned long flags; + int i; + + local_irq_save(flags); + + pci_iommu_batch_start(pdev, prot, entry); + + for (i = 0; i < nused; i++) { + unsigned long pteval = ~0UL; + u32 dma_npages; + + dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) + + dma_sg->dma_length + + ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT; + do { + unsigned long offset; + signed int len; + + /* If we are here, we know we have at least one + * more page to map. So walk forward until we + * hit a page crossing, and begin creating new + * mappings from that spot. + */ + for (;;) { + unsigned long tmp; + + tmp = SG_ENT_PHYS_ADDRESS(sg); + len = sg->length; + if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) { + pteval = tmp & IO_PAGE_MASK; + offset = tmp & (IO_PAGE_SIZE - 1UL); + break; + } + if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) { + pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK; + offset = 0UL; + len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL))); + break; + } + sg++; + } + + pteval = (pteval & IOPTE_PAGE); + while (len > 0) { + long err; + + err = pci_iommu_batch_add(pteval); + if (unlikely(err < 0L)) + goto iommu_map_failed; + + pteval += IO_PAGE_SIZE; + len -= (IO_PAGE_SIZE - offset); + offset = 0; + dma_npages--; + } + + pteval = (pteval & IOPTE_PAGE) + len; + sg++; + + /* Skip over any tail mappings we've fully mapped, + * adjusting pteval along the way. Stop when we + * detect a page crossing event. 
+ */ + while (sg < sg_end && + (pteval << (64 - IO_PAGE_SHIFT)) != 0UL && + (pteval == SG_ENT_PHYS_ADDRESS(sg)) && + ((pteval ^ + (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { + pteval += sg->length; + sg++; + } + if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL) + pteval = ~0UL; + } while (dma_npages != 0); + dma_sg++; + } + + if (unlikely(pci_iommu_batch_end() < 0L)) + goto iommu_map_failed; + + local_irq_restore(flags); + return 0; + +iommu_map_failed: + local_irq_restore(flags); + return -1L; +} + +static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, npages, prot; + u32 dma_base; + struct scatterlist *sgtmp; + long entry, err; + int used; + + /* Fast path single entry scatterlists. */ + if (nelems == 1) { + sglist->dma_address = + pci_4v_map_single(pdev, + (page_address(sglist->page) + sglist->offset), + sglist->length, direction); + if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) + return 0; + sglist->dma_length = sglist->length; + return 1; + } + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + + if (unlikely(direction == PCI_DMA_NONE)) + goto bad; + + /* Step 1: Prepare scatter list. */ + npages = prepare_sg(sglist, nelems); + + /* Step 2: Allocate a cluster and context, if necessary. */ + spin_lock_irqsave(&iommu->lock, flags); + entry = pci_arena_alloc(&iommu->arena, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry < 0L)) + goto bad; + + dma_base = iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT); + + /* Step 3: Normalize DMA addresses. */ + used = nelems; + + sgtmp = sglist; + while (used && sgtmp->dma_length) { + sgtmp->dma_address += dma_base; + sgtmp++; + used--; + } + used = nelems - used; + + /* Step 4: Create the mappings. 
*/ + prot = HV_PCI_MAP_ATTR_READ; + if (direction != PCI_DMA_TODEVICE) + prot |= HV_PCI_MAP_ATTR_WRITE; + + err = fill_sg(entry, pdev, sglist, used, nelems, prot); + if (unlikely(err < 0L)) + goto iommu_map_failed; + + return used; + +bad: + if (printk_ratelimit()) + WARN_ON(1); + return 0; + +iommu_map_failed: + spin_lock_irqsave(&iommu->lock, flags); + pci_arena_free(&iommu->arena, entry, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, i, npages; + long entry; + u32 devhandle, bus_addr; + + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + } + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + devhandle = pcp->pbm->devhandle; + + bus_addr = sglist->dma_address & IO_PAGE_MASK; + + for (i = 1; i < nelems; i++) + if (sglist[i].dma_length == 0) + break; + i--; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + bus_addr) >> IO_PAGE_SHIFT; + + entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + spin_lock_irqsave(&iommu->lock, flags); + + pci_arena_free(&iommu->arena, entry, npages); + + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +{ + /* Nothing to do... */ +} + +static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +{ + /* Nothing to do... 
*/ +} + +struct pci_iommu_ops pci_sun4v_iommu_ops = { + .alloc_consistent = pci_4v_alloc_consistent, + .free_consistent = pci_4v_free_consistent, + .map_single = pci_4v_map_single, + .unmap_single = pci_4v_unmap_single, + .map_sg = pci_4v_map_sg, + .unmap_sg = pci_4v_unmap_sg, + .dma_sync_single_for_cpu = pci_4v_dma_sync_single_for_cpu, + .dma_sync_sg_for_cpu = pci_4v_dma_sync_sg_for_cpu, +}; + +/* SUN4V PCI configuration space accessors. */ + +static inline int pci_sun4v_out_of_range(struct pci_pbm_info *pbm, unsigned int bus, unsigned int device, unsigned int func) +{ + if (bus == pbm->pci_first_busno) { + if (device == 0 && func == 0) + return 0; + return 1; + } + + if (bus < pbm->pci_first_busno || + bus > pbm->pci_last_busno) + return 1; + return 0; +} + +static int pci_sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 *value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + u32 devhandle = pbm->devhandle; + unsigned int bus = bus_dev->number; + unsigned int device = PCI_SLOT(devfn); + unsigned int func = PCI_FUNC(devfn); + unsigned long ret; + + if (pci_sun4v_out_of_range(pbm, bus, device, func)) { + ret = ~0UL; + } else { + ret = pci_sun4v_config_get(devhandle, + HV_PCI_DEVICE_BUILD(bus, device, func), + where, size); +#if 0 + printk("rcfg: [%x:%x:%x:%d]=[%lx]\n", + devhandle, HV_PCI_DEVICE_BUILD(bus, device, func), + where, size, ret); +#endif + } + switch (size) { + case 1: + *value = ret & 0xff; + break; + case 2: + *value = ret & 0xffff; + break; + case 4: + *value = ret & 0xffffffff; + break; + }; + + + return PCIBIOS_SUCCESSFUL; +} + +static int pci_sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + u32 devhandle = pbm->devhandle; + unsigned int bus = bus_dev->number; + unsigned int device = PCI_SLOT(devfn); + unsigned int func = PCI_FUNC(devfn); + unsigned long ret; + + if (pci_sun4v_out_of_range(pbm, bus, 
device, func)) { + /* Do nothing. */ + } else { + ret = pci_sun4v_config_put(devhandle, + HV_PCI_DEVICE_BUILD(bus, device, func), + where, size, value); +#if 0 + printk("wcfg: [%x:%x:%x:%d] v[%x] == [%lx]\n", + devhandle, HV_PCI_DEVICE_BUILD(bus, device, func), + where, size, value, ret); +#endif + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops pci_sun4v_ops = { + .read = pci_sun4v_read_pci_cfg, + .write = pci_sun4v_write_pci_cfg, +}; + + +static void pbm_scan_bus(struct pci_controller_info *p, + struct pci_pbm_info *pbm) +{ + struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + + if (!cookie) { + prom_printf("%s: Critical allocation failure.\n", pbm->name); + prom_halt(); + } + + /* All we care about is the PBM. */ + memset(cookie, 0, sizeof(*cookie)); + cookie->pbm = pbm; + + pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, p->pci_ops, pbm); +#if 0 + pci_fixup_host_bridge_self(pbm->pci_bus); + pbm->pci_bus->self->sysdata = cookie; +#endif + pci_fill_in_pbm_cookies(pbm->pci_bus, pbm, + pbm->prom_node); + pci_record_assignments(pbm, pbm->pci_bus); + pci_assign_unassigned(pbm, pbm->pci_bus); + pci_fixup_irq(pbm, pbm->pci_bus); + pci_determine_66mhz_disposition(pbm, pbm->pci_bus); + pci_setup_busmastering(pbm, pbm->pci_bus); +} + +static void pci_sun4v_scan_bus(struct pci_controller_info *p) +{ + if (p->pbm_A.prom_node) { + p->pbm_A.is_66mhz_capable = + prom_getbool(p->pbm_A.prom_node, "66mhz-capable"); + + pbm_scan_bus(p, &p->pbm_A); + } + if (p->pbm_B.prom_node) { + p->pbm_B.is_66mhz_capable = + prom_getbool(p->pbm_B.prom_node, "66mhz-capable"); + + pbm_scan_bus(p, &p->pbm_B); + } + + /* XXX register error interrupt handlers XXX */ +} + +static unsigned int pci_sun4v_irq_build(struct pci_pbm_info *pbm, + struct pci_dev *pdev, + unsigned int devino) +{ + u32 devhandle = pbm->devhandle; + int pil; + + pil = 5; + if (pdev) { + switch ((pdev->class >> 16) & 0xff) { + case PCI_BASE_CLASS_STORAGE: + pil = 5; + break; + + case 
PCI_BASE_CLASS_NETWORK: + pil = 6; + break; + + case PCI_BASE_CLASS_DISPLAY: + pil = 9; + break; + + case PCI_BASE_CLASS_MULTIMEDIA: + case PCI_BASE_CLASS_MEMORY: + case PCI_BASE_CLASS_BRIDGE: + case PCI_BASE_CLASS_SERIAL: + pil = 10; + break; + + default: + pil = 5; + break; + }; + } + BUG_ON(PIL_RESERVED(pil)); + + return sun4v_build_irq(devhandle, devino, pil, IBF_PCI); +} + +static void pci_sun4v_base_address_update(struct pci_dev *pdev, int resource) +{ + struct pcidev_cookie *pcp = pdev->sysdata; + struct pci_pbm_info *pbm = pcp->pbm; + struct resource *res, *root; + u32 reg; + int where, size, is_64bit; + + res = &pdev->resource[resource]; + if (resource < 6) { + where = PCI_BASE_ADDRESS_0 + (resource * 4); + } else if (resource == PCI_ROM_RESOURCE) { + where = pdev->rom_base_reg; + } else { + /* Somebody might have asked allocation of a non-standard resource */ + return; + } + + /* XXX 64-bit MEM handling is not %100 correct... XXX */ + is_64bit = 0; + if (res->flags & IORESOURCE_IO) + root = &pbm->io_space; + else { + root = &pbm->mem_space; + if ((res->flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) + == PCI_BASE_ADDRESS_MEM_TYPE_64) + is_64bit = 1; + } + + size = res->end - res->start; + pci_read_config_dword(pdev, where, &reg); + reg = ((reg & size) | + (((u32)(res->start - root->start)) & ~size)); + if (resource == PCI_ROM_RESOURCE) { + reg |= PCI_ROM_ADDRESS_ENABLE; + res->flags |= IORESOURCE_ROM_ENABLE; + } + pci_write_config_dword(pdev, where, reg); + + /* This knows that the upper 32-bits of the address + * must be zero. Our PCI common layer enforces this. + */ + if (is_64bit) + pci_write_config_dword(pdev, where + 4, 0); +} + +static void pci_sun4v_resource_adjust(struct pci_dev *pdev, + struct resource *res, + struct resource *root) +{ + res->start += root->start; + res->end += root->start; +} + +/* Use ranges property to determine where PCI MEM, I/O, and Config + * space are for this PCI bus module.
+ */ +static void pci_sun4v_determine_mem_io_space(struct pci_pbm_info *pbm) +{ + int i, saw_mem, saw_io; + + saw_mem = saw_io = 0; + for (i = 0; i < pbm->num_pbm_ranges; i++) { + struct linux_prom_pci_ranges *pr = &pbm->pbm_ranges[i]; + unsigned long a; + int type; + + type = (pr->child_phys_hi >> 24) & 0x3; + a = (((unsigned long)pr->parent_phys_hi << 32UL) | + ((unsigned long)pr->parent_phys_lo << 0UL)); + + switch (type) { + case 1: + /* 16-bit IO space, 16MB */ + pbm->io_space.start = a; + pbm->io_space.end = a + ((16UL*1024UL*1024UL) - 1UL); + pbm->io_space.flags = IORESOURCE_IO; + saw_io = 1; + break; + + case 2: + /* 32-bit MEM space, 2GB */ + pbm->mem_space.start = a; + pbm->mem_space.end = a + (0x80000000UL - 1UL); + pbm->mem_space.flags = IORESOURCE_MEM; + saw_mem = 1; + break; + + case 3: + /* XXX 64-bit MEM handling XXX */ + + default: + break; + }; + } + + if (!saw_io || !saw_mem) { + prom_printf("%s: Fatal error, missing %s PBM range.\n", + pbm->name, + (!saw_io ? "IO" : "MEM")); + prom_halt(); + } + + printk("%s: PCI IO[%lx] MEM[%lx]\n", + pbm->name, + pbm->io_space.start, + pbm->mem_space.start); +} + +static void pbm_register_toplevel_resources(struct pci_controller_info *p, + struct pci_pbm_info *pbm) +{ + pbm->io_space.name = pbm->mem_space.name = pbm->name; + + request_resource(&ioport_resource, &pbm->io_space); + request_resource(&iomem_resource, &pbm->mem_space); + pci_register_legacy_regions(&pbm->io_space, + &pbm->mem_space); +} + +static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, + struct pci_iommu *iommu) +{ + struct pci_iommu_arena *arena = &iommu->arena; + unsigned long i, cnt = 0; + u32 devhandle; + + devhandle = pbm->devhandle; + for (i = 0; i < arena->limit; i++) { + unsigned long ret, io_attrs, ra; + + ret = pci_sun4v_iommu_getmap(devhandle, + HV_PCI_TSBID(0, i), + &io_attrs, &ra); + if (ret == HV_EOK) { + cnt++; + __set_bit(i, arena->map); + } + } + + return cnt; +} + +static void pci_sun4v_iommu_init(struct 
pci_pbm_info *pbm) +{ + struct pci_iommu *iommu = pbm->iommu; + unsigned long num_tsb_entries, sz; + u32 vdma[2], dma_mask, dma_offset; + int err, tsbsize; + + err = prom_getproperty(pbm->prom_node, "virtual-dma", + (char *)&vdma[0], sizeof(vdma)); + if (err == 0 || err == -1) { + /* No property, use default values. */ + vdma[0] = 0x80000000; + vdma[1] = 0x80000000; + } + + dma_mask = vdma[0]; + switch (vdma[1]) { + case 0x20000000: + dma_mask |= 0x1fffffff; + tsbsize = 64; + break; + + case 0x40000000: + dma_mask |= 0x3fffffff; + tsbsize = 128; + break; + + case 0x80000000: + dma_mask |= 0x7fffffff; + tsbsize = 256; + break; + + default: + prom_printf("PCI-SUN4V: strange virtual-dma size.\n"); + prom_halt(); + }; + + tsbsize *= (8 * 1024); + + num_tsb_entries = tsbsize / sizeof(iopte_t); + + dma_offset = vdma[0]; + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_mask; + + /* Allocate and initialize the free area map. 
*/ + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kmalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); + prom_halt(); + } + memset(iommu->arena.map, 0, sz); + iommu->arena.limit = num_tsb_entries; + + sz = probe_existing_entries(pbm, iommu); + + printk("%s: TSB entries [%lu], existing mapings [%lu]\n", + pbm->name, num_tsb_entries, sz); +} + +static void pci_sun4v_get_bus_range(struct pci_pbm_info *pbm) +{ + unsigned int busrange[2]; + int prom_node = pbm->prom_node; + int err; + + err = prom_getproperty(prom_node, "bus-range", + (char *)&busrange[0], + sizeof(busrange)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no bus-range.\n", pbm->name); + prom_halt(); + } + + pbm->pci_first_busno = busrange[0]; + pbm->pci_last_busno = busrange[1]; + +} + +static void pci_sun4v_pbm_init(struct pci_controller_info *p, int prom_node, u32 devhandle) +{ + struct pci_pbm_info *pbm; + int err, i; + + if (devhandle & 0x40) + pbm = &p->pbm_B; + else + pbm = &p->pbm_A; + + pbm->parent = p; + pbm->prom_node = prom_node; + pbm->pci_first_slot = 1; + + pbm->devhandle = devhandle; + + sprintf(pbm->name, "SUN4V-PCI%d PBM%c", + p->index, (pbm == &p->pbm_A ? 'A' : 'B')); + + printk("%s: devhandle[%x] prom_node[%x:%x]\n", + pbm->name, pbm->devhandle, + pbm->prom_node, prom_getchild(pbm->prom_node)); + + prom_getstring(prom_node, "name", + pbm->prom_name, sizeof(pbm->prom_name)); + + err = prom_getproperty(prom_node, "ranges", + (char *) pbm->pbm_ranges, + sizeof(pbm->pbm_ranges)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no ranges property.\n", + pbm->name); + prom_halt(); + } + + pbm->num_pbm_ranges = + (err / sizeof(struct linux_prom_pci_ranges)); + + /* Mask out the top 8 bits of the ranges, leaving the real + * physical address. 
+ */ + for (i = 0; i < pbm->num_pbm_ranges; i++) + pbm->pbm_ranges[i].parent_phys_hi &= 0x0fffffff; + + pci_sun4v_determine_mem_io_space(pbm); + pbm_register_toplevel_resources(p, pbm); + + err = prom_getproperty(prom_node, "interrupt-map", + (char *)pbm->pbm_intmap, + sizeof(pbm->pbm_intmap)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no interrupt-map property.\n", + pbm->name); + prom_halt(); + } + + pbm->num_pbm_intmap = (err / sizeof(struct linux_prom_pci_intmap)); + err = prom_getproperty(prom_node, "interrupt-map-mask", + (char *)&pbm->pbm_intmask, + sizeof(pbm->pbm_intmask)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no interrupt-map-mask.\n", + pbm->name); + prom_halt(); + } + + pci_sun4v_get_bus_range(pbm); + pci_sun4v_iommu_init(pbm); +} + +void sun4v_pci_init(int node, char *model_name) +{ + struct pci_controller_info *p; + struct pci_iommu *iommu; + struct linux_prom64_registers regs; + u32 devhandle; + int i; + + prom_getproperty(node, "reg", (char *)&regs, sizeof(regs)); + devhandle = (regs.phys_addr >> 32UL) & 0x0fffffff; + + for (p = pci_controller_root; p; p = p->next) { + struct pci_pbm_info *pbm; + + if (p->pbm_A.prom_node && p->pbm_B.prom_node) + continue; + + pbm = (p->pbm_A.prom_node ?
+ &p->pbm_A : + &p->pbm_B); + + if (pbm->devhandle == (devhandle ^ 0x40)) { + pci_sun4v_pbm_init(p, node, devhandle); + return; + } + } + + for_each_cpu(i) { + unsigned long page = get_zeroed_page(GFP_ATOMIC); + + if (!page) + goto fatal_memory_error; + + per_cpu(pci_iommu_batch, i).pglist = (u64 *) page; + } + + p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); + if (!p) + goto fatal_memory_error; + + memset(p, 0, sizeof(*p)); + + iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + if (!iommu) + goto fatal_memory_error; + + memset(iommu, 0, sizeof(*iommu)); + p->pbm_A.iommu = iommu; + + iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + if (!iommu) + goto fatal_memory_error; + + memset(iommu, 0, sizeof(*iommu)); + p->pbm_B.iommu = iommu; + + p->next = pci_controller_root; + pci_controller_root = p; + + p->index = pci_num_controllers++; + p->pbms_same_domain = 0; + + p->scan_bus = pci_sun4v_scan_bus; + p->irq_build = pci_sun4v_irq_build; + p->base_address_update = pci_sun4v_base_address_update; + p->resource_adjust = pci_sun4v_resource_adjust; + p->pci_ops = &pci_sun4v_ops; + + /* Like PSYCHO and SCHIZO we have a 2GB aligned area + * for memory space. + */ + pci_memspace_mask = 0x7fffffffUL; + + pci_sun4v_pbm_init(p, node, devhandle); + return; + +fatal_memory_error: + prom_printf("SUN4V_PCI: Fatal memory allocation error.\n"); + prom_halt(); +} diff --git a/arch/sparc64/kernel/pci_sun4v.h b/arch/sparc64/kernel/pci_sun4v.h new file mode 100644 index 000000000000..884d25f6158d --- /dev/null +++ b/arch/sparc64/kernel/pci_sun4v.h @@ -0,0 +1,31 @@ +/* pci_sun4v.h: SUN4V specific PCI controller support. + * + * Copyright (C) 2006 David S. 
Miller (davem@davemloft.net) + */ + +#ifndef _PCI_SUN4V_H +#define _PCI_SUN4V_H + +extern long pci_sun4v_iommu_map(unsigned long devhandle, + unsigned long tsbid, + unsigned long num_ttes, + unsigned long io_attributes, + unsigned long io_page_list_pa); +extern unsigned long pci_sun4v_iommu_demap(unsigned long devhandle, + unsigned long tsbid, + unsigned long num_ttes); +extern unsigned long pci_sun4v_iommu_getmap(unsigned long devhandle, + unsigned long tsbid, + unsigned long *io_attributes, + unsigned long *real_address); +extern unsigned long pci_sun4v_config_get(unsigned long devhandle, + unsigned long pci_device, + unsigned long config_offset, + unsigned long size); +extern int pci_sun4v_config_put(unsigned long devhandle, + unsigned long pci_device, + unsigned long config_offset, + unsigned long size, + unsigned long data); + +#endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc64/kernel/pci_sun4v_asm.S b/arch/sparc64/kernel/pci_sun4v_asm.S new file mode 100644 index 000000000000..6604fdbf746c --- /dev/null +++ b/arch/sparc64/kernel/pci_sun4v_asm.S @@ -0,0 +1,95 @@ +/* pci_sun4v_asm: Hypervisor calls for PCI support. + * + * Copyright (C) 2006 David S. 
Miller <davem@davemloft.net> + */ + +#include <asm/hypervisor.h> + + /* %o0: devhandle + * %o1: tsbid + * %o2: num ttes + * %o3: io_attributes + * %o4: io_page_list phys address + * + * returns %o0: -status if status was non-zero, else + * %o0: num pages mapped + */ + .globl pci_sun4v_iommu_map +pci_sun4v_iommu_map: + mov %o5, %g1 + mov HV_FAST_PCI_IOMMU_MAP, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 1f + sub %g0, %o0, %o0 + mov %o1, %o0 +1: retl + nop + + /* %o0: devhandle + * %o1: tsbid + * %o2: num ttes + * + * returns %o0: num ttes demapped + */ + .globl pci_sun4v_iommu_demap +pci_sun4v_iommu_demap: + mov HV_FAST_PCI_IOMMU_DEMAP, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: devhandle + * %o1: tsbid + * %o2: &io_attributes + * %o3: &real_address + * + * returns %o0: status + */ + .globl pci_sun4v_iommu_getmap +pci_sun4v_iommu_getmap: + mov %o2, %o4 + mov HV_FAST_PCI_IOMMU_GETMAP, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + stx %o2, [%o3] + retl + mov %o0, %o0 + + /* %o0: devhandle + * %o1: pci_device + * %o2: pci_config_offset + * %o3: size + * + * returns %o0: data + * + * If there is an error, the data will be returned + * as all 1's. 
+ */ + .globl pci_sun4v_config_get +pci_sun4v_config_get: + mov HV_FAST_PCI_CONFIG_GET, %o5 + ta HV_FAST_TRAP + brnz,a,pn %o1, 1f + mov -1, %o2 +1: retl + mov %o2, %o0 + + /* %o0: devhandle + * %o1: pci_device + * %o2: pci_config_offset + * %o3: size + * %o4: data + * + * returns %o0: status + * + * status will be zero if the operation completed + * successfully, else -1 if not + */ + .globl pci_sun4v_config_put +pci_sun4v_config_put: + mov HV_FAST_PCI_CONFIG_PUT, %o5 + ta HV_FAST_TRAP + brnz,a,pn %o1, 1f + mov -1, %o1 +1: retl + mov %o1, %o0 diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 059b0d025224..1c7ca2f712d9 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -44,83 +44,61 @@ #include <asm/fpumacro.h> #include <asm/head.h> #include <asm/cpudata.h> +#include <asm/mmu_context.h> #include <asm/unistd.h> +#include <asm/hypervisor.h> /* #define VERBOSE_SHOWREGS */ -/* - * Nothing special yet... - */ -void default_idle(void) -{ -} - -#ifndef CONFIG_SMP - -/* - * the idle loop on a Sparc... ;) - */ -void cpu_idle(void) +static void sparc64_yield(void) { - /* endless idle loop with no priority at all */ - for (;;) { - /* If current->work.need_resched is zero we should really - * setup for a system wakup event and execute a shutdown - * instruction. - * - * But this requires writing back the contents of the - * L2 cache etc. so implement this later. -DaveM - */ - while (!need_resched()) - barrier(); + if (tlb_type != hypervisor) + return; - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - check_pgt_cache(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + + while (!need_resched()) { + unsigned long pstate; + + /* Disable interrupts. */ + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "andn %0, %1, %0\n\t" + "wrpr %0, %%g0, %%pstate" + : "=&r" (pstate) + : "i" (PSTATE_IE)); + + if (!need_resched()) + sun4v_cpu_yield(); + + /* Re-enable interrupts. 
*/ + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "or %0, %1, %0\n\t" + "wrpr %0, %%g0, %%pstate" + : "=&r" (pstate) + : "i" (PSTATE_IE)); } -} -#else + set_thread_flag(TIF_POLLING_NRFLAG); +} -/* - * the idle loop on a UltraMultiPenguin... - * - * TIF_POLLING_NRFLAG is set because we do not sleep the cpu - * inside of the idler task, so an interrupt is not needed - * to get a clean fast response. - * - * XXX Reverify this assumption... -DaveM - * - * Addendum: We do want it to do something for the signal - * delivery case, we detect that by just seeing - * if we are trying to send this to an idler or not. - */ +/* The idle loop on sparc64. */ void cpu_idle(void) { - cpuinfo_sparc *cpuinfo = &local_cpu_data(); set_thread_flag(TIF_POLLING_NRFLAG); while(1) { if (need_resched()) { - cpuinfo->idle_volume = 0; preempt_enable_no_resched(); schedule(); preempt_disable(); - check_pgt_cache(); } - cpuinfo->idle_volume++; - - /* The store ordering is so that IRQ handlers on - * other cpus see our increasing idleness for the buddy - * redistribution algorithm. 
-DaveM - */ - membar_storeload_storestore(); + sparc64_yield(); } } -#endif - extern char reboot_command []; extern void (*prom_palette)(int); @@ -354,6 +332,7 @@ void show_regs(struct pt_regs *regs) extern long etrap, etraptl1; #endif __show_regs(regs); +#if 0 #ifdef CONFIG_SMP { extern void smp_report_regs(void); @@ -361,6 +340,7 @@ void show_regs(struct pt_regs *regs) smp_report_regs(); } #endif +#endif #ifdef VERBOSE_SHOWREGS if (regs->tpc >= &etrap && regs->tpc < &etraptl1 && @@ -433,30 +413,15 @@ void exit_thread(void) void flush_thread(void) { struct thread_info *t = current_thread_info(); + struct mm_struct *mm; if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); - if (t->task->mm) { - unsigned long pgd_cache = 0UL; - if (test_thread_flag(TIF_32BIT)) { - struct mm_struct *mm = t->task->mm; - pgd_t *pgd0 = &mm->pgd[0]; - pud_t *pud0 = pud_offset(pgd0, 0); + mm = t->task->mm; + if (mm) + tsb_context_switch(mm); - if (pud_none(*pud0)) { - pmd_t *page = pmd_alloc_one(mm, 0); - pud_set(pud0, page); - } - pgd_cache = get_pgd_cache(pgd0); - } - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (pgd_cache), - "r" (TSB_REG), - "i" (ASI_DMMU)); - } set_thread_wsaved(0); /* Turn off performance counters if on. 
*/ @@ -555,6 +520,18 @@ void synchronize_user_stack(void) } } +static void stack_unaligned(unsigned long sp) +{ + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *) sp; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + void fault_in_user_windows(void) { struct thread_info *t = current_thread_info(); @@ -570,13 +547,17 @@ void fault_in_user_windows(void) flush_user_windows(); window = get_thread_wsaved(); - if (window != 0) { + if (likely(window != 0)) { window -= 1; do { unsigned long sp = (t->rwbuf_stkptrs[window] + bias); struct reg_window *rwin = &t->reg_window[window]; - if (copy_to_user((char __user *)sp, rwin, winsize)) + if (unlikely(sp & 0x7UL)) + stack_unaligned(sp); + + if (unlikely(copy_to_user((char __user *)sp, + rwin, winsize))) goto barf; } while (window--); } diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 3f9746f856d2..eb93e9c52846 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -124,6 +124,9 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, { BUG_ON(len > PAGE_SIZE); + if (tlb_type == hypervisor) + return; + #ifdef DCACHE_ALIASING_POSSIBLE /* If bit 13 of the kernel address we used to access the * user page is the same as the virtual address that page diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index b80eba0081ca..7130e866f935 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -223,12 +223,26 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3 ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4 ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5 - mov TSB_REG, %g6 - brnz,a,pn %l3, 1f - ldxa [%g6] ASI_IMMU, %g5 -1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 + brz,pt %l3, 1f + mov %g6, %l2 + + /* Must do this before thread reg is clobbered below. 
*/ + LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2) +1: + ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 - wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate + + /* Normal globals are restored, go to trap globals. */ +661: wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate + nop + .section .sun4v_2insn_patch, "ax" + .word 661b + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + SET_GL(1) + .previous + + mov %l2, %g6 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 @@ -252,27 +266,108 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 brnz,pn %l3, kern_rtt mov PRIMARY_CONTEXT, %l7 - ldxa [%l7 + %l7] ASI_DMMU, %l0 + +661: ldxa [%l7 + %l7] ASI_DMMU, %l0 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%l7 + %l7] ASI_MMU, %l0 + .previous + sethi %hi(sparc64_kern_pri_nuc_bits), %l1 ldx [%l1 + %lo(sparc64_kern_pri_nuc_bits)], %l1 or %l0, %l1, %l0 - stxa %l0, [%l7] ASI_DMMU - flush %g6 + +661: stxa %l0, [%l7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %l0, [%l7] ASI_MMU + .previous + + sethi %hi(KERNBASE), %l7 + flush %l7 rdpr %wstate, %l1 rdpr %otherwin, %l2 srl %l1, 3, %l1 wrpr %l2, %g0, %canrestore wrpr %l1, %g0, %wstate - wrpr %g0, %g0, %otherwin + brnz,pt %l2, user_rtt_restore + wrpr %g0, %g0, %otherwin + + ldx [%g6 + TI_FLAGS], %g3 + wr %g0, ASI_AIUP, %asi + rdpr %cwp, %g1 + andcc %g3, _TIF_32BIT, %g0 + sub %g1, 1, %g1 + bne,pt %xcc, user_rtt_fill_32bit + wrpr %g1, %cwp + ba,a,pt %xcc, user_rtt_fill_64bit + +user_rtt_fill_fixup: + rdpr %cwp, %g1 + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + + rdpr %wstate, %g2 + sll %g2, 3, %g2 + wrpr %g2, 0x0, %wstate + + /* We know %canrestore and %otherwin are both zero. 
*/ + + sethi %hi(sparc64_kern_pri_context), %g2 + ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + sethi %hi(KERNBASE), %g1 + flush %g1 + + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + + mov %g6, %l1 + wrpr %g0, 0x0, %tl + +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + + wrpr %g0, RTRAP_PSTATE, %pstate + + mov %l1, %g6 + ldx [%g6 + TI_TASK], %g4 + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + +user_rtt_pre_restore: + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + +user_rtt_restore: restore rdpr %canrestore, %g1 wrpr %g1, 0x0, %cleanwin retry nop -kern_rtt: restore +kern_rtt: rdpr %canrestore, %g1 + brz,pn %g1, kern_rtt_fill + nop +kern_rtt_restore: + restore retry + to_kernel: #ifdef CONFIG_PREEMPT ldsw [%g6 + TI_PRE_COUNT], %l5 diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index d95a1bcf163d..1d6ffdeabd4c 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -693,11 +693,11 @@ void sbus_set_sbus64(struct sbus_dev *sdev, int bursts) /* SBUS SYSIO INO number to Sparc PIL level. 
*/ static unsigned char sysio_ino_to_pil[] = { - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 0 */ - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 1 */ - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 2 */ - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 3 */ - 4, /* Onboard SCSI */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 0 */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 1 */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 2 */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 3 */ + 5, /* Onboard SCSI */ 5, /* Onboard Ethernet */ /*XXX*/ 8, /* Onboard BPP */ 0, /* Bogon */ diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 158bd31e15b7..7d0e67c1ce50 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -64,12 +64,6 @@ struct screen_info screen_info = { 16 /* orig-video-points */ }; -/* Typing sync at the prom prompt calls the function pointed to by - * the sync callback which I set to the following function. - * This should sync all filesystems and return, for now it just - * prints out pretty messages and returns. - */ - void (*prom_palette)(int); void (*prom_keyboard)(void); @@ -79,259 +73,6 @@ prom_console_write(struct console *con, const char *s, unsigned n) prom_write(s, n); } -static struct console prom_console = { - .name = "prom", - .write = prom_console_write, - .flags = CON_CONSDEV | CON_ENABLED, - .index = -1, -}; - -#define PROM_TRUE -1 -#define PROM_FALSE 0 - -/* Pretty sick eh? */ -int prom_callback(long *args) -{ - struct console *cons, *saved_console = NULL; - unsigned long flags; - char *cmd; - extern spinlock_t prom_entry_lock; - - if (!args) - return -1; - if (!(cmd = (char *)args[0])) - return -1; - - /* - * The callback can be invoked on the cpu that first dropped - * into prom_cmdline after taking the serial interrupt, or on - * a slave processor that was smp_captured() if the - * administrator has done a switch-cpu inside obp. In either - * case, the cpu is marked as in-interrupt. Drop IRQ locks. 
- */ - irq_exit(); - - /* XXX Revisit the locking here someday. This is a debugging - * XXX feature so it isnt all that critical. -DaveM - */ - local_irq_save(flags); - - spin_unlock(&prom_entry_lock); - cons = console_drivers; - while (cons) { - unregister_console(cons); - cons->flags &= ~(CON_PRINTBUFFER); - cons->next = saved_console; - saved_console = cons; - cons = console_drivers; - } - register_console(&prom_console); - if (!strcmp(cmd, "sync")) { - prom_printf("PROM `%s' command...\n", cmd); - show_free_areas(); - if (current->pid != 0) { - local_irq_enable(); - sys_sync(); - local_irq_disable(); - } - args[2] = 0; - args[args[1] + 3] = -1; - prom_printf("Returning to PROM\n"); - } else if (!strcmp(cmd, "va>tte-data")) { - unsigned long ctx, va; - unsigned long tte = 0; - long res = PROM_FALSE; - - ctx = args[3]; - va = args[4]; - if (ctx) { - /* - * Find process owning ctx, lookup mapping. - */ - struct task_struct *p; - struct mm_struct *mm = NULL; - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - pte_t pte; - - for_each_process(p) { - mm = p->mm; - if (CTX_NRBITS(mm->context) == ctx) - break; - } - if (!mm || - CTX_NRBITS(mm->context) != ctx) - goto done; - - pgdp = pgd_offset(mm, va); - if (pgd_none(*pgdp)) - goto done; - pudp = pud_offset(pgdp, va); - if (pud_none(*pudp)) - goto done; - pmdp = pmd_offset(pudp, va); - if (pmd_none(*pmdp)) - goto done; - - /* Preemption implicitly disabled by virtue of - * being called from inside OBP. - */ - ptep = pte_offset_map(pmdp, va); - pte = *ptep; - if (pte_present(pte)) { - tte = pte_val(pte); - res = PROM_TRUE; - } - pte_unmap(ptep); - goto done; - } - - if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) { - extern unsigned long sparc64_kern_pri_context; - - /* Spitfire Errata #32 workaround */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (sparc64_kern_pri_context), - "r" (PRIMARY_CONTEXT), - "i" (ASI_DMMU)); - - /* - * Locked down tlb entry. 
- */ - - if (tlb_type == spitfire) - tte = spitfire_get_dtlb_data(SPITFIRE_HIGHEST_LOCKED_TLBENT); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - tte = cheetah_get_ldtlb_data(CHEETAH_HIGHEST_LOCKED_TLBENT); - - res = PROM_TRUE; - goto done; - } - - if (va < PGDIR_SIZE) { - /* - * vmalloc or prom_inherited mapping. - */ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - pte_t pte; - int error; - - if ((va >= LOW_OBP_ADDRESS) && (va < HI_OBP_ADDRESS)) { - tte = prom_virt_to_phys(va, &error); - if (!error) - res = PROM_TRUE; - goto done; - } - pgdp = pgd_offset_k(va); - if (pgd_none(*pgdp)) - goto done; - pudp = pud_offset(pgdp, va); - if (pud_none(*pudp)) - goto done; - pmdp = pmd_offset(pudp, va); - if (pmd_none(*pmdp)) - goto done; - - /* Preemption implicitly disabled by virtue of - * being called from inside OBP. - */ - ptep = pte_offset_kernel(pmdp, va); - pte = *ptep; - if (pte_present(pte)) { - tte = pte_val(pte); - res = PROM_TRUE; - } - goto done; - } - - if (va < PAGE_OFFSET) { - /* - * No mappings here. - */ - goto done; - } - - if (va & (1UL << 40)) { - /* - * I/O page. - */ - - tte = (__pa(va) & _PAGE_PADDR) | - _PAGE_VALID | _PAGE_SZ4MB | - _PAGE_E | _PAGE_P | _PAGE_W; - res = PROM_TRUE; - goto done; - } - - /* - * Normal page. - */ - tte = (__pa(va) & _PAGE_PADDR) | - _PAGE_VALID | _PAGE_SZ4MB | - _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W; - res = PROM_TRUE; - - done: - if (res == PROM_TRUE) { - args[2] = 3; - args[args[1] + 3] = 0; - args[args[1] + 4] = res; - args[args[1] + 5] = tte; - } else { - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = res; - } - } else if (!strcmp(cmd, ".soft1")) { - unsigned long tte; - - tte = args[3]; - prom_printf("%lx:\"%s%s%s%s%s\" ", - (tte & _PAGE_SOFT) >> 7, - tte & _PAGE_MODIFIED ? "M" : "-", - tte & _PAGE_ACCESSED ? "A" : "-", - tte & _PAGE_READ ? "W" : "-", - tte & _PAGE_WRITE ? "R" : "-", - tte & _PAGE_PRESENT ? 
"P" : "-"); - - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = PROM_TRUE; - } else if (!strcmp(cmd, ".soft2")) { - unsigned long tte; - - tte = args[3]; - prom_printf("%lx ", (tte & 0x07FC000000000000UL) >> 50); - - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = PROM_TRUE; - } else { - prom_printf("unknown PROM `%s' command...\n", cmd); - } - unregister_console(&prom_console); - while (saved_console) { - cons = saved_console; - saved_console = cons->next; - register_console(cons); - } - spin_lock(&prom_entry_lock); - local_irq_restore(flags); - - /* - * Restore in-interrupt status for a resume from obp. - */ - irq_enter(); - return 0; -} - unsigned int boot_flags = 0; #define BOOTME_DEBUG 0x1 #define BOOTME_SINGLE 0x2 @@ -479,15 +220,99 @@ char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; -void register_prom_callbacks(void) +static void __init per_cpu_patch(void) { - prom_setcallback(prom_callback); - prom_feval(": linux-va>tte-data 2 \" va>tte-data\" $callback drop ; " - "' linux-va>tte-data to va>tte-data"); - prom_feval(": linux-.soft1 1 \" .soft1\" $callback 2drop ; " - "' linux-.soft1 to .soft1"); - prom_feval(": linux-.soft2 1 \" .soft2\" $callback 2drop ; " - "' linux-.soft2 to .soft2"); + struct cpuid_patch_entry *p; + unsigned long ver; + int is_jbus; + + if (tlb_type == spitfire && !this_is_starfire) + return; + + is_jbus = 0; + if (tlb_type != hypervisor) { + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + is_jbus = ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID); + } + + p = &__cpuid_patch; + while (p < &__cpuid_patch_end) { + unsigned long addr = p->addr; + unsigned int *insns; + + switch (tlb_type) { + case spitfire: + insns = &p->starfire[0]; + break; + case cheetah: + case cheetah_plus: + if (is_jbus) + insns = &p->cheetah_jbus[0]; + else + insns = &p->cheetah_safari[0]; + break; + case hypervisor: + insns = &p->sun4v[0]; + break; + default: + 
prom_printf("Unknown cpu type, halting.\n"); + prom_halt(); + }; + + *(unsigned int *) (addr + 0) = insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + *(unsigned int *) (addr + 8) = insns[2]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 8)); + + *(unsigned int *) (addr + 12) = insns[3]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 12)); + + p++; + } +} + +static void __init sun4v_patch(void) +{ + struct sun4v_1insn_patch_entry *p1; + struct sun4v_2insn_patch_entry *p2; + + if (tlb_type != hypervisor) + return; + + p1 = &__sun4v_1insn_patch; + while (p1 < &__sun4v_1insn_patch_end) { + unsigned long addr = p1->addr; + + *(unsigned int *) (addr + 0) = p1->insn; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + p1++; + } + + p2 = &__sun4v_2insn_patch; + while (p2 < &__sun4v_2insn_patch_end) { + unsigned long addr = p2->addr; + + *(unsigned int *) (addr + 0) = p2->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = p2->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + p2++; + } } void __init setup_arch(char **cmdline_p) @@ -496,7 +321,10 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = prom_getbootargs(); strcpy(saved_command_line, *cmdline_p); - printk("ARCH: SUN4U\n"); + if (tlb_type == hypervisor) + printk("ARCH: SUN4V\n"); + else + printk("ARCH: SUN4U\n"); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; @@ -507,6 +335,13 @@ void __init setup_arch(char **cmdline_p) /* Work out if we are starfire early on */ check_if_starfire(); + /* Now we know enough to patch the get_cpuid sequences + * used by trap code. 
+ */ + per_cpu_patch(); + + sun4v_patch(); + boot_flags_init(*cmdline_p); idprom_init(); @@ -514,7 +349,7 @@ void __init setup_arch(char **cmdline_p) if (!root_flags) root_mountflags &= ~MS_RDONLY; ROOT_DEV = old_decode_dev(root_dev); -#ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); @@ -544,6 +379,9 @@ void __init setup_arch(char **cmdline_p) smp_setup_cpu_possible_map(); + /* Get boot processor trap_block[] setup. */ + init_cur_cpu_trap(current_thread_info()); + paging_init(); } @@ -565,6 +403,12 @@ static int __init set_preferred_console(void) serial_console = 2; } else if (idev == PROMDEV_IRSC && odev == PROMDEV_ORSC) { serial_console = 3; + } else if (idev == PROMDEV_IVCONS && odev == PROMDEV_OVCONS) { + /* sunhv_console_init() doesn't check the serial_console + * value anyways... + */ + serial_console = 4; + return add_preferred_console("ttyHV", 0, NULL); } else { prom_printf("Inconsistent console: " "input %d, output %d\n", @@ -598,9 +442,8 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) seq_printf(m, "cpu\t\t: %s\n" "fpu\t\t: %s\n" - "promlib\t\t: Version 3 Revision %d\n" - "prom\t\t: %d.%d.%d\n" - "type\t\t: sun4u\n" + "prom\t\t: %s\n" + "type\t\t: %s\n" "ncpus probed\t: %d\n" "ncpus active\t: %d\n" "D$ parity tl1\t: %u\n" @@ -612,10 +455,10 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) , sparc_cpu_type, sparc_fpu_type, - prom_rev, - prom_prev >> 16, - (prom_prev >> 8) & 0xff, - prom_prev & 0xff, + prom_version, + ((tlb_type == hypervisor) ? 
+ "sun4v" : + "sun4u"), ncpus_probed, num_online_cpus(), dcache_parity_tl1_occurred, @@ -692,15 +535,11 @@ static int __init topology_init(void) while (!cpu_find_by_instance(ncpus_probed, NULL, NULL)) ncpus_probed++; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_possible(i)) { - struct cpu *p = kmalloc(sizeof(*p), GFP_KERNEL); - - if (p) { - memset(p, 0, sizeof(*p)); - register_cpu(p, i, NULL); - err = 0; - } + for_each_cpu(i) { + struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p) { + register_cpu(p, i, NULL); + err = 0; } } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 1f7ad8a69052..373a701c90a5 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -38,6 +38,7 @@ #include <asm/timer.h> #include <asm/starfire.h> #include <asm/tlb.h> +#include <asm/sections.h> extern void calibrate_delay(void); @@ -46,6 +47,8 @@ static unsigned char boot_cpu_id; cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = + { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; static cpumask_t smp_commenced_mask; static cpumask_t cpu_callout_map; @@ -77,7 +80,7 @@ void smp_bogo(struct seq_file *m) void __init smp_store_cpu_info(int id) { - int cpu_node; + int cpu_node, def; /* multiplier and counter set by smp_setup_percpu_timer() */ @@ -87,24 +90,32 @@ void __init smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); - cpu_data(id).pgcache_size = 0; - cpu_data(id).pte_cache[0] = NULL; - cpu_data(id).pte_cache[1] = NULL; - cpu_data(id).pgd_cache = NULL; - cpu_data(id).idle_volume = 1; - + def = ((tlb_type == hypervisor) ? 
(8 * 1024) : (16 * 1024)); cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size", - 16 * 1024); + def); + + def = 32; cpu_data(id).dcache_line_size = - prom_getintdefault(cpu_node, "dcache-line-size", 32); + prom_getintdefault(cpu_node, "dcache-line-size", def); + + def = 16 * 1024; cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size", - 16 * 1024); + def); + + def = 32; cpu_data(id).icache_line_size = - prom_getintdefault(cpu_node, "icache-line-size", 32); + prom_getintdefault(cpu_node, "icache-line-size", def); + + def = ((tlb_type == hypervisor) ? + (3 * 1024 * 1024) : + (4 * 1024 * 1024)); cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size", - 4 * 1024 * 1024); + def); + + def = 64; cpu_data(id).ecache_line_size = - prom_getintdefault(cpu_node, "ecache-line-size", 64); + prom_getintdefault(cpu_node, "ecache-line-size", def); + printk("CPU[%d]: Caches " "D[sz(%d):line_sz(%d)] " "I[sz(%d):line_sz(%d)] " @@ -119,27 +130,16 @@ static void smp_setup_percpu_timer(void); static volatile unsigned long callin_flag = 0; -extern void inherit_locked_prom_mappings(int save_p); - -static inline void cpu_setup_percpu_base(unsigned long cpu_id) -{ - __asm__ __volatile__("mov %0, %%g5\n\t" - "stxa %0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (__per_cpu_offset(cpu_id)), - "r" (TSB_REG), "i" (ASI_IMMU)); -} - void __init smp_callin(void) { int cpuid = hard_smp_processor_id(); - inherit_locked_prom_mappings(0); + __local_per_cpu_offset = __per_cpu_offset(cpuid); - __flush_tlb_all(); + if (tlb_type == hypervisor) + sun4v_ktsb_register(); - cpu_setup_percpu_base(cpuid); + __flush_tlb_all(); smp_setup_percpu_timer(); @@ -316,6 +316,8 @@ static void smp_synchronize_one_tick(int cpu) spin_unlock_irqrestore(&itc_sync_lock, flags); } +extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); + extern unsigned long sparc64_cpu_startup; /* The OBP cpu startup callback truncates the 3rd arg cookie 
to @@ -331,21 +333,31 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) unsigned long cookie = (unsigned long)(&cpu_new_thread); struct task_struct *p; - int timeout, ret, cpu_node; + int timeout, ret; p = fork_idle(cpu); callin_flag = 0; cpu_new_thread = task_thread_info(p); cpu_set(cpu, cpu_callout_map); - cpu_find_by_mid(cpu, &cpu_node); - prom_startcpu(cpu_node, entry, cookie); + if (tlb_type == hypervisor) { + /* Alloc the mondo queues, cpu will load them. */ + sun4v_init_mondo_queues(0, cpu, 1, 0); + + prom_startcpu_cpuid(cpu, entry, cookie); + } else { + int cpu_node; + + cpu_find_by_mid(cpu, &cpu_node); + prom_startcpu(cpu_node, entry, cookie); + } for (timeout = 0; timeout < 5000000; timeout++) { if (callin_flag) break; udelay(100); } + if (callin_flag) { ret = 0; } else { @@ -441,7 +453,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { u64 pstate, ver; - int nack_busy_id, is_jalapeno; + int nack_busy_id, is_jbus; if (cpus_empty(mask)) return; @@ -451,7 +463,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas * derivative processor. */ __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - is_jalapeno = ((ver >> 32) == 0x003e0016); + is_jbus = ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID); __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); @@ -476,7 +489,7 @@ retry: for_each_cpu_mask(i, mask) { u64 target = (i << 14) | 0x70; - if (!is_jalapeno) + if (!is_jbus) target |= (nack_busy_id << 24); __asm__ __volatile__( "stxa %%g0, [%0] %1\n\t" @@ -529,7 +542,7 @@ retry: for_each_cpu_mask(i, mask) { u64 check_mask; - if (is_jalapeno) + if (is_jbus) check_mask = (0x2UL << (2*i)); else check_mask = (0x2UL << @@ -544,6 +557,155 @@ retry: } } +/* Multi-cpu list version. 
*/ +static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) +{ + struct trap_per_cpu *tb; + u16 *cpu_list; + u64 *mondo; + cpumask_t error_mask; + unsigned long flags, status; + int cnt, retries, this_cpu, prev_sent, i; + + /* We have to do this whole thing with interrupts fully disabled. + * Otherwise if we send an xcall from interrupt context it will + * corrupt both our mondo block and cpu list state. + * + * One consequence of this is that we cannot use timeout mechanisms + * that depend upon interrupts being delivered locally. So, for + * example, we cannot sample jiffies and expect it to advance. + * + * Fortunately, udelay() uses %stick/%tick so we can use that. + */ + local_irq_save(flags); + + this_cpu = smp_processor_id(); + tb = &trap_block[this_cpu]; + + mondo = __va(tb->cpu_mondo_block_pa); + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + cpu_list = __va(tb->cpu_list_pa); + + /* Setup the initial cpu list. */ + cnt = 0; + for_each_cpu_mask(i, mask) + cpu_list[cnt++] = i; + + cpus_clear(error_mask); + retries = 0; + prev_sent = 0; + do { + int forward_progress, n_sent; + + status = sun4v_cpu_mondo_send(cnt, + tb->cpu_list_pa, + tb->cpu_mondo_block_pa); + + /* HV_EOK means all cpus received the xcall, we're done. */ + if (likely(status == HV_EOK)) + break; + + /* First, see if we made any forward progress. + * + * The hypervisor indicates successful sends by setting + * cpu list entries to the value 0xffff. + */ + n_sent = 0; + for (i = 0; i < cnt; i++) { + if (likely(cpu_list[i] == 0xffff)) + n_sent++; + } + + forward_progress = 0; + if (n_sent > prev_sent) + forward_progress = 1; + + prev_sent = n_sent; + + /* If we get a HV_ECPUERROR, then one or more of the cpus + * in the list are in error state. Use the cpu_state() + * hypervisor call to find out which cpus are in error state. 
+ */ + if (unlikely(status == HV_ECPUERROR)) { + for (i = 0; i < cnt; i++) { + long err; + u16 cpu; + + cpu = cpu_list[i]; + if (cpu == 0xffff) + continue; + + err = sun4v_cpu_state(cpu); + if (err >= 0 && + err == HV_CPU_STATE_ERROR) { + cpu_list[i] = 0xffff; + cpu_set(cpu, error_mask); + } + } + } else if (unlikely(status != HV_EWOULDBLOCK)) + goto fatal_mondo_error; + + /* Don't bother rewriting the CPU list, just leave the + * 0xffff and non-0xffff entries in there and the + * hypervisor will do the right thing. + * + * Only advance timeout state if we didn't make any + * forward progress. + */ + if (unlikely(!forward_progress)) { + if (unlikely(++retries > 10000)) + goto fatal_mondo_timeout; + + /* Delay a little bit to let other cpus catch up + * on their cpu mondo queue work. + */ + udelay(2 * cnt); + } + } while (1); + + local_irq_restore(flags); + + if (unlikely(!cpus_empty(error_mask))) + goto fatal_mondo_cpu_error; + + return; + +fatal_mondo_cpu_error: + printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " + "were in error state\n", + this_cpu); + printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu); + for_each_cpu_mask(i, error_mask) + printk("%d ", i); + printk("]\n"); + return; + +fatal_mondo_timeout: + local_irq_restore(flags); + printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " + " progress after %d retries.\n", + this_cpu, retries); + goto dump_cpu_list_and_out; + +fatal_mondo_error: + local_irq_restore(flags); + printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", + this_cpu, status); + printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " + "mondo_block_pa(%lx)\n", + this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + +dump_cpu_list_and_out: + printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); + for (i = 0; i < cnt; i++) + printk("%u ", cpu_list[i]); + printk("]\n"); +} + /* Send cross call to all processors mentioned in MASK * except self. 
*/ @@ -557,8 +719,10 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d if (tlb_type == spitfire) spitfire_xcall_deliver(data0, data1, data2, mask); - else + else if (tlb_type == cheetah || tlb_type == cheetah_plus) cheetah_xcall_deliver(data0, data1, data2, mask); + else + hypervisor_xcall_deliver(data0, data1, data2, mask); /* NOTE: Caller runs local copy on master. */ put_cpu(); @@ -594,16 +758,13 @@ extern unsigned long xcall_call_function; * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(void (*func)(void *info), void *info, - int nonatomic, int wait) +static int smp_call_function_mask(void (*func)(void *info), void *info, + int nonatomic, int wait, cpumask_t mask) { struct call_data_struct data; - int cpus = num_online_cpus() - 1; + int cpus; long timeout; - if (!cpus) - return 0; - /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); @@ -614,9 +775,14 @@ int smp_call_function(void (*func)(void *info), void *info, spin_lock(&call_lock); + cpu_clear(smp_processor_id(), mask); + cpus = cpus_weight(mask); + if (!cpus) + goto out_unlock; + call_data = &data; - smp_cross_call(&xcall_call_function, 0, 0, 0); + smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask); /* * Wait for other cpus to complete function or at @@ -630,18 +796,25 @@ int smp_call_function(void (*func)(void *info), void *info, udelay(1); } +out_unlock: spin_unlock(&call_lock); return 0; out_timeout: spin_unlock(&call_lock); - printk("XCALL: Remote cpus not responding, ncpus=%ld finished=%ld\n", - (long) num_online_cpus() - 1L, - (long) atomic_read(&data.finished)); + printk("XCALL: Remote cpus not responding, ncpus=%d finished=%d\n", + cpus, atomic_read(&data.finished)); return 0; } +int smp_call_function(void (*func)(void *info), void *info, + int nonatomic, int wait) +{ + return smp_call_function_mask(func, info, nonatomic, 
wait, + cpu_online_map); +} + void smp_call_function_client(int irq, struct pt_regs *regs) { void (*func) (void *info) = call_data->func; @@ -659,13 +832,25 @@ void smp_call_function_client(int irq, struct pt_regs *regs) } } +static void tsb_sync(void *info) +{ + struct mm_struct *mm = info; + + if (current->active_mm == mm) + tsb_context_switch(mm); +} + +void smp_tsb_sync(struct mm_struct *mm) +{ + smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask); +} + extern unsigned long xcall_flush_tlb_mm; extern unsigned long xcall_flush_tlb_pending; extern unsigned long xcall_flush_tlb_kernel_range; -extern unsigned long xcall_flush_tlb_all_spitfire; -extern unsigned long xcall_flush_tlb_all_cheetah; extern unsigned long xcall_report_regs; extern unsigned long xcall_receive_signal; +extern unsigned long xcall_new_mmu_context_version; #ifdef DCACHE_ALIASING_POSSIBLE extern unsigned long xcall_flush_dcache_page_cheetah; @@ -693,11 +878,17 @@ static __inline__ void __local_flush_dcache_page(struct page *page) void smp_flush_dcache_page_impl(struct page *page, int cpu) { cpumask_t mask = cpumask_of_cpu(cpu); - int this_cpu = get_cpu(); + int this_cpu; + + if (tlb_type == hypervisor) + return; #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes); #endif + + this_cpu = get_cpu(); + if (cpu == this_cpu) { __local_flush_dcache_page(page); } else if (cpu_online(cpu)) { @@ -713,7 +904,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) __pa(pg_addr), (u64) pg_addr, mask); - } else { + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE data0 = ((u64)&xcall_flush_dcache_page_cheetah); @@ -735,7 +926,12 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) void *pg_addr = page_address(page); cpumask_t mask = cpu_online_map; u64 data0; - int this_cpu = get_cpu(); + int this_cpu; + + if (tlb_type == hypervisor) + return; + + this_cpu = get_cpu(); cpu_clear(this_cpu, mask); @@ -752,7 +948,7 @@ void 
flush_dcache_page_all(struct mm_struct *mm, struct page *page) __pa(pg_addr), (u64) pg_addr, mask); - } else { + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE data0 = ((u64)&xcall_flush_dcache_page_cheetah); cheetah_xcall_deliver(data0, @@ -769,38 +965,58 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) put_cpu(); } +static void __smp_receive_signal_mask(cpumask_t mask) +{ + smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask); +} + void smp_receive_signal(int cpu) { cpumask_t mask = cpumask_of_cpu(cpu); - if (cpu_online(cpu)) { - u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff); - - if (tlb_type == spitfire) - spitfire_xcall_deliver(data0, 0, 0, mask); - else - cheetah_xcall_deliver(data0, 0, 0, mask); - } + if (cpu_online(cpu)) + __smp_receive_signal_mask(mask); } void smp_receive_signal_client(int irq, struct pt_regs *regs) { - /* Just return, rtrap takes care of the rest. */ clear_softint(1 << irq); } -void smp_report_regs(void) +void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) { - smp_cross_call(&xcall_report_regs, 0, 0, 0); + struct mm_struct *mm; + unsigned long flags; + + clear_softint(1 << irq); + + /* See if we need to allocate a new TLB context because + * the version of the one we are using is now out of date. 
+ */ + mm = current->active_mm; + if (unlikely(!mm || (mm == &init_mm))) + return; + + spin_lock_irqsave(&mm->context.lock, flags); + + if (unlikely(!CTX_VALID(mm->context))) + get_new_mmu_context(mm); + + spin_unlock_irqrestore(&mm->context.lock, flags); + + load_secondary_context(mm); + __flush_tlb_mm(CTX_HWBITS(mm->context), + SECONDARY_CONTEXT); } -void smp_flush_tlb_all(void) +void smp_new_mmu_context_version(void) { - if (tlb_type == spitfire) - smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0); - else - smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0); - __flush_tlb_all(); + smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); +} + +void smp_report_regs(void) +{ + smp_cross_call(&xcall_report_regs, 0, 0, 0); } /* We know that the window frames of the user have been flushed @@ -944,24 +1160,19 @@ void smp_release(void) * can service tlb flush xcalls... */ extern void prom_world(int); -extern void save_alternate_globals(unsigned long *); -extern void restore_alternate_globals(unsigned long *); + void smp_penguin_jailcell(int irq, struct pt_regs *regs) { - unsigned long global_save[24]; - clear_softint(1 << irq); preempt_disable(); __asm__ __volatile__("flushw"); - save_alternate_globals(global_save); prom_world(1); atomic_inc(&smp_capture_registry); membar_storeload_storestore(); while (penguins_are_doing_time) rmb(); - restore_alternate_globals(global_save); atomic_dec(&smp_capture_registry); prom_world(0); @@ -1082,6 +1293,8 @@ int setup_profiling_timer(unsigned int multiplier) /* Constrain the number of cpus to max_cpus. 
*/ void __init smp_prepare_cpus(unsigned int max_cpus) { + int i; + if (num_possible_cpus() > max_cpus) { int instance, mid; @@ -1096,6 +1309,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } + for_each_cpu(i) { + if (tlb_type == hypervisor) { + int j; + + /* XXX get this mapping from machine description */ + for_each_cpu(j) { + if ((j >> 2) == (i >> 2)) + cpu_set(j, cpu_sibling_map[i]); + } + } else { + cpu_set(i, cpu_sibling_map[i]); + } + } + smp_store_cpu_info(boot_cpu_id); } @@ -1117,12 +1344,15 @@ void __init smp_setup_cpu_possible_map(void) void __devinit smp_prepare_boot_cpu(void) { - if (hard_smp_processor_id() >= NR_CPUS) { + int cpu = hard_smp_processor_id(); + + if (cpu >= NR_CPUS) { prom_printf("Serious problem, boot cpu id >= NR_CPUS\n"); prom_halt(); } - current_thread_info()->cpu = hard_smp_processor_id(); + current_thread_info()->cpu = cpu; + __local_per_cpu_offset = __per_cpu_offset(cpu); cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), phys_cpu_present_map); @@ -1139,7 +1369,11 @@ int __devinit __cpu_up(unsigned int cpu) if (!cpu_isset(cpu, cpu_online_map)) { ret = -ENODEV; } else { - smp_synchronize_one_tick(cpu); + /* On SUN4V, writes to %tick and %stick are + * not allowed. 
+ */ + if (tlb_type != hypervisor) + smp_synchronize_one_tick(cpu); } } return ret; @@ -1183,12 +1417,9 @@ void __init setup_per_cpu_areas(void) { unsigned long goal, size, i; char *ptr; - /* Created by linker magic */ - extern char __per_cpu_start[], __per_cpu_end[]; /* Copy section for each CPU (we discard the original) */ - goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); - + goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); #ifdef CONFIG_MODULES if (goal < PERCPU_ENOUGH_ROOM) goal = PERCPU_ENOUGH_ROOM; @@ -1197,31 +1428,10 @@ void __init setup_per_cpu_areas(void) for (size = 1UL; size < goal; size <<= 1UL) __per_cpu_shift++; - /* Make sure the resulting __per_cpu_base value - * will fit in the 43-bit sign extended IMMU - * TSB register. - */ - ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE, - (unsigned long) __per_cpu_start); + ptr = alloc_bootmem(size * NR_CPUS); __per_cpu_base = ptr - __per_cpu_start; - if ((__per_cpu_shift < PAGE_SHIFT) || - (__per_cpu_base & ~PAGE_MASK) || - (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) { - prom_printf("PER_CPU: Invalid layout, " - "ptr[%p] shift[%lx] base[%lx]\n", - ptr, __per_cpu_shift, __per_cpu_base); - prom_halt(); - } - for (i = 0; i < NR_CPUS; i++, ptr += size) memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - - /* Finally, load in the boot cpu's base value. - * We abuse the IMMU TSB register for trap handler - * entry and exit loading of %g5. That is why it - * has to be page aligned. 
- */ - cpu_setup_percpu_base(hard_smp_processor_id()); } diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 3c06bfb92a8c..9914a17651b4 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -95,9 +95,6 @@ extern int __ashrdi3(int, int); extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs); -extern unsigned long phys_base; -extern unsigned long pfn_base; - extern unsigned int sys_call_table[]; extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); @@ -108,6 +105,14 @@ extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *, extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); +extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + /* Per-CPU information table */ EXPORT_PER_CPU_SYMBOL(__cpu_data); @@ -241,10 +246,6 @@ EXPORT_SYMBOL(verify_compat_iovec); #endif EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(pte_alloc_one_kernel); -#ifndef CONFIG_SMP -EXPORT_SYMBOL(pgt_quicklists); -#endif EXPORT_SYMBOL(put_fs_struct); /* math-emu wants this */ @@ -339,14 +340,10 @@ EXPORT_SYMBOL(copy_to_user_fixup); EXPORT_SYMBOL(copy_from_user_fixup); EXPORT_SYMBOL(copy_in_user_fixup); EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(__bzero_noasi); +EXPORT_SYMBOL(__clear_user); /* Various address conversion macros use this. 
*/ -EXPORT_SYMBOL(phys_base); -EXPORT_SYMBOL(pfn_base); EXPORT_SYMBOL(sparc64_valid_addr_bitmap); -EXPORT_SYMBOL(page_to_pfn); -EXPORT_SYMBOL(pfn_to_page); /* No version information on this, heavily used in inline asm, * and will always be 'void __ret_efault(void)'. @@ -392,4 +389,9 @@ EXPORT_SYMBOL(xor_vis_3); EXPORT_SYMBOL(xor_vis_4); EXPORT_SYMBOL(xor_vis_5); +EXPORT_SYMBOL(xor_niagara_2); +EXPORT_SYMBOL(xor_niagara_3); +EXPORT_SYMBOL(xor_niagara_4); +EXPORT_SYMBOL(xor_niagara_5); + EXPORT_SYMBOL(prom_palette); diff --git a/arch/sparc64/kernel/sun4v_ivec.S b/arch/sparc64/kernel/sun4v_ivec.S new file mode 100644 index 000000000000..b49a68bdda43 --- /dev/null +++ b/arch/sparc64/kernel/sun4v_ivec.S @@ -0,0 +1,334 @@ +/* sun4v_ivec.S: Sun4v interrupt vector handling. + * + * Copyright (C) 2006 <davem@davemloft.net> + */ + +#include <asm/cpudata.h> +#include <asm/intr_queue.h> + + .text + .align 32 + +sun4v_cpu_mondo: + /* Head offset in %g2, tail offset in %g4. + * If they are the same, no work. + */ + mov INTRQ_CPU_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_CPU_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_cpu_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get CPU mondo queue base phys address into %g7. 
*/ + ldx [%g3 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 + + /* Now get the cross-call arguments and handler PC, same + * layout as sun4u: + * + * 1st 64-bit word: low half is 32-bit PC, put into %g3 and jmpl to it + * high half is context arg to MMU flushes, into %g5 + * 2nd 64-bit word: 64-bit arg, load into %g1 + * 3rd 64-bit word: 64-bit arg, load into %g7 + */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x8, %g2 + srlx %g3, 32, %g5 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + add %g2, 0x8, %g2 + srl %g3, 0, %g3 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g7 + add %g2, 0x40 - 0x8 - 0x8, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_CPU_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + jmpl %g3, %g0 + nop + +sun4v_cpu_mondo_queue_empty: + retry + +sun4v_dev_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_DEVICE_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_DEVICE_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_dev_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get DEV mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_DEV_MONDO_PA], %g5 + + /* Load IVEC into %g3. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x40, %g2 + + /* XXX There can be a full 64-byte block of data here. + * XXX This is how we can get at MSI vector data. + * XXX Currently we do not capture this, but when we do we'll + * XXX need to add a 64-byte storage area in the struct ino_bucket + * XXX or the struct irq_desc. + */ + + /* Update queue head pointer, this frees up some registers. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_DEVICE_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Get &__irq_work[smp_processor_id()] into %g1. 
*/ + TRAP_LOAD_IRQ_WORK(%g1, %g4) + + /* Get &ivector_table[IVEC] into %g4. */ + sethi %hi(ivector_table), %g4 + sllx %g3, 5, %g3 + or %g4, %lo(ivector_table), %g4 + add %g4, %g3, %g4 + + /* Load IRQ %pil into %g5. */ + ldub [%g4 + 0x04], %g5 + + /* Insert ivector_table[] entry into __irq_work[] queue. */ + sllx %g5, 2, %g3 + lduw [%g1 + %g3], %g2 /* g2 = irq_work(cpu, pil) */ + stw %g2, [%g4 + 0x00] /* bucket->irq_chain = g2 */ + stw %g4, [%g1 + %g3] /* irq_work(cpu, pil) = bucket */ + + /* Signal the interrupt by setting (1 << pil) in %softint. */ + mov 1, %g2 + sllx %g2, %g5, %g2 + wr %g2, 0x0, %set_softint + +sun4v_dev_mondo_queue_empty: + retry + +sun4v_res_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_RESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_res_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get RES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5 + + /* Get RES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_res_mondo_queue_full + nop + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. 
*/ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_RESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. + */ + rdpr %pil, %g2 + wrpr %g0, 15, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_resum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_res_mondo_queue_empty: + retry + +sun4v_res_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + call sun4v_resum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo: + /* Head offset in %g2, tail offset in %g4. 
*/ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_NONRESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_nonres_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get NONRES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5 + + /* Get NONRES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_nonres_mondo_queue_full + nop + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_NONRESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. 
+ */ + rdpr %pil, %g2 + wrpr %g0, 15, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_nonresum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo_queue_empty: + retry + +sun4v_nonres_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + call sun4v_nonresum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S new file mode 100644 index 000000000000..ab23ddb7116e --- /dev/null +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -0,0 +1,421 @@ +/* sun4v_tlb_miss.S: Sun4v TLB miss handlers. + * + * Copyright (C) 2006 <davem@davemloft.net> + */ + + .text + .align 32 + + /* Load ITLB fault information into VADDR and CTX, using BASE. */ +#define LOAD_ITLB_INFO(BASE, VADDR, CTX) \ + ldx [BASE + HV_FAULT_I_ADDR_OFFSET], VADDR; \ + ldx [BASE + HV_FAULT_I_CTX_OFFSET], CTX; + + /* Load DTLB fault information into VADDR and CTX, using BASE. */ +#define LOAD_DTLB_INFO(BASE, VADDR, CTX) \ + ldx [BASE + HV_FAULT_D_ADDR_OFFSET], VADDR; \ + ldx [BASE + HV_FAULT_D_CTX_OFFSET], CTX; + + /* DEST = (VADDR >> 22) + * + * Branch to ZERO_CTX_LABEL if context is zero. + */ +#define COMPUTE_TAG_TARGET(DEST, VADDR, CTX, ZERO_CTX_LABEL) \ + srlx VADDR, 22, DEST; \ + brz,pn CTX, ZERO_CTX_LABEL; \ + nop; + + /* Create TSB pointer. 
This is something like: + * + * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; + * tsb_base = tsb_reg & ~0x7UL; + * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask); + * tsb_ptr = tsb_base + (tsb_index * 16); + */ +#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, TMP1, TMP2) \ + and TSB_PTR, 0x7, TMP1; \ + mov 512, TMP2; \ + andn TSB_PTR, 0x7, TSB_PTR; \ + sllx TMP2, TMP1, TMP2; \ + srlx VADDR, PAGE_SHIFT, TMP1; \ + sub TMP2, 1, TMP2; \ + and TMP1, TMP2, TMP1; \ + sllx TMP1, 4, TMP1; \ + add TSB_PTR, TMP1, TSB_PTR; + +sun4v_itlb_miss: + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 + + /* Load UTSB reg into %g1. */ + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + + LOAD_ITLB_INFO(%g2, %g4, %g5) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v) + COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 + cmp %g2, %g6 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_ITLB, %g3 + andcc %g3, _PAGE_EXEC_4V, %g0 + be,a,pn %xcc, tsb_do_fault + mov FAULT_CODE_ITLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * I-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + */ +sun4v_itlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_IMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + brnz,pn %o0, sun4v_itlb_error + mov %g2, %o1 ! restore %o1 + mov %g1, %o0 ! restore %o0 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_miss: + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 + + /* Load UTSB reg into %g1. 
*/ + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + + LOAD_DTLB_INFO(%g2, %g4, %g5) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v) + COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 + cmp %g2, %g6 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_DTLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * D-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + */ +sun4v_dtlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_DMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + brnz,pn %o0, sun4v_dtlb_error + mov %g2, %o1 ! restore %o1 + mov %g1, %o0 ! restore %o0 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_prot: + SET_GL(1) + + /* Load MMU Miss base into %g5. */ + ldxa [%g0] ASI_SCRATCHPAD, %g5 + + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 + rdpr %tl, %g1 + cmp %g1, 1 + bgu,pn %xcc, winfix_trampoline + nop + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 + + /* Called from trap table: + * %g4: vaddr + * %g5: context + * %g6: TAG TARGET + */ +sun4v_itsb_miss: + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + brz,pn %g5, kvmap_itlb_4v + mov FAULT_CODE_ITLB, %g3 + ba,a,pt %xcc, sun4v_tsb_miss_common + + /* Called from trap table: + * %g4: vaddr + * %g5: context + * %g6: TAG TARGET + */ +sun4v_dtsb_miss: + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + brz,pn %g5, kvmap_dtlb_4v + mov FAULT_CODE_DTLB, %g3 + + /* fallthrough */ + + /* Create TSB pointer into %g1. 
This is something like: + * + * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; + * tsb_base = tsb_reg & ~0x7UL; + * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask); + * tsb_ptr = tsb_base + (tsb_index * 16); + */ +sun4v_tsb_miss_common: + COMPUTE_TSB_PTR(%g1, %g4, %g5, %g7) + + /* Branch directly to page table lookup. We have SCRATCHPAD_MMU_MISS + * still in %g2, so it's quite trivial to get at the PGD PHYS value + * so we can preload it into %g7. + */ + sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath + ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 + +sun4v_itlb_error: + sethi %hi(sun4v_err_itlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] + sethi %hi(sun4v_err_itlb_ctx), %g1 + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 + stx %o1, [%g1 + %lo(sun4v_err_itlb_ctx)] + sethi %hi(sun4v_err_itlb_pte), %g1 + stx %g3, [%g1 + %lo(sun4v_err_itlb_pte)] + sethi %hi(sun4v_err_itlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + + rdpr %tl, %g4 + cmp %g4, 1 + ble,pt %icc, 1f + sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 + or %g7, %lo(2f), %g7 + +1: ba,pt %xcc, etrap +2: or %g7, %lo(2b), %g7 + call sun4v_itlb_error_report + add %sp, PTREGS_OFF, %o0 + + /* NOTREACHED */ + +sun4v_dtlb_error: + sethi %hi(sun4v_err_dtlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] + sethi %hi(sun4v_err_dtlb_ctx), %g1 + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 + stx %o1, [%g1 + %lo(sun4v_err_dtlb_ctx)] + sethi %hi(sun4v_err_dtlb_pte), %g1 + stx %g3, [%g1 + %lo(sun4v_err_dtlb_pte)] + sethi %hi(sun4v_err_dtlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + + rdpr %tl, %g4 + cmp %g4, 1 + ble,pt %icc, 1f + sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 + or %g7, %lo(2f), %g7 + +1: ba,pt %xcc, etrap +2: or %g7, %lo(2b), %g7 + call sun4v_dtlb_error_report + add %sp, PTREGS_OFF, %o0 + + /* NOTREACHED */ + + /* Instruction Access Exception, tl0. 
*/ +sun4v_iacc: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_insn_access_exception + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Instruction Access Exception, tl1. */ +sun4v_iacc_tl1: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etraptl1 + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_insn_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Data Access Exception, tl0. */ +sun4v_dacc: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_data_access_exception + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Data Access Exception, tl1. */ +sun4v_dacc_tl1: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etraptl1 + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_data_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Memory Address Unaligned. */ +sun4v_mna: + /* Window fixup? 
*/ + rdpr %tl, %g2 + cmp %g2, 1 + ble,pt %icc, 1f + nop + + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g5 + mov HV_FAULT_TYPE_UNALIGNED, %g3 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g4 + sllx %g3, 16, %g3 + or %g4, %g3, %g4 + ba,pt %xcc, winfix_mna + rdpr %tpc, %g3 + /* not reached */ + +1: ldxa [%g0] ASI_SCRATCHPAD, %g2 + mov HV_FAULT_TYPE_UNALIGNED, %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_do_mna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Privileged Action. */ +sun4v_privact: + ba,pt %xcc, etrap + rd %pc, %g7 + call do_privact + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Unaligned ldd float, tl0. */ +sun4v_lddfmna: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_lddfmna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Unaligned std float, tl0. 
*/ +sun4v_stdfmna: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_stdfmna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define SUN4V_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl sun4v_patch_tlb_handlers + .type sun4v_patch_tlb_handlers,#function +sun4v_patch_tlb_handlers: + SUN4V_DO_PATCH(tl0_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl1_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl0_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl1_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl0_daprot, sun4v_dtlb_prot) + SUN4V_DO_PATCH(tl1_daprot, sun4v_dtlb_prot) + SUN4V_DO_PATCH(tl0_iax, sun4v_iacc) + SUN4V_DO_PATCH(tl1_iax, sun4v_iacc_tl1) + SUN4V_DO_PATCH(tl0_dax, sun4v_dacc) + SUN4V_DO_PATCH(tl1_dax, sun4v_dacc_tl1) + SUN4V_DO_PATCH(tl0_mna, sun4v_mna) + SUN4V_DO_PATCH(tl1_mna, sun4v_mna) + SUN4V_DO_PATCH(tl0_lddfmna, sun4v_lddfmna) + SUN4V_DO_PATCH(tl0_stdfmna, sun4v_stdfmna) + SUN4V_DO_PATCH(tl0_privact, sun4v_privact) + retl + nop + .size sun4v_patch_tlb_handlers,.-sun4v_patch_tlb_handlers diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 5f8c822a2b4a..7a869138c37f 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -25,25 +25,93 @@ #include <linux/syscalls.h> #include <linux/ipc.h> #include <linux/personality.h> +#include <linux/random.h> #include 
<asm/uaccess.h> #include <asm/ipc.h> #include <asm/utrap.h> #include <asm/perfctr.h> +#include <asm/a.out.h> /* #define DEBUG_UNIMP_SYSCALL */ -/* XXX Make this per-binary type, this way we can detect the type of - * XXX a binary. Every Sparc executable calls this very early on. - */ asmlinkage unsigned long sys_getpagesize(void) { return PAGE_SIZE; } -#define COLOUR_ALIGN(addr,pgoff) \ - ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ - (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) +#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) +#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) + +/* Does addr --> addr+len fall within 4GB of the VA-space hole or + * overflow past the end of the 64-bit address space? + */ +static inline int invalid_64bit_range(unsigned long addr, unsigned long len) +{ + unsigned long va_exclude_start, va_exclude_end; + + va_exclude_start = VA_EXCLUDE_START; + va_exclude_end = VA_EXCLUDE_END; + + if (unlikely(len >= va_exclude_start)) + return 1; + + if (unlikely((addr + len) < addr)) + return 1; + + if (unlikely((addr >= va_exclude_start && addr < va_exclude_end) || + ((addr + len) >= va_exclude_start && + (addr + len) < va_exclude_end))) + return 1; + + return 0; +} + +/* Does start,end straddle the VA-space hole? */ +static inline int straddles_64bit_va_hole(unsigned long start, unsigned long end) +{ + unsigned long va_exclude_start, va_exclude_end; + + va_exclude_start = VA_EXCLUDE_START; + va_exclude_end = VA_EXCLUDE_END; + + if (likely(start < va_exclude_start && end < va_exclude_start)) + return 0; + + if (likely(start >= va_exclude_end && end >= va_exclude_end)) + return 0; + + return 1; +} + +/* These functions differ from the default implementations in + * mm/mmap.c in two ways: + * + * 1) For file backed MAP_SHARED mmap()'s we D-cache color align, + * for fixed such mappings we just validate what the user gave us. + * 2) For 64-bit tasks we avoid mapping anything within 4GB of + * the spitfire/niagara VA-hole. 
+ */ + +static inline unsigned long COLOUR_ALIGN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = (addr+SHMLBA-1)&~(SHMLBA-1); + unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1); + + return base + off; +} + +static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = addr & ~(SHMLBA-1); + unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1); + + if (base + off <= addr) + return base + off; + return base - off; +} unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { @@ -64,8 +132,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi } if (test_thread_flag(TIF_32BIT)) - task_size = 0xf0000000UL; - if (len > task_size || len > -PAGE_OFFSET) + task_size = STACK_TOP32; + if (unlikely(len > task_size || len >= VA_EXCLUDE_START)) return -ENOMEM; do_color_align = 0; @@ -84,11 +152,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi return addr; } - if (len <= mm->cached_hole_size) { + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; mm->cached_hole_size = 0; - mm->free_area_cache = TASK_UNMAPPED_BASE; } - start_addr = addr = mm->free_area_cache; task_size -= len; @@ -100,11 +169,12 @@ full_search: for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). 
*/ - if (addr < PAGE_OFFSET && -PAGE_OFFSET - len < addr) { - addr = PAGE_OFFSET; - vma = find_vma(mm, PAGE_OFFSET); + if (addr < VA_EXCLUDE_START && + (addr + len) >= VA_EXCLUDE_START) { + addr = VA_EXCLUDE_END; + vma = find_vma(mm, VA_EXCLUDE_END); } - if (task_size < addr) { + if (unlikely(task_size < addr)) { if (start_addr != TASK_UNMAPPED_BASE) { start_addr = addr = TASK_UNMAPPED_BASE; mm->cached_hole_size = 0; @@ -112,7 +182,7 @@ full_search: } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (likely(!vma || addr + len <= vma->vm_start)) { /* * Remember the place where we stopped the search: */ @@ -128,6 +198,121 @@ full_search: } } +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long task_size = STACK_TOP32; + unsigned long addr = addr0; + int do_color_align; + + /* This should only ever run for 32-bit processes. */ + BUG_ON(!test_thread_flag(TIF_32BIT)); + + if (flags & MAP_FIXED) { + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. 
+ */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; + } + + if (unlikely(len > task_size)) + return -ENOMEM; + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + /* requesting a specific address */ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + if (do_color_align) { + unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff); + + addr = base + len; + } + + /* make sure it can fit in the remaining address space */ + if (likely(addr > len)) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + } + + if (unlikely(mm->mmap_base < len)) + goto bottomup; + + addr = mm->mmap_base-len; + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (likely(!vma || addr+len <= vma->vm_start)) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + } + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + } while (likely(len < vma->vm_start)); + +bottomup: + /* + * A 
failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + /* Try to align mapping such that we align it as much as possible. */ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { @@ -171,15 +356,57 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, u return addr; } +/* Essentially the same as PowerPC... */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) { + random_factor = get_random_int(); + if (test_thread_flag(TIF_32BIT)) + random_factor &= ((1 * 1024 * 1024) - 1); + else + random_factor = ((random_factor << PAGE_SHIFT) & + 0xffffffffUL); + } + + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (!test_thread_flag(TIF_32BIT) || + (current->personality & ADDR_COMPAT_LAYOUT) || + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || + sysctl_legacy_va_layout) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + /* We know it's 32-bit */ + unsigned long task_size = STACK_TOP32; + unsigned long gap; + + gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + if (gap < 128 * 1024 * 1024) + gap = 128 * 1024 * 1024; + if (gap > (task_size / 6 * 5)) + gap = (task_size / 6 * 5); + + mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); + mm->get_unmapped_area = 
arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} + asmlinkage unsigned long sparc_brk(unsigned long brk) { /* People could try to be nasty and use ta 0x6d in 32bit programs */ - if (test_thread_flag(TIF_32BIT) && - brk >= 0xf0000000UL) + if (test_thread_flag(TIF_32BIT) && brk >= STACK_TOP32) return current->mm->brk; - if ((current->mm->brk & PAGE_OFFSET) != (brk & PAGE_OFFSET)) + if (unlikely(straddles_64bit_va_hole(current->mm->brk, brk))) return current->mm->brk; + return sys_brk(brk); } @@ -340,13 +567,16 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, retval = -EINVAL; if (test_thread_flag(TIF_32BIT)) { - if (len > 0xf0000000UL || - ((flags & MAP_FIXED) && addr > 0xf0000000UL - len)) + if (len >= STACK_TOP32) + goto out_putf; + + if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len) goto out_putf; } else { - if (len > -PAGE_OFFSET || - ((flags & MAP_FIXED) && - addr < PAGE_OFFSET && addr + len > -PAGE_OFFSET)) + if (len >= VA_EXCLUDE_START) + goto out_putf; + + if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len)) goto out_putf; } @@ -365,9 +595,9 @@ asmlinkage long sys64_munmap(unsigned long addr, size_t len) { long ret; - if (len > -PAGE_OFFSET || - (addr < PAGE_OFFSET && addr + len > -PAGE_OFFSET)) + if (invalid_64bit_range(addr, len)) return -EINVAL; + down_write(¤t->mm->mmap_sem); ret = do_munmap(current->mm, addr, len); up_write(¤t->mm->mmap_sem); @@ -384,18 +614,19 @@ asmlinkage unsigned long sys64_mremap(unsigned long addr, { struct vm_area_struct *vma; unsigned long ret = -EINVAL; + if (test_thread_flag(TIF_32BIT)) goto out; - if (old_len > -PAGE_OFFSET || new_len > -PAGE_OFFSET) + if (unlikely(new_len >= VA_EXCLUDE_START)) goto out; - if (addr < PAGE_OFFSET && addr + old_len > -PAGE_OFFSET) + if (unlikely(invalid_64bit_range(addr, old_len))) goto out; + down_write(¤t->mm->mmap_sem); if (flags & MREMAP_FIXED) { - if (new_addr < PAGE_OFFSET && - new_addr + new_len > -PAGE_OFFSET) + if 
(invalid_64bit_range(new_addr, new_len)) goto out_sem; - } else if (addr < PAGE_OFFSET && addr + new_len > -PAGE_OFFSET) { + } else if (invalid_64bit_range(addr, new_len)) { unsigned long map_flags = 0; struct file *file = NULL; @@ -554,12 +785,10 @@ asmlinkage long sys_utrap_install(utrap_entry_t type, } if (!current_thread_info()->utraps) { current_thread_info()->utraps = - kmalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL); + kzalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL); if (!current_thread_info()->utraps) return -ENOMEM; current_thread_info()->utraps[0] = 1; - memset(current_thread_info()->utraps+1, 0, - UT_TRAP_INSTRUCTION_31*sizeof(long)); } else { if ((utrap_handler_t)current_thread_info()->utraps[type] != new_p && current_thread_info()->utraps[0] > 1) { diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 417727bd87ba..0e41df024489 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -62,6 +62,7 @@ #include <asm/fpumacro.h> #include <asm/semaphore.h> #include <asm/mmu_context.h> +#include <asm/a.out.h> asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group) { @@ -1039,15 +1040,15 @@ asmlinkage unsigned long sys32_mremap(unsigned long addr, unsigned long ret = -EINVAL; unsigned long new_addr = __new_addr; - if (old_len > 0xf0000000UL || new_len > 0xf0000000UL) + if (old_len > STACK_TOP32 || new_len > STACK_TOP32) goto out; - if (addr > 0xf0000000UL - old_len) + if (addr > STACK_TOP32 - old_len) goto out; down_write(¤t->mm->mmap_sem); if (flags & MREMAP_FIXED) { - if (new_addr > 0xf0000000UL - new_len) + if (new_addr > STACK_TOP32 - new_len) goto out_sem; - } else if (addr > 0xf0000000UL - new_len) { + } else if (addr > STACK_TOP32 - new_len) { unsigned long map_flags = 0; struct file *file = NULL; diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index a22930d62adf..7d61f1bfd3d3 100644 --- a/arch/sparc64/kernel/time.c 
+++ b/arch/sparc64/kernel/time.c @@ -30,6 +30,8 @@ #include <linux/cpufreq.h> #include <linux/percpu.h> #include <linux/profile.h> +#include <linux/miscdevice.h> +#include <linux/rtc.h> #include <asm/oplib.h> #include <asm/mostek.h> @@ -45,6 +47,7 @@ #include <asm/smp.h> #include <asm/sections.h> #include <asm/cpudata.h> +#include <asm/uaccess.h> DEFINE_SPINLOCK(mostek_lock); DEFINE_SPINLOCK(rtc_lock); @@ -193,16 +196,22 @@ struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations; static void stick_init_tick(unsigned long offset) { - tick_disable_protection(); - - /* Let the user get at STICK too. */ - __asm__ __volatile__( - " rd %%asr24, %%g2\n" - " andn %%g2, %0, %%g2\n" - " wr %%g2, 0, %%asr24" - : /* no outputs */ - : "r" (TICK_PRIV_BIT) - : "g1", "g2"); + /* Writes to the %tick and %stick register are not + * allowed on sun4v. The Hypervisor controls that + * bit, per-strand. + */ + if (tlb_type != hypervisor) { + tick_disable_protection(); + + /* Let the user get at STICK too. */ + __asm__ __volatile__( + " rd %%asr24, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wr %%g2, 0, %%asr24" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g1", "g2"); + } __asm__ __volatile__( " rd %%asr24, %%g1\n" @@ -683,6 +692,83 @@ static void __init set_system_time(void) } } +/* davem suggests we keep this within the 4M locked kernel image */ +static u32 starfire_get_time(void) +{ + static char obp_gettod[32]; + static u32 unix_tod; + + sprintf(obp_gettod, "h# %08x unix-gettod", + (unsigned int) (long) &unix_tod); + prom_feval(obp_gettod); + + return unix_tod; +} + +static int starfire_set_time(u32 val) +{ + /* Do nothing, time is set using the service processor + * console on this platform. 
+ */ + return 0; +} + +static u32 hypervisor_get_time(void) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + int retries = 10000; + +retry: + func = HV_FAST_TOD_GET; + arg0 = 0; + arg1 = 0; + __asm__ __volatile__("ta %6" + : "=&r" (func), "=&r" (arg0), "=&r" (arg1) + : "0" (func), "1" (arg0), "2" (arg1), + "i" (HV_FAST_TRAP)); + if (arg0 == HV_EOK) + return arg1; + if (arg0 == HV_EWOULDBLOCK) { + if (--retries > 0) { + udelay(100); + goto retry; + } + printk(KERN_WARNING "SUN4V: tod_get() timed out.\n"); + return 0; + } + printk(KERN_WARNING "SUN4V: tod_get() not supported.\n"); + return 0; +} + +static int hypervisor_set_time(u32 secs) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + int retries = 10000; + +retry: + func = HV_FAST_TOD_SET; + arg0 = secs; + __asm__ __volatile__("ta %4" + : "=&r" (func), "=&r" (arg0) + : "0" (func), "1" (arg0), + "i" (HV_FAST_TRAP)); + if (arg0 == HV_EOK) + return 0; + if (arg0 == HV_EWOULDBLOCK) { + if (--retries > 0) { + udelay(100); + goto retry; + } + printk(KERN_WARNING "SUN4V: tod_set() timed out.\n"); + return -EAGAIN; + } + printk(KERN_WARNING "SUN4V: tod_set() not supported.\n"); + return -EOPNOTSUPP; +} + void __init clock_probe(void) { struct linux_prom_registers clk_reg[2]; @@ -702,14 +788,14 @@ void __init clock_probe(void) if (this_is_starfire) { - /* davem suggests we keep this within the 4M locked kernel image */ - static char obp_gettod[256]; - static u32 unix_tod; - - sprintf(obp_gettod, "h# %08x unix-gettod", - (unsigned int) (long) &unix_tod); - prom_feval(obp_gettod); - xtime.tv_sec = unix_tod; + xtime.tv_sec = starfire_get_time(); + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + return; + } + if (tlb_type == hypervisor) { + xtime.tv_sec = hypervisor_get_time(); xtime.tv_nsec = (INITIAL_JIFFIES % 
HZ) * (NSEC_PER_SEC / HZ); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); @@ -981,11 +1067,10 @@ static void sparc64_start_timers(irqreturn_t (*cfunc)(int, void *, struct pt_reg } struct freq_table { - unsigned long udelay_val_ref; unsigned long clock_tick_ref; unsigned int ref_freq; }; -static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0, 0 }; +static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 }; unsigned long sparc64_get_clock_tick(unsigned int cpu) { @@ -1007,16 +1092,11 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val if (!ft->ref_freq) { ft->ref_freq = freq->old; - ft->udelay_val_ref = cpu_data(cpu).udelay_val; ft->clock_tick_ref = cpu_data(cpu).clock_tick; } if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || (val == CPUFREQ_RESUMECHANGE)) { - cpu_data(cpu).udelay_val = - cpufreq_scale(ft->udelay_val_ref, - ft->ref_freq, - freq->new); cpu_data(cpu).clock_tick = cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, @@ -1179,3 +1259,246 @@ static int set_rtc_mmss(unsigned long nowtime) return retval; } } + +#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */ +static unsigned char mini_rtc_status; /* bitmapped status byte. */ + +/* months start at 0 now */ +static unsigned char days_in_mo[] = +{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +#define FEBRUARY 2 +#define STARTOFTIME 1970 +#define SECDAY 86400L +#define SECYR (SECDAY * 365) +#define leapyear(year) ((year) % 4 == 0 && \ + ((year) % 100 != 0 || (year) % 400 == 0)) +#define days_in_year(a) (leapyear(a) ? 366 : 365) +#define days_in_month(a) (month_days[(a) - 1]) + +static int month_days[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * This only works for the Gregorian calendar - i.e. 
after 1752 (in the UK) + */ +static void GregorianDay(struct rtc_time * tm) +{ + int leapsToDate; + int lastYear; + int day; + int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; + + lastYear = tm->tm_year - 1; + + /* + * Number of leap corrections to apply up to end of last year + */ + leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400; + + /* + * This year is a leap year if it is divisible by 4 except when it is + * divisible by 100 unless it is divisible by 400 + * + * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was + */ + day = tm->tm_mon > 2 && leapyear(tm->tm_year); + + day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + + tm->tm_mday; + + tm->tm_wday = day % 7; +} + +static void to_tm(int tim, struct rtc_time *tm) +{ + register int i; + register long hms, day; + + day = tim / SECDAY; + hms = tim % SECDAY; + + /* Hours, minutes, seconds are easy */ + tm->tm_hour = hms / 3600; + tm->tm_min = (hms % 3600) / 60; + tm->tm_sec = (hms % 3600) % 60; + + /* Number of years in days */ + for (i = STARTOFTIME; day >= days_in_year(i); i++) + day -= days_in_year(i); + tm->tm_year = i; + + /* Number of months in days left */ + if (leapyear(tm->tm_year)) + days_in_month(FEBRUARY) = 29; + for (i = 1; day >= days_in_month(i); i++) + day -= days_in_month(i); + days_in_month(FEBRUARY) = 28; + tm->tm_mon = i; + + /* Days are what is left over (+1) from all that. */ + tm->tm_mday = day + 1; + + /* + * Determine the day of week + */ + GregorianDay(tm); +} + +/* Both Starfire and SUN4V give us seconds since Jan 1st, 1970, + * aka Unix time. So we have to convert to/from rtc_time. 
+ */ +static inline void mini_get_rtc_time(struct rtc_time *time) +{ + unsigned long flags; + u32 seconds; + + spin_lock_irqsave(&rtc_lock, flags); + seconds = 0; + if (this_is_starfire) + seconds = starfire_get_time(); + else if (tlb_type == hypervisor) + seconds = hypervisor_get_time(); + spin_unlock_irqrestore(&rtc_lock, flags); + + to_tm(seconds, time); + time->tm_year -= 1900; + time->tm_mon -= 1; +} + +static inline int mini_set_rtc_time(struct rtc_time *time) +{ + u32 seconds = mktime(time->tm_year + 1900, time->tm_mon + 1, + time->tm_mday, time->tm_hour, + time->tm_min, time->tm_sec); + unsigned long flags; + int err; + + spin_lock_irqsave(&rtc_lock, flags); + err = -ENODEV; + if (this_is_starfire) + err = starfire_set_time(seconds); + else if (tlb_type == hypervisor) + err = hypervisor_set_time(seconds); + spin_unlock_irqrestore(&rtc_lock, flags); + + return err; +} + +static int mini_rtc_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct rtc_time wtime; + void __user *argp = (void __user *)arg; + + switch (cmd) { + + case RTC_PLL_GET: + return -EINVAL; + + case RTC_PLL_SET: + return -EINVAL; + + case RTC_UIE_OFF: /* disable ints from RTC updates. */ + return 0; + + case RTC_UIE_ON: /* enable ints for RTC updates. */ + return -EINVAL; + + case RTC_RD_TIME: /* Read the time/date from RTC */ + /* this doesn't get week-day, who cares */ + memset(&wtime, 0, sizeof(wtime)); + mini_get_rtc_time(&wtime); + + return copy_to_user(argp, &wtime, sizeof(wtime)) ? 
-EFAULT : 0; + + case RTC_SET_TIME: /* Set the RTC */ + { + int year; + unsigned char leap_yr; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + if (copy_from_user(&wtime, argp, sizeof(wtime))) + return -EFAULT; + + year = wtime.tm_year + 1900; + leap_yr = ((!(year % 4) && (year % 100)) || + !(year % 400)); + + if ((wtime.tm_mon < 0 || wtime.tm_mon > 11) || (wtime.tm_mday < 1)) + return -EINVAL; + + if (wtime.tm_mday < 0 || wtime.tm_mday > + (days_in_mo[wtime.tm_mon] + ((wtime.tm_mon == 1) && leap_yr))) + return -EINVAL; + + if (wtime.tm_hour < 0 || wtime.tm_hour >= 24 || + wtime.tm_min < 0 || wtime.tm_min >= 60 || + wtime.tm_sec < 0 || wtime.tm_sec >= 60) + return -EINVAL; + + return mini_set_rtc_time(&wtime); + } + } + + return -EINVAL; +} + +static int mini_rtc_open(struct inode *inode, struct file *file) +{ + if (mini_rtc_status & RTC_IS_OPEN) + return -EBUSY; + + mini_rtc_status |= RTC_IS_OPEN; + + return 0; +} + +static int mini_rtc_release(struct inode *inode, struct file *file) +{ + mini_rtc_status &= ~RTC_IS_OPEN; + return 0; +} + + +static struct file_operations mini_rtc_fops = { + .owner = THIS_MODULE, + .ioctl = mini_rtc_ioctl, + .open = mini_rtc_open, + .release = mini_rtc_release, +}; + +static struct miscdevice rtc_mini_dev = +{ + .minor = RTC_MINOR, + .name = "rtc", + .fops = &mini_rtc_fops, +}; + +static int __init rtc_mini_init(void) +{ + int retval; + + if (tlb_type != hypervisor && !this_is_starfire) + return -ENODEV; + + printk(KERN_INFO "Mini RTC Driver\n"); + + retval = misc_register(&rtc_mini_dev); + if (retval < 0) + return retval; + + return 0; +} + +static void __exit rtc_mini_exit(void) +{ + misc_deregister(&rtc_mini_dev); +} + + +module_init(rtc_mini_init); +module_exit(rtc_mini_exit); diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 9478551cb020..a4dc01a3d238 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -16,6 +16,8 @@ #include <asm/processor.h> 
#include <asm/thread_info.h> #include <asm/mmu.h> +#include <asm/hypervisor.h> +#include <asm/cpudata.h> .data .align 8 @@ -28,14 +30,19 @@ itlb_load: dtlb_load: .asciz "SUNW,dtlb-load" + /* XXX __cpuinit this thing XXX */ +#define TRAMP_STACK_SIZE 1024 + .align 16 +tramp_stack: + .skip TRAMP_STACK_SIZE + .text .align 8 .globl sparc64_cpu_startup, sparc64_cpu_startup_end sparc64_cpu_startup: - flushw - - BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_startup) - BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_startup) + BRANCH_IF_SUN4V(g1, niagara_startup) + BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup) + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup) ba,pt %xcc, spitfire_startup nop @@ -55,6 +62,7 @@ cheetah_startup: or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 stxa %g5, [%g0] ASI_DCU_CONTROL_REG membar #Sync + /* fallthru */ cheetah_generic_startup: mov TSB_EXTENSION_P, %g3 @@ -70,7 +78,9 @@ cheetah_generic_startup: stxa %g0, [%g3] ASI_DMMU stxa %g0, [%g3] ASI_IMMU membar #Sync + /* fallthru */ +niagara_startup: /* Disable STICK_INT interrupts. */ sethi %hi(0x80000000), %g5 sllx %g5, 32, %g5 @@ -85,17 +95,17 @@ spitfire_startup: membar #Sync startup_continue: - wrpr %g0, 15, %pil - sethi %hi(0x80000000), %g2 sllx %g2, 32, %g2 wr %g2, 0, %tick_cmpr + mov %o0, %l0 + + BRANCH_IF_SUN4V(g1, niagara_lock_tlb) + /* Call OBP by hand to lock KERNBASE into i/d tlbs. * We lock 2 consequetive entries if we are 'bigkernel'. 
*/ - mov %o0, %l0 - sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 membar #StoreLoad | #StoreStore @@ -105,7 +115,6 @@ startup_continue: sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 ldx [%g2 + 0x10], %l2 - mov %sp, %l1 add %l2, -(192 + 128), %sp flushw @@ -142,8 +151,7 @@ startup_continue: sethi %hi(bigkernel), %g2 lduw [%g2 + %lo(bigkernel)], %g2 - cmp %g2, 0 - be,pt %icc, do_dtlb + brz,pt %g2, do_dtlb nop sethi %hi(call_method), %g2 @@ -214,8 +222,7 @@ do_dtlb: sethi %hi(bigkernel), %g2 lduw [%g2 + %lo(bigkernel)], %g2 - cmp %g2, 0 - be,pt %icc, do_unlock + brz,pt %g2, do_unlock nop sethi %hi(call_method), %g2 @@ -257,99 +264,180 @@ do_unlock: stb %g0, [%g2 + %lo(prom_entry_lock)] membar #StoreStore | #StoreLoad - mov %l1, %sp - flushw + ba,pt %xcc, after_lock_tlb + nop + +niagara_lock_tlb: + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + mov HV_MMU_IMMU, %o3 + ta HV_FAST_TRAP + + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + mov HV_MMU_DMMU, %o3 + ta HV_FAST_TRAP - mov %l0, %o0 + sethi %hi(bigkernel), %g2 + lduw [%g2 + %lo(bigkernel)], %g2 + brz,pt %g2, after_lock_tlb + nop + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE + 0x400000), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + sethi %hi(0x400000), %o3 + add %o2, %o3, %o2 + mov HV_MMU_IMMU, %o3 + ta HV_FAST_TRAP + + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE + 0x400000), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + sethi %hi(0x400000), %o3 + add %o2, %o3, %o2 + mov HV_MMU_DMMU, %o3 + ta HV_FAST_TRAP + +after_lock_tlb: wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate wr %g0, 0, %fprs - /* XXX Buggy PROM... 
*/ - srl %o0, 0, %o0 - ldx [%o0], %g6 - wr %g0, ASI_P, %asi mov PRIMARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU + +661: stxa %g0, [%g7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g0, [%g7] ASI_MMU + .previous + membar #Sync mov SECONDARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU + +661: stxa %g0, [%g7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g0, [%g7] ASI_MMU + .previous + membar #Sync - mov 1, %g5 - sllx %g5, THREAD_SHIFT, %g5 - sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 - add %g6, %g5, %sp + /* Everything we do here, until we properly take over the + * trap table, must be done with extreme care. We cannot + * make any references to %g6 (current thread pointer), + * %g4 (current task pointer), or %g5 (base of current cpu's + * per-cpu area) until we properly take over the trap table + * from the firmware and hypervisor. + * + * Get onto temporary stack which is in the locked kernel image. + */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS, %sp mov 0, %fp - wrpr %g0, 0, %wstate - wrpr %g0, 0, %tl + /* Put garbage in these registers to trap any access to them. */ + set 0xdeadbeef, %g4 + set 0xdeadbeef, %g5 + set 0xdeadbeef, %g6 - /* Setup the trap globals, then we can resurface. 
*/ - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate - sethi %hi(sparc64_ttable_tl0), %g5 - wrpr %g5, %tba - mov %o2, %g6 - - wrpr %o1, PSTATE_MG, %pstate -#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000) -#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) - - mov TSB_REG, %g1 - stxa %g0, [%g1] ASI_DMMU - membar #Sync - mov TLB_SFSR, %g1 - sethi %uhi(KERN_HIGHBITS), %g2 - or %g2, %ulo(KERN_HIGHBITS), %g2 - sllx %g2, 32, %g2 - or %g2, KERN_LOWBITS, %g2 + call init_irqwork_curcpu + nop - BRANCH_IF_ANY_CHEETAH(g3,g7,9f) + sethi %hi(tlb_type), %g3 + lduw [%g3 + %lo(tlb_type)], %g2 + cmp %g2, 3 + bne,pt %icc, 1f + nop - ba,pt %xcc, 1f + call hard_smp_processor_id nop + + mov %o0, %o1 + mov 0, %o0 + mov 0, %o2 + call sun4v_init_mondo_queues + mov 1, %o3 -9: - sethi %uhi(VPTE_BASE_CHEETAH), %g3 - or %g3, %ulo(VPTE_BASE_CHEETAH), %g3 - ba,pt %xcc, 2f - sllx %g3, 32, %g3 -1: - sethi %uhi(VPTE_BASE_SPITFIRE), %g3 - or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3 - sllx %g3, 32, %g3 +1: call init_cur_cpu_trap + ldx [%l0], %o0 + + /* Start using proper page size encodings in ctx register. */ + sethi %hi(sparc64_kern_pri_context), %g3 + ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 -2: - clr %g7 -#undef KERN_HIGHBITS -#undef KERN_LOWBITS +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous - wrpr %o1, 0x0, %pstate - ldx [%g6 + TI_TASK], %g4 + membar #Sync wrpr %g0, 0, %wstate - call init_irqwork_curcpu + /* As a hack, put &init_thread_union into %g6. + * prom_world() loads from here to restore the %asi + * register. + */ + sethi %hi(init_thread_union), %g6 + or %g6, %lo(init_thread_union), %g6 + + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 1f nop - /* Start using proper page size encodings in ctx register. 
*/ - sethi %hi(sparc64_kern_pri_context), %g3 - ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 - mov PRIMARY_CONTEXT, %g1 - stxa %g2, [%g1] ASI_DMMU - membar #Sync + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + /* Compute physical address: + * + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) + */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 + + call prom_set_trap_table_sun4v + sethi %hi(sparc64_ttable_tl0), %o0 + + ba,pt %xcc, 2f + nop + +1: call prom_set_trap_table + sethi %hi(sparc64_ttable_tl0), %o0 + +2: ldx [%l0], %g6 + ldx [%g6 + TI_TASK], %g4 + + mov 1, %g5 + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp + mov 0, %fp rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 wrpr %o1, 0, %pstate - call prom_set_trap_table - sethi %hi(sparc64_ttable_tl0), %o0 - call smp_callin nop call cpu_idle diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 8d44ae5a15e3..7f7dba0ca96a 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -38,6 +38,7 @@ #include <asm/processor.h> #include <asm/timer.h> #include <asm/kdebug.h> +#include <asm/head.h> #ifdef CONFIG_KMOD #include <linux/kmod.h> #endif @@ -72,12 +73,14 @@ struct tl1_traplog { static void dump_tl1_traplog(struct tl1_traplog *p) { - int i; + int i, limit; + + printk(KERN_EMERG "TRAPLOG: Error at trap level 0x%lx, " + "dumping track stack.\n", p->tl); - printk("TRAPLOG: Error at trap level 0x%lx, dumping track stack.\n", - p->tl); - for (i = 0; i < 4; i++) { - printk(KERN_CRIT + limit = (tlb_type == hypervisor) ? 
2 : 4; + for (i = 0; i < limit; i++) { + printk(KERN_EMERG "TRAPLOG: Trap level %d TSTATE[%016lx] TPC[%016lx] " "TNPC[%016lx] TT[%lx]\n", i + 1, @@ -179,6 +182,45 @@ void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr spitfire_insn_access_exception(regs, sfsr, sfar); } +void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + unsigned short type = (type_ctx >> 16); + unsigned short ctx = (type_ctx & 0xffff); + siginfo_t info; + + if (notify_die(DIE_TRAP, "instruction access exception", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + printk("sun4v_insn_access_exception: ADDR[%016lx] " + "CTX[%04x] TYPE[%04x], going.\n", + addr, ctx, type); + die_if_kernel("Iax", regs); + } + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + sun4v_insn_access_exception(regs, addr, type_ctx); +} + void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { siginfo_t info; @@ -227,6 +269,45 @@ void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr spitfire_data_access_exception(regs, sfsr, sfar); } +void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + unsigned short type = (type_ctx >> 16); + unsigned short ctx = (type_ctx & 0xffff); + siginfo_t info; + + if (notify_die(DIE_TRAP, "data access exception", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + 
return; + + if (regs->tstate & TSTATE_PRIV) { + printk("sun4v_data_access_exception: ADDR[%016lx] " + "CTX[%04x] TYPE[%04x], going.\n", + addr, ctx, type); + die_if_kernel("Dax", regs); + } + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void sun4v_data_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + sun4v_data_access_exception(regs, addr, type_ctx); +} + #ifdef CONFIG_PCI /* This is really pathetic... */ extern volatile int pci_poke_in_progress; @@ -788,7 +869,8 @@ void __init cheetah_ecache_flush_init(void) cheetah_error_log[i].afsr = CHAFSR_INVALID; __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { + if ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID) { cheetah_error_table = &__jalapeno_error_table[0]; cheetah_afsr_errors = JPAFSR_ERRORS; } else if ((ver >> 32) == 0x003e0015) { @@ -1666,6 +1748,238 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) regs->tpc); } +struct sun4v_error_entry { + u64 err_handle; + u64 err_stick; + + u32 err_type; +#define SUN4V_ERR_TYPE_UNDEFINED 0 +#define SUN4V_ERR_TYPE_UNCORRECTED_RES 1 +#define SUN4V_ERR_TYPE_PRECISE_NONRES 2 +#define SUN4V_ERR_TYPE_DEFERRED_NONRES 3 +#define SUN4V_ERR_TYPE_WARNING_RES 4 + + u32 err_attrs; +#define SUN4V_ERR_ATTRS_PROCESSOR 0x00000001 +#define SUN4V_ERR_ATTRS_MEMORY 0x00000002 +#define SUN4V_ERR_ATTRS_PIO 0x00000004 +#define SUN4V_ERR_ATTRS_INT_REGISTERS 0x00000008 +#define SUN4V_ERR_ATTRS_FPU_REGISTERS 0x00000010 +#define SUN4V_ERR_ATTRS_USER_MODE 0x01000000 +#define SUN4V_ERR_ATTRS_PRIV_MODE 
0x02000000 +#define SUN4V_ERR_ATTRS_RES_QUEUE_FULL 0x80000000 + + u64 err_raddr; + u32 err_size; + u16 err_cpu; + u16 err_pad; +}; + +static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0); +static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0); + +static const char *sun4v_err_type_to_str(u32 type) +{ + switch (type) { + case SUN4V_ERR_TYPE_UNDEFINED: + return "undefined"; + case SUN4V_ERR_TYPE_UNCORRECTED_RES: + return "uncorrected resumable"; + case SUN4V_ERR_TYPE_PRECISE_NONRES: + return "precise nonresumable"; + case SUN4V_ERR_TYPE_DEFERRED_NONRES: + return "deferred nonresumable"; + case SUN4V_ERR_TYPE_WARNING_RES: + return "warning resumable"; + default: + return "unknown"; + }; +} + +static void sun4v_log_error(struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt) +{ + int cnt; + + printk("%s: Reporting on cpu %d\n", pfx, cpu); + printk("%s: err_handle[%lx] err_stick[%lx] err_type[%08x:%s]\n", + pfx, + ent->err_handle, ent->err_stick, + ent->err_type, + sun4v_err_type_to_str(ent->err_type)); + printk("%s: err_attrs[%08x:%s %s %s %s %s %s %s %s]\n", + pfx, + ent->err_attrs, + ((ent->err_attrs & SUN4V_ERR_ATTRS_PROCESSOR) ? + "processor" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_MEMORY) ? + "memory" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PIO) ? + "pio" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_INT_REGISTERS) ? + "integer-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_FPU_REGISTERS) ? + "fpu-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_USER_MODE) ? + "user" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PRIV_MODE) ? + "privileged" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL) ? 
+ "queue-full" : "")); + printk("%s: err_raddr[%016lx] err_size[%u] err_cpu[%u]\n", + pfx, + ent->err_raddr, ent->err_size, ent->err_cpu); + + if ((cnt = atomic_read(ocnt)) != 0) { + atomic_set(ocnt, 0); + wmb(); + printk("%s: Queue overflowed %d times.\n", + pfx, cnt); + } +} + +/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate. + * Log the event and clear the first word of the entry. + */ +void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->resum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + + sun4v_log_error(&local_copy, cpu, + KERN_ERR "RESUMABLE ERROR", + &sun4v_resum_oflow_cnt); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_resum_overflow(struct pt_regs *regs) +{ + atomic_inc(&sun4v_resum_oflow_cnt); +} + +/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate. + * Log the event, clear the first word of the entry, and die. + */ +void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->nonresum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + +#ifdef CONFIG_PCI + /* Check for the special PCI poke sequence. 
*/ + if (pci_poke_in_progress && pci_poke_cpu == cpu) { + pci_poke_faulted = 1; + regs->tpc += 4; + regs->tnpc = regs->tpc + 4; + return; + } +#endif + + sun4v_log_error(&local_copy, cpu, + KERN_EMERG "NON-RESUMABLE ERROR", + &sun4v_nonresum_oflow_cnt); + + panic("Non-resumable error."); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_nonresum_overflow(struct pt_regs *regs) +{ + /* XXX Actually even this can make not that much sense. Perhaps + * XXX we should just pull the plug and panic directly from here? + */ + atomic_inc(&sun4v_nonresum_oflow_cnt); +} + +unsigned long sun4v_err_itlb_vaddr; +unsigned long sun4v_err_itlb_ctx; +unsigned long sun4v_err_itlb_pte; +unsigned long sun4v_err_itlb_error; + +void sun4v_itlb_error_report(struct pt_regs *regs, int tl) +{ + if (tl > 1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); + printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] " + "pte[%lx] error[%lx]\n", + sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, + sun4v_err_itlb_pte, sun4v_err_itlb_error); + + prom_halt(); +} + +unsigned long sun4v_err_dtlb_vaddr; +unsigned long sun4v_err_dtlb_ctx; +unsigned long sun4v_err_dtlb_pte; +unsigned long sun4v_err_dtlb_error; + +void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) +{ + if (tl > 1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); + printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] " + "pte[%lx] error[%lx]\n", + sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, + sun4v_err_dtlb_pte, sun4v_err_dtlb_error); + + prom_halt(); +} + +void hypervisor_tlbop_error(unsigned long err, unsigned long op) +{ + printk(KERN_CRIT "SUN4V: TLB hv call error %lu for op %lu\n", + err, op); +} 
+ +void hypervisor_tlbop_error_xcall(unsigned long err, unsigned long op) +{ + printk(KERN_CRIT "SUN4V: XCALL TLB hv call error %lu for op %lu\n", + err, op); +} + void do_fpe_common(struct pt_regs *regs) { if (regs->tstate & TSTATE_PRIV) { @@ -1924,10 +2238,11 @@ void die_if_kernel(char *str, struct pt_regs *regs) } user_instruction_dump ((unsigned int __user *) regs->tpc); } +#if 0 #ifdef CONFIG_SMP smp_report_regs(); #endif - +#endif if (regs->tstate & TSTATE_PRIV) do_exit(SIGKILL); do_exit(SIGSEGV); @@ -1958,6 +2273,11 @@ void do_illegal_instruction(struct pt_regs *regs) } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ { if (handle_ldf_stq(insn, regs)) return; + } else if (tlb_type == hypervisor) { + extern int vis_emul(struct pt_regs *, unsigned int); + + if (!vis_emul(regs, insn)) + return; } } info.si_signo = SIGILL; @@ -1968,6 +2288,8 @@ void do_illegal_instruction(struct pt_regs *regs) force_sig_info(SIGILL, &info, current); } +extern void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn); + void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) { siginfo_t info; @@ -1977,13 +2299,7 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo return; if (regs->tstate & TSTATE_PRIV) { - extern void kernel_unaligned_trap(struct pt_regs *regs, - unsigned int insn, - unsigned long sfar, - unsigned long sfsr); - - kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc), - sfar, sfsr); + kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); return; } info.si_signo = SIGBUS; @@ -1994,6 +2310,26 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo force_sig_info(SIGBUS, &info, current); } +void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "memory address unaligned", regs, + 0, 0x34, SIGSEGV) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + 
kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); + return; + } + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + void do_privop(struct pt_regs *regs) { siginfo_t info; @@ -2130,7 +2466,22 @@ void do_getpsr(struct pt_regs *regs) } } +struct trap_per_cpu trap_block[NR_CPUS]; + +/* This can get invoked before sched_init() so play it super safe + * and use hard_smp_processor_id(). + */ +void init_cur_cpu_trap(struct thread_info *t) +{ + int cpu = hard_smp_processor_id(); + struct trap_per_cpu *p = &trap_block[cpu]; + + p->thread = t; + p->pgd_paddr = 0; +} + extern void thread_info_offsets_are_bolixed_dave(void); +extern void trap_per_cpu_offsets_are_bolixed_dave(void); /* Only invoked on boot processor. */ void __init trap_init(void) @@ -2154,7 +2505,6 @@ void __init trap_init(void) TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) || TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) || TI_PCR != offsetof(struct thread_info, pcr_reg) || - TI_CEE_STUFF != offsetof(struct thread_info, cee_stuff) || TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) || TI_NEW_CHILD != offsetof(struct thread_info, new_child) || TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) || @@ -2165,6 +2515,29 @@ void __init trap_init(void) (TI_FPREGS & (64 - 1))) thread_info_offsets_are_bolixed_dave(); + if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) || + (TRAP_PER_CPU_PGD_PADDR != + offsetof(struct trap_per_cpu, pgd_paddr)) || + (TRAP_PER_CPU_CPU_MONDO_PA != + offsetof(struct trap_per_cpu, cpu_mondo_pa)) || + (TRAP_PER_CPU_DEV_MONDO_PA != + offsetof(struct trap_per_cpu, dev_mondo_pa)) || + (TRAP_PER_CPU_RESUM_MONDO_PA != + offsetof(struct trap_per_cpu, resum_mondo_pa)) || + (TRAP_PER_CPU_RESUM_KBUF_PA != + offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || + (TRAP_PER_CPU_NONRESUM_MONDO_PA != 
+ offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || + (TRAP_PER_CPU_NONRESUM_KBUF_PA != + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || + (TRAP_PER_CPU_FAULT_INFO != + offsetof(struct trap_per_cpu, fault_info)) || + (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != + offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || + (TRAP_PER_CPU_CPU_LIST_PA != + offsetof(struct trap_per_cpu, cpu_list_pa))) + trap_per_cpu_offsets_are_bolixed_dave(); + /* Attach to the address space of init_task. On SMP we * do this in smp.c:smp_callin for other cpus. */ diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S new file mode 100644 index 000000000000..118baea44f69 --- /dev/null +++ b/arch/sparc64/kernel/tsb.S @@ -0,0 +1,442 @@ +/* tsb.S: Sparc64 TSB table handling. + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + +#include <asm/tsb.h> +#include <asm/hypervisor.h> + + .text + .align 32 + + /* Invoked from TLB miss handler, we are in the + * MMU global registers and they are setup like + * this: + * + * %g1: TSB entry pointer + * %g2: available temporary + * %g3: FAULT_CODE_{D,I}TLB + * %g4: available temporary + * %g5: available temporary + * %g6: TAG TARGET + * %g7: available temporary, will be loaded by us with + * the physical address base of the linux page + * tables for the current address space + */ +tsb_miss_dtlb: + mov TLB_TAG_ACCESS, %g4 + ba,pt %xcc, tsb_miss_page_table_walk + ldxa [%g4] ASI_DMMU, %g4 + +tsb_miss_itlb: + mov TLB_TAG_ACCESS, %g4 + ba,pt %xcc, tsb_miss_page_table_walk + ldxa [%g4] ASI_IMMU, %g4 + + /* At this point we have: + * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g4 -- missing virtual address + * %g6 -- TAG TARGET (vaddr >> 22) + */ +tsb_miss_page_table_walk: + TRAP_LOAD_PGD_PHYS(%g7, %g5) + + /* And now we have the PGD base physical address in %g7. 
*/ +tsb_miss_page_table_walk_sun4v_fastpath: + USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) + + /* At this point we have: + * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g5 -- physical address of PTE in Linux page tables + * %g6 -- TAG TARGET (vaddr >> 22) + */ +tsb_reload: + TSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, tsb_do_fault + TSB_STORE(%g1, %g7) + + TSB_WRITE(%g1, %g5, %g6) + + /* Finally, load TLB and return from trap. */ +tsb_tlb_reload: + cmp %g3, FAULT_CODE_DTLB + bne,pn %xcc, tsb_itlb_load + nop + +tsb_dtlb_load: + +661: stxa %g5, [%g0] ASI_DTLB_DATA_IN + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_DTLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_dtlb_load + mov %g5, %g3 + +tsb_itlb_load: + /* Executable bit must be set. */ +661: andcc %g5, _PAGE_EXEC_4U, %g0 + .section .sun4v_1insn_patch, "ax" + .word 661b + andcc %g5, _PAGE_EXEC_4V, %g0 + .previous + + be,pn %xcc, tsb_do_fault + nop + +661: stxa %g5, [%g0] ASI_ITLB_DATA_IN + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_ITLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_itlb_load + mov %g5, %g3 + + /* No valid entry in the page tables, do full fault + * processing. 
+ */ + + .globl tsb_do_fault +tsb_do_fault: + cmp %g3, FAULT_CODE_DTLB + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g4 + .previous + + bne,pn %xcc, tsb_do_itlb_fault + nop + +tsb_do_dtlb_fault: + rdpr %tl, %g3 + cmp %g3, 1 + +661: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g5 + .section .sun4v_2insn_patch, "ax" + .word 661b + ldx [%g4 + HV_FAULT_D_ADDR_OFFSET], %g5 + nop + .previous + + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline + nop + +tsb_do_itlb_fault: + rdpr %tpc, %g5 + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_ITLB, %g4 + + .globl sparc64_realfault_common +sparc64_realfault_common: + /* fault code in %g4, fault address in %g5, etrap will + * preserve these two values in %l4 and %l5 respectively + */ + ba,pt %xcc, etrap ! Save trap state +1: rd %pc, %g7 ! ... + stb %l4, [%g6 + TI_FAULT_CODE] ! Save fault code + stx %l5, [%g6 + TI_FAULT_ADDR] ! Save fault address + call do_sparc64_fault ! Call fault handler + add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg + ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state + nop ! Delay slot (fill me) + +winfix_trampoline: + rdpr %tpc, %g3 ! Prepare winfixup TNPC + or %g3, 0x7c, %g3 ! Compute branch offset + wrpr %g3, %tnpc ! Write it into TNPC + done ! Trap return + + /* Insert an entry into the TSB. + * + * %o0: TSB entry pointer (virt or phys address) + * %o1: tag + * %o2: pte + */ + .align 32 + .globl __tsb_insert +__tsb_insert: + rdpr %pstate, %o5 + wrpr %o5, PSTATE_IE, %pstate + TSB_LOCK_TAG(%o0, %g2, %g3) + TSB_WRITE(%o0, %o2, %o1) + wrpr %o5, %pstate + retl + nop + .size __tsb_insert, .-__tsb_insert + + /* Flush the given TSB entry if it has the matching + * tag. 
+ * + * %o0: TSB entry pointer (virt or phys address) + * %o1: tag + */ + .align 32 + .globl tsb_flush + .type tsb_flush,#function +tsb_flush: + sethi %hi(TSB_TAG_LOCK_HIGH), %g2 +1: TSB_LOAD_TAG(%o0, %g1) + srlx %g1, 32, %o3 + andcc %o3, %g2, %g0 + bne,pn %icc, 1b + membar #LoadLoad + cmp %g1, %o1 + mov 1, %o3 + bne,pt %xcc, 2f + sllx %o3, TSB_TAG_INVALID_BIT, %o3 + TSB_CAS_TAG(%o0, %g1, %o3) + cmp %g1, %o3 + bne,pn %xcc, 1b + nop +2: retl + TSB_MEMBAR + .size tsb_flush, .-tsb_flush + + /* Reload MMU related context switch state at + * schedule() time. + * + * %o0: page table physical address + * %o1: TSB register value + * %o2: TSB virtual address + * %o3: TSB mapping locked PTE + * %o4: Hypervisor TSB descriptor physical address + * + * We have to run this whole thing with interrupts + * disabled so that the current cpu doesn't change + * due to preemption. + */ + .align 32 + .globl __tsb_context_switch + .type __tsb_context_switch,#function +__tsb_context_switch: + rdpr %pstate, %o5 + wrpr %o5, PSTATE_IE, %pstate + + ldub [%g6 + TI_CPU], %g1 + sethi %hi(trap_block), %g2 + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1 + or %g2, %lo(trap_block), %g2 + add %g2, %g1, %g2 + stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] + + sethi %hi(tlb_type), %g1 + lduw [%g1 + %lo(tlb_type)], %g1 + cmp %g1, 3 + bne,pt %icc, 1f + nop + + /* Hypervisor TSB switch. */ + mov SCRATCHPAD_UTSBREG1, %g1 + stxa %o1, [%g1] ASI_SCRATCHPAD + mov -1, %g2 + mov SCRATCHPAD_UTSBREG2, %g1 + stxa %g2, [%g1] ASI_SCRATCHPAD + + /* Save away %o5's %pstate, we have to use %o5 for + * the hypervisor call. + */ + mov %o5, %g1 + + mov HV_FAST_MMU_TSB_CTXNON0, %o5 + mov 1, %o0 + mov %o4, %o1 + ta HV_FAST_TRAP + + /* Finish up and restore %o5. */ + ba,pt %xcc, 9f + mov %g1, %o5 + + /* SUN4U TSB switch. 
*/ +1: mov TSB_REG, %g1 + stxa %o1, [%g1] ASI_DMMU + membar #Sync + stxa %o1, [%g1] ASI_IMMU + membar #Sync + +2: brz %o2, 9f + nop + + sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2 + mov TLB_TAG_ACCESS, %g1 + lduw [%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2 + stxa %o2, [%g1] ASI_DMMU + membar #Sync + sllx %g2, 3, %g2 + stxa %o3, [%g2] ASI_DTLB_DATA_ACCESS + membar #Sync +9: + wrpr %o5, %pstate + + retl + nop + .size __tsb_context_switch, .-__tsb_context_switch + +#define TSB_PASS_BITS ((1 << TSB_TAG_LOCK_BIT) | \ + (1 << TSB_TAG_INVALID_BIT)) + + .align 32 + .globl copy_tsb + .type copy_tsb,#function +copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size + * %o2=new_tsb_base, %o3=new_tsb_size + */ + sethi %uhi(TSB_PASS_BITS), %g7 + srlx %o3, 4, %o3 + add %o0, %o1, %g1 /* end of old tsb */ + sllx %g7, 32, %g7 + sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + +661: prefetcha [%o0] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o0] ASI_PHYS_USE_EC, #one_read + .previous + +90: andcc %o0, (64 - 1), %g0 + bne 1f + add %o0, 64, %o5 + +661: prefetcha [%o5] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o5] ASI_PHYS_USE_EC, #one_read + .previous + +1: TSB_LOAD_QUAD(%o0, %g2) /* %g2/%g3 == TSB entry */ + andcc %g2, %g7, %g0 /* LOCK or INVALID set? */ + bne,pn %xcc, 80f /* Skip it */ + sllx %g2, 22, %o4 /* TAG --> VADDR */ + + /* This can definitely be computed faster... */ + srlx %o0, 4, %o5 /* Build index */ + and %o5, 511, %o5 /* Mask index */ + sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ + or %o4, %o5, %o4 /* Full VADDR. 
*/ + srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ + and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ + sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ + TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ + add %o4, 0x8, %o4 /* Advance to TTE */ + TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ + +80: add %o0, 16, %o0 + cmp %o0, %g1 + bne,pt %xcc, 90b + nop + + retl + TSB_MEMBAR + .size copy_tsb, .-copy_tsb + + /* Set the invalid bit in all TSB entries. */ + .align 32 + .globl tsb_init + .type tsb_init,#function +tsb_init: /* %o0 = TSB vaddr, %o1 = size in bytes */ + prefetch [%o0 + 0x000], #n_writes + mov 1, %g1 + prefetch [%o0 + 0x040], #n_writes + sllx %g1, TSB_TAG_INVALID_BIT, %g1 + prefetch [%o0 + 0x080], #n_writes +1: prefetch [%o0 + 0x0c0], #n_writes + stx %g1, [%o0 + 0x00] + stx %g1, [%o0 + 0x10] + stx %g1, [%o0 + 0x20] + stx %g1, [%o0 + 0x30] + prefetch [%o0 + 0x100], #n_writes + stx %g1, [%o0 + 0x40] + stx %g1, [%o0 + 0x50] + stx %g1, [%o0 + 0x60] + stx %g1, [%o0 + 0x70] + prefetch [%o0 + 0x140], #n_writes + stx %g1, [%o0 + 0x80] + stx %g1, [%o0 + 0x90] + stx %g1, [%o0 + 0xa0] + stx %g1, [%o0 + 0xb0] + prefetch [%o0 + 0x180], #n_writes + stx %g1, [%o0 + 0xc0] + stx %g1, [%o0 + 0xd0] + stx %g1, [%o0 + 0xe0] + stx %g1, [%o0 + 0xf0] + subcc %o1, 0x100, %o1 + bne,pt %xcc, 1b + add %o0, 0x100, %o0 + retl + nop + nop + nop + .size tsb_init, .-tsb_init + + .globl NGtsb_init + .type NGtsb_init,#function +NGtsb_init: + rd %asi, %g2 + mov 1, %g1 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + sllx %g1, TSB_TAG_INVALID_BIT, %g1 +1: stxa %g1, [%o0 + 0x00] %asi + stxa %g1, [%o0 + 0x10] %asi + stxa %g1, [%o0 + 0x20] %asi + stxa %g1, [%o0 + 0x30] %asi + stxa %g1, [%o0 + 0x40] %asi + stxa %g1, [%o0 + 0x50] %asi + stxa %g1, [%o0 + 0x60] %asi + stxa %g1, [%o0 + 0x70] %asi + stxa %g1, [%o0 + 0x80] %asi + stxa %g1, [%o0 + 0x90] %asi + stxa %g1, [%o0 + 0xa0] %asi + stxa %g1, [%o0 + 0xb0] %asi + stxa %g1, [%o0 + 0xc0] %asi + stxa %g1, [%o0 + 0xd0] %asi + stxa %g1, [%o0 + 
0xe0] %asi + stxa %g1, [%o0 + 0xf0] %asi + subcc %o1, 0x100, %o1 + bne,pt %xcc, 1b + add %o0, 0x100, %o0 + retl + wr %g2, 0x0, %asi + .size NGtsb_init, .-NGtsb_init diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 8365bc1f81f3..5d901519db55 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -1,7 +1,6 @@ -/* $Id: ttable.S,v 1.38 2002/02/09 19:49:30 davem Exp $ - * ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah extensions. +/* ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah/SUN4V extensions. * - * Copyright (C) 1996, 2001 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996, 2001, 2006 David S. Miller (davem@davemloft.net) */ #include <linux/config.h> @@ -19,7 +18,7 @@ tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3) tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7) tl0_iax: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception) -tl0_resv009: BTRAP(0x9) +tl0_itsb_4v: SUN4V_ITSB_MISS tl0_iae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf) @@ -38,7 +37,7 @@ tl0_div0: TRAP(do_div0) tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e) tl0_resv02f: BTRAP(0x2f) tl0_dax: TRAP_NOSAVE(__spitfire_data_access_exception) -tl0_resv031: BTRAP(0x31) +tl0_dtsb_4v: SUN4V_DTSB_MISS tl0_dae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl0_resv033: BTRAP(0x33) @@ -52,12 +51,13 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40) tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) +tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) #else tl0_irq1: BTRAP(0x41) tl0_irq2: BTRAP(0x42) tl0_irq3: BTRAP(0x43) +tl0_irq4: BTRAP(0x44) #endif -tl0_irq4: TRAP_IRQ(handler_irq, 4) tl0_irq5: TRAP_IRQ(handler_irq, 5) TRAP_IRQ(handler_irq, 6) tl0_irq7: TRAP_IRQ(handler_irq, 7) 
TRAP_IRQ(handler_irq, 8) tl0_irq9: TRAP_IRQ(handler_irq, 9) TRAP_IRQ(handler_irq, 10) @@ -78,9 +78,9 @@ tl0_vaw: TRAP(do_vaw) tl0_cee: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_cee_trap) tl0_iamiss: -#include "itlb_base.S" +#include "itlb_miss.S" tl0_damiss: -#include "dtlb_base.S" +#include "dtlb_miss.S" tl0_daprot: #include "dtlb_prot.S" tl0_fecc: BTRAP(0x70) /* Fast-ECC on Cheetah */ @@ -88,15 +88,18 @@ tl0_dcpe: BTRAP(0x71) /* D-cache Parity Error on Cheetah+ */ tl0_icpe: BTRAP(0x72) /* I-cache Parity Error on Cheetah+ */ tl0_resv073: BTRAP(0x73) BTRAP(0x74) BTRAP(0x75) tl0_resv076: BTRAP(0x76) BTRAP(0x77) BTRAP(0x78) BTRAP(0x79) BTRAP(0x7a) BTRAP(0x7b) -tl0_resv07c: BTRAP(0x7c) BTRAP(0x7d) BTRAP(0x7e) BTRAP(0x7f) +tl0_cpu_mondo: TRAP_NOSAVE(sun4v_cpu_mondo) +tl0_dev_mondo: TRAP_NOSAVE(sun4v_dev_mondo) +tl0_res_mondo: TRAP_NOSAVE(sun4v_res_mondo) +tl0_nres_mondo: TRAP_NOSAVE(sun4v_nonres_mondo) tl0_s0n: SPILL_0_NORMAL tl0_s1n: SPILL_1_NORMAL tl0_s2n: SPILL_2_NORMAL -tl0_s3n: SPILL_3_NORMAL -tl0_s4n: SPILL_4_NORMAL -tl0_s5n: SPILL_5_NORMAL -tl0_s6n: SPILL_6_NORMAL -tl0_s7n: SPILL_7_NORMAL +tl0_s3n: SPILL_0_NORMAL_ETRAP +tl0_s4n: SPILL_1_GENERIC_ETRAP +tl0_s5n: SPILL_1_GENERIC_ETRAP_FIXUP +tl0_s6n: SPILL_2_GENERIC_ETRAP +tl0_s7n: SPILL_2_GENERIC_ETRAP_FIXUP tl0_s0o: SPILL_0_OTHER tl0_s1o: SPILL_1_OTHER tl0_s2o: SPILL_2_OTHER @@ -110,9 +113,9 @@ tl0_f1n: FILL_1_NORMAL tl0_f2n: FILL_2_NORMAL tl0_f3n: FILL_3_NORMAL tl0_f4n: FILL_4_NORMAL -tl0_f5n: FILL_5_NORMAL -tl0_f6n: FILL_6_NORMAL -tl0_f7n: FILL_7_NORMAL +tl0_f5n: FILL_0_NORMAL_RTRAP +tl0_f6n: FILL_1_GENERIC_RTRAP +tl0_f7n: FILL_2_GENERIC_RTRAP tl0_f0o: FILL_0_OTHER tl0_f1o: FILL_1_OTHER tl0_f2o: FILL_2_OTHER @@ -128,7 +131,7 @@ tl0_flushw: FLUSH_WINDOW_TRAP tl0_resv104: BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107) .globl tl0_solaris tl0_solaris: SOLARIS_SYSCALL_TRAP -tl0_netbsd: NETBSD_SYSCALL_TRAP +tl0_resv109: BTRAP(0x109) tl0_resv10a: BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) BTRAP(0x10e) 
tl0_resv10f: BTRAP(0x10f) tl0_linux32: LINUX_32BIT_SYSCALL_TRAP @@ -179,7 +182,7 @@ sparc64_ttable_tl1: tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3) tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7) tl1_iax: TRAP_NOSAVE(__spitfire_insn_access_exception_tl1) -tl1_resv009: BTRAPTL1(0x9) +tl1_itsb_4v: SUN4V_ITSB_MISS tl1_iae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf) @@ -198,7 +201,7 @@ tl1_div0: TRAPTL1(do_div0_tl1) tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c) tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f) tl1_dax: TRAP_NOSAVE(__spitfire_data_access_exception_tl1) -tl1_resv031: BTRAPTL1(0x31) +tl1_dtsb_4v: SUN4V_DTSB_MISS tl1_dae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl1_resv033: BTRAPTL1(0x33) @@ -222,26 +225,10 @@ tl1_resv05c: BTRAPTL1(0x5c) BTRAPTL1(0x5d) BTRAPTL1(0x5e) BTRAPTL1(0x5f) tl1_ivec: TRAP_IVEC tl1_paw: TRAPTL1(do_paw_tl1) tl1_vaw: TRAPTL1(do_vaw_tl1) - - /* The grotty trick to save %g1 into current->thread.cee_stuff - * is because when we take this trap we could be interrupting - * trap code already using the trap alternate global registers. - * - * We cross our fingers and pray that this store/load does - * not cause yet another CEE trap. 
- */ -tl1_cee: membar #Sync - stx %g1, [%g6 + TI_CEE_STUFF] - ldxa [%g0] ASI_AFSR, %g1 - membar #Sync - stxa %g1, [%g0] ASI_AFSR - membar #Sync - ldx [%g6 + TI_CEE_STUFF], %g1 - retry - +tl1_cee: BTRAPTL1(0x63) tl1_iamiss: BTRAPTL1(0x64) BTRAPTL1(0x65) BTRAPTL1(0x66) BTRAPTL1(0x67) tl1_damiss: -#include "dtlb_backend.S" +#include "dtlb_miss.S" tl1_daprot: #include "dtlb_prot.S" tl1_fecc: BTRAPTL1(0x70) /* Fast-ECC on Cheetah */ diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 70faf630603b..001e8518331f 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -277,7 +277,7 @@ static void kernel_mna_trap_fault(void) regs->tstate |= (ASI_AIUS << 24UL); } -asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, unsigned long sfar, unsigned long sfsr) +asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) { enum direction dir = decode_direction(insn); int size = decode_access_size(insn); @@ -405,6 +405,9 @@ extern void do_privact(struct pt_regs *regs); extern void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar); +extern void sun4v_data_access_exception(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); int handle_ldf_stq(u32 insn, struct pt_regs *regs) { @@ -447,14 +450,20 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) break; } default: - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); return 1; } if (put_user (first >> 32, (u32 __user *)addr) || __put_user ((u32)first, (u32 __user *)(addr + 4)) || __put_user (second >> 32, (u32 __user *)(addr + 8)) || __put_user ((u32)second, (u32 __user *)(addr + 12))) { - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + 
spitfire_data_access_exception(regs, 0, addr); return 1; } } else { @@ -467,7 +476,10 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) do_privact(regs); return 1; } else if (asi > ASI_SNFL) { - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); return 1; } switch (insn & 0x180000) { @@ -484,7 +496,10 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) err |= __get_user (data[i], (u32 __user *)(addr + 4*i)); } if (err && !(asi & 0x2 /* NF */)) { - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); return 1; } if (asi & 0x8) /* Little */ { @@ -548,7 +563,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr u32 insn; u32 first, second; u64 value; - u8 asi, freg; + u8 freg; int flag; struct fpustate *f = FPUSTATE; @@ -557,7 +572,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; if (get_user(insn, (u32 __user *) pc) != -EFAULT) { - asi = sfsr >> 16; + int asi = decode_asi(insn, regs); if ((asi > ASI_SNFL) || (asi < ASI_P)) goto daex; @@ -587,7 +602,11 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr *(u64 *)(f->regs + freg) = value; current_thread_info()->fpsaved[0] |= flag; } else { -daex: spitfire_data_access_exception(regs, sfsr, sfar); +daex: + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, sfar, sfsr); + else + spitfire_data_access_exception(regs, sfsr, sfar); return; } advance(regs); @@ -600,7 +619,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr unsigned long tstate = regs->tstate; u32 insn; u64 value; - u8 asi, freg; + u8 freg; int flag; struct fpustate *f = FPUSTATE; @@ -609,8 +628,8 @@ void 
handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; if (get_user(insn, (u32 __user *) pc) != -EFAULT) { + int asi = decode_asi(insn, regs); freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); - asi = sfsr >> 16; value = 0; flag = (freg < 32) ? FPRS_DL : FPRS_DU; if ((asi > ASI_SNFL) || @@ -631,7 +650,11 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr __put_user ((u32)value, (u32 __user *)(sfar + 4))) goto daex; } else { -daex: spitfire_data_access_exception(regs, sfsr, sfar); +daex: + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, sfar, sfsr); + else + spitfire_data_access_exception(regs, sfsr, sfar); return; } advance(regs); diff --git a/arch/sparc64/kernel/us2e_cpufreq.c b/arch/sparc64/kernel/us2e_cpufreq.c index b35dc8dc995a..1f83fe6a82d6 100644 --- a/arch/sparc64/kernel/us2e_cpufreq.c +++ b/arch/sparc64/kernel/us2e_cpufreq.c @@ -346,6 +346,9 @@ static int __init us2e_freq_init(void) unsigned long manuf, impl, ver; int ret; + if (tlb_type != spitfire) + return -ENODEV; + __asm__("rdpr %%ver, %0" : "=r" (ver)); manuf = ((ver >> 48) & 0xffff); impl = ((ver >> 32) & 0xffff); @@ -354,20 +357,16 @@ static int __init us2e_freq_init(void) struct cpufreq_driver *driver; ret = -ENOMEM; - driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); + driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); if (!driver) goto err_out; - memset(driver, 0, sizeof(*driver)); - us2e_freq_table = kmalloc( + us2e_freq_table = kzalloc( (NR_CPUS * sizeof(struct us2e_freq_percpu_info)), GFP_KERNEL); if (!us2e_freq_table) goto err_out; - memset(us2e_freq_table, 0, - (NR_CPUS * sizeof(struct us2e_freq_percpu_info))); - driver->init = us2e_freq_cpu_init; driver->verify = us2e_freq_verify; driver->target = us2e_freq_target; diff --git a/arch/sparc64/kernel/us3_cpufreq.c b/arch/sparc64/kernel/us3_cpufreq.c index 6d1f9a3c464f..47e3acafb5be 100644 --- 
a/arch/sparc64/kernel/us3_cpufreq.c +++ b/arch/sparc64/kernel/us3_cpufreq.c @@ -203,6 +203,9 @@ static int __init us3_freq_init(void) unsigned long manuf, impl, ver; int ret; + if (tlb_type != cheetah && tlb_type != cheetah_plus) + return -ENODEV; + __asm__("rdpr %%ver, %0" : "=r" (ver)); manuf = ((ver >> 48) & 0xffff); impl = ((ver >> 32) & 0xffff); @@ -215,20 +218,16 @@ static int __init us3_freq_init(void) struct cpufreq_driver *driver; ret = -ENOMEM; - driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); + driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); if (!driver) goto err_out; - memset(driver, 0, sizeof(*driver)); - us3_freq_table = kmalloc( + us3_freq_table = kzalloc( (NR_CPUS * sizeof(struct us3_freq_percpu_info)), GFP_KERNEL); if (!us3_freq_table) goto err_out; - memset(us3_freq_table, 0, - (NR_CPUS * sizeof(struct us3_freq_percpu_info))); - driver->init = us3_freq_cpu_init; driver->verify = us3_freq_verify; driver->target = us3_freq_target; diff --git a/arch/sparc64/kernel/visemul.c b/arch/sparc64/kernel/visemul.c new file mode 100644 index 000000000000..84fedaa38aae --- /dev/null +++ b/arch/sparc64/kernel/visemul.c @@ -0,0 +1,894 @@ +/* visemul.c: Emulation of VIS instructions. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/thread_info.h> + +#include <asm/ptrace.h> +#include <asm/pstate.h> +#include <asm/system.h> +#include <asm/fpumacro.h> +#include <asm/uaccess.h> + +/* OPF field of various VIS instructions. 
*/ + +/* 000111011 - four 16-bit packs */ +#define FPACK16_OPF 0x03b + +/* 000111010 - two 32-bit packs */ +#define FPACK32_OPF 0x03a + +/* 000111101 - two 32-bit to 16-bit packs */ +#define FPACKFIX_OPF 0x03d + +/* 001001101 - four 16-bit expands */ +#define FEXPAND_OPF 0x04d + +/* 001001011 - two 32-bit merges */ +#define FPMERGE_OPF 0x04b + +/* 000110001 - 8-by-16-bit partitioned product */ +#define FMUL8x16_OPF 0x031 + +/* 000110011 - 8-by-16-bit upper alpha partitioned product */ +#define FMUL8x16AU_OPF 0x033 + +/* 000110101 - 8-by-16-bit lower alpha partitioned product */ +#define FMUL8x16AL_OPF 0x035 + +/* 000110110 - upper 8-by-16-bit partitioned product */ +#define FMUL8SUx16_OPF 0x036 + +/* 000110111 - lower 8-by-16-bit partitioned product */ +#define FMUL8ULx16_OPF 0x037 + +/* 000111000 - upper 8-by-16-bit partitioned product */ +#define FMULD8SUx16_OPF 0x038 + +/* 000111001 - lower unsigned 8-by-16-bit partitioned product */ +#define FMULD8ULx16_OPF 0x039 + +/* 000101000 - four 16-bit compare; set rd if src1 > src2 */ +#define FCMPGT16_OPF 0x028 + +/* 000101100 - two 32-bit compare; set rd if src1 > src2 */ +#define FCMPGT32_OPF 0x02c + +/* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ +#define FCMPLE16_OPF 0x020 + +/* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ +#define FCMPLE32_OPF 0x024 + +/* 000100010 - four 16-bit compare; set rd if src1 != src2 */ +#define FCMPNE16_OPF 0x022 + +/* 000100110 - two 32-bit compare; set rd if src1 != src2 */ +#define FCMPNE32_OPF 0x026 + +/* 000101010 - four 16-bit compare; set rd if src1 == src2 */ +#define FCMPEQ16_OPF 0x02a + +/* 000101110 - two 32-bit compare; set rd if src1 == src2 */ +#define FCMPEQ32_OPF 0x02e + +/* 000000000 - Eight 8-bit edge boundary processing */ +#define EDGE8_OPF 0x000 + +/* 000000001 - Eight 8-bit edge boundary processing, no CC */ +#define EDGE8N_OPF 0x001 + +/* 000000010 - Eight 8-bit edge boundary processing, little-endian */ +#define EDGE8L_OPF 0x002 + +/* 000000011 
- Eight 8-bit edge boundary processing, little-endian, no CC */ +#define EDGE8LN_OPF 0x003 + +/* 000000100 - Four 16-bit edge boundary processing */ +#define EDGE16_OPF 0x004 + +/* 000000101 - Four 16-bit edge boundary processing, no CC */ +#define EDGE16N_OPF 0x005 + +/* 000000110 - Four 16-bit edge boundary processing, little-endian */ +#define EDGE16L_OPF 0x006 + +/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ +#define EDGE16LN_OPF 0x007 + +/* 000001000 - Two 32-bit edge boundary processing */ +#define EDGE32_OPF 0x008 + +/* 000001001 - Two 32-bit edge boundary processing, no CC */ +#define EDGE32N_OPF 0x009 + +/* 000001010 - Two 32-bit edge boundary processing, little-endian */ +#define EDGE32L_OPF 0x00a + +/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ +#define EDGE32LN_OPF 0x00b + +/* 000111110 - distance between 8 8-bit components */ +#define PDIST_OPF 0x03e + +/* 000010000 - convert 8-bit 3-D address to blocked byte address */ +#define ARRAY8_OPF 0x010 + +/* 000010010 - convert 16-bit 3-D address to blocked byte address */ +#define ARRAY16_OPF 0x012 + +/* 000010100 - convert 32-bit 3-D address to blocked byte address */ +#define ARRAY32_OPF 0x014 + +/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ +#define BMASK_OPF 0x019 + +/* 001001100 - Permute bytes as specified by GSR.MASK */ +#define BSHUFFLE_OPF 0x04c + +#define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19)) +#define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19)) + +#define VIS_OPF_SHIFT 5 +#define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT) + +/* 5-bit register fields of the instruction word: rd is bits 29:25, + * rs1 is bits 18:14 (just below the 6-bit op3 field at bit 19) and + * rs2 is bits 4:0. + */ +#define RS1(INSN) (((INSN) >> 14) & 0x1f) +#define RS2(INSN) (((INSN) >> 0) & 0x1f) +#define RD(INSN) (((INSN) >> 25) & 0x1f) + +static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, + unsigned int rd, int from_kernel) +{ + if (rs2 >= 16 || rs1 >= 16 || rd >= 16) { + if (from_kernel != 0) + __asm__ __volatile__("flushw"); + else + flushw_user(); + } +} + +static unsigned 
long fetch_reg(unsigned int reg, struct pt_regs *regs) +{ + unsigned long value; + + if (reg < 16) + return (!reg ? 0 : regs->u_regs[reg]); + if (regs->tstate & TSTATE_PRIV) { + struct reg_window *win; + win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); + value = win->locals[reg - 16]; + } else if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + get_user(value, &win32->locals[reg - 16]); + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + get_user(value, &win->locals[reg - 16]); + } + return value; +} + +static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, + struct pt_regs *regs) +{ + BUG_ON(reg < 16); + BUG_ON(regs->tstate & TSTATE_PRIV); + + if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + return (unsigned long __user *)&win32->locals[reg - 16]; + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + return &win->locals[reg - 16]; + } +} + +/* Return the address of u_regs[reg] inside the saved pt_regs; only + * valid for reg < 16 and for a trap taken from non-privileged mode. + */ +static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, + struct pt_regs *regs) +{ + BUG_ON(reg >= 16); + BUG_ON(regs->tstate & TSTATE_PRIV); + + return &regs->u_regs[reg]; +} + +static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) +{ + if (rd < 16) { + unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); + + *rd_kern = val; + } else { + unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); + + if (test_thread_flag(TIF_32BIT)) + __put_user((u32)val, (u32 __user *)rd_user); + else + __put_user(val, rd_user); + } +} + +static inline unsigned long fpd_regval(struct fpustate *f, + unsigned int insn_regnum) +{ + insn_regnum = (((insn_regnum & 1) << 5) | + (insn_regnum & 0x1e)); + + return 
*(unsigned long *) &f->regs[insn_regnum]; +} + +static inline unsigned long *fpd_regaddr(struct fpustate *f, + unsigned int insn_regnum) +{ + insn_regnum = (((insn_regnum & 1) << 5) | + (insn_regnum & 0x1e)); + + return (unsigned long *) &f->regs[insn_regnum]; +} + +static inline unsigned int fps_regval(struct fpustate *f, + unsigned int insn_regnum) +{ + return f->regs[insn_regnum]; +} + +static inline unsigned int *fps_regaddr(struct fpustate *f, + unsigned int insn_regnum) +{ + return &f->regs[insn_regnum]; +} + +struct edge_tab { + u16 left, right; +}; +struct edge_tab edge8_tab[8] = { + { 0xff, 0x80 }, + { 0x7f, 0xc0 }, + { 0x3f, 0xe0 }, + { 0x1f, 0xf0 }, + { 0x0f, 0xf8 }, + { 0x07, 0xfc }, + { 0x03, 0xfe }, + { 0x01, 0xff }, +}; +struct edge_tab edge8_tab_l[8] = { + { 0xff, 0x01 }, + { 0xfe, 0x03 }, + { 0xfc, 0x07 }, + { 0xf8, 0x0f }, + { 0xf0, 0x1f }, + { 0xe0, 0x3f }, + { 0xc0, 0x7f }, + { 0x80, 0xff }, +}; +struct edge_tab edge16_tab[4] = { + { 0xf, 0x8 }, + { 0x7, 0xc }, + { 0x3, 0xe }, + { 0x1, 0xf }, +}; +struct edge_tab edge16_tab_l[4] = { + { 0xf, 0x1 }, + { 0xe, 0x3 }, + { 0xc, 0x7 }, + { 0x8, 0xf }, +}; +struct edge_tab edge32_tab[2] = { + { 0x3, 0x2 }, + { 0x1, 0x3 }, +}; +struct edge_tab edge32_tab_l[2] = { + { 0x3, 0x1 }, + { 0x2, 0x3 }, +}; + +static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; + u16 left, right; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); + orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); + + if (test_thread_flag(TIF_32BIT)) { + rs1 = rs1 & 0xffffffff; + rs2 = rs2 & 0xffffffff; + } + switch (opf) { + default: + case EDGE8_OPF: + case EDGE8N_OPF: + left = edge8_tab[rs1 & 0x7].left; + right = edge8_tab[rs2 & 0x7].right; + break; + case EDGE8L_OPF: + case EDGE8LN_OPF: + left = edge8_tab_l[rs1 & 0x7].left; + right = edge8_tab_l[rs2 & 0x7].right; + break; + + case EDGE16_OPF: + case EDGE16N_OPF: 
+ left = edge16_tab[(rs1 >> 1) & 0x3].left; + right = edge16_tab[(rs2 >> 1) & 0x3].right; + break; + + case EDGE16L_OPF: + case EDGE16LN_OPF: + left = edge16_tab_l[(rs1 >> 1) & 0x3].left; + right = edge16_tab_l[(rs2 >> 1) & 0x3].right; + break; + + case EDGE32_OPF: + case EDGE32N_OPF: + left = edge32_tab[(rs1 >> 2) & 0x1].left; + right = edge32_tab[(rs2 >> 2) & 0x1].right; + break; + + case EDGE32L_OPF: + case EDGE32LN_OPF: + left = edge32_tab_l[(rs1 >> 2) & 0x1].left; + right = edge32_tab_l[(rs2 >> 2) & 0x1].right; + break; + }; + + if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) + rd_val = right & left; + else + rd_val = left; + + store_reg(regs, rd_val, RD(insn)); + + switch (opf) { + case EDGE8_OPF: + case EDGE8L_OPF: + case EDGE16_OPF: + case EDGE16L_OPF: + case EDGE32_OPF: + case EDGE32L_OPF: { + unsigned long ccr, tstate; + + __asm__ __volatile__("subcc %1, %2, %%g0\n\t" + "rd %%ccr, %0" + : "=r" (ccr) + : "r" (orig_rs1), "r" (orig_rs2) + : "cc"); + tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); + regs->tstate = tstate | (ccr << 32UL); + } + }; +} + +static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + unsigned long rs1, rs2, rd_val; + unsigned int bits, bits_mask; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + rs1 = fetch_reg(RS1(insn), regs); + rs2 = fetch_reg(RS2(insn), regs); + + bits = (rs2 > 5 ? 
5 : rs2); + bits_mask = (1UL << bits) - 1UL; + + rd_val = ((((rs1 >> 11) & 0x3) << 0) | + (((rs1 >> 33) & 0x3) << 2) | + (((rs1 >> 55) & 0x1) << 4) | + (((rs1 >> 13) & 0xf) << 5) | + (((rs1 >> 35) & 0xf) << 9) | + (((rs1 >> 56) & 0xf) << 13) | + (((rs1 >> 17) & bits_mask) << 17) | + (((rs1 >> 39) & bits_mask) << (17 + bits)) | + (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); + + switch (opf) { + case ARRAY16_OPF: + rd_val <<= 1; + break; + + case ARRAY32_OPF: + rd_val <<= 2; + }; + + store_reg(regs, rd_val, RD(insn)); +} + +static void bmask(struct pt_regs *regs, unsigned int insn) +{ + unsigned long rs1, rs2, rd_val, gsr; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + rs1 = fetch_reg(RS1(insn), regs); + rs2 = fetch_reg(RS2(insn), regs); + rd_val = rs1 + rs2; + + store_reg(regs, rd_val, RD(insn)); + + gsr = current_thread_info()->gsr[0] & 0xffffffff; + gsr |= rd_val << 32UL; + current_thread_info()->gsr[0] = gsr; +} + +static void bshuffle(struct pt_regs *regs, unsigned int insn) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val; + unsigned long bmask, i; + + bmask = current_thread_info()->gsr[0] >> 32UL; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0UL; + for (i = 0; i < 8; i++) { + unsigned long which = (bmask >> (i * 4)) & 0xf; + unsigned long byte; + + if (which < 8) + byte = (rs1 >> (which * 8)) & 0xff; + else + byte = (rs2 >> ((which-8)*8)) & 0xff; + rd_val |= (byte << (i * 8)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; +} + +/* PDIST: add to the value already held in rd the sum, over the eight + * byte lanes, of the absolute difference between the rs1 and rs2 bytes. + */ +static void pdist(struct pt_regs *regs, unsigned int insn) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, *rd, rd_val; + unsigned long i; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + rd = fpd_regaddr(f, RD(insn)); + + rd_val = *rd; + + for (i = 0; i < 8; i++) { + s16 s1, s2; + + s1 = (rs1 >> (56 - (i * 8))) & 0xff; + s2 = (rs2 >> (56 - (i * 8))) & 0xff; + + /* Absolute value of difference. 
*/ + s1 -= s2; + if (s1 < 0) + s1 = ~s1 + 1; + + rd_val += s1; + } + + *rd = rd_val; +} + +static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, gsr, scale, rd_val; + + gsr = current_thread_info()->gsr[0]; + scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); + switch (opf) { + case FPACK16_OPF: { + unsigned long byte; + + rs2 = fpd_regval(f, RS2(insn)); + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + unsigned int val; + s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; + int scaled = src << scale; + int from_fixed = scaled >> 7; + + val = ((from_fixed < 0) ? + 0 : + (from_fixed > 255) ? + 255 : from_fixed); + + rd_val |= (val << (8 * byte)); + } + *fps_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPACK32_OPF: { + unsigned long word; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); + for (word = 0; word < 2; word++) { + unsigned long val; + s32 src = (rs2 >> (word * 32UL)); + s64 scaled = src << scale; + s64 from_fixed = scaled >> 23; + + val = ((from_fixed < 0) ? + 0 : + (from_fixed > 255) ? + 255 : from_fixed); + + rd_val |= (val << (32 * word)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPACKFIX_OPF: { + unsigned long word; + + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + for (word = 0; word < 2; word++) { + long val; + s32 src = (rs2 >> (word * 32UL)); + s64 scaled = src << scale; + s64 from_fixed = scaled >> 16; + + val = ((from_fixed < -32768) ? + -32768 : + (from_fixed > 32767) ? 
+ 32767 : from_fixed); + + rd_val |= ((val & 0xffff) << (word * 16)); + } + *fps_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FEXPAND_OPF: { + unsigned long byte; + + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + unsigned long val; + u8 src = (rs2 >> (byte * 8)) & 0xff; + + val = src << 4; + + rd_val |= (val << (byte * 16)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPMERGE_OPF: { + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = (((rs2 & 0x000000ff) << 0) | + ((rs1 & 0x000000ff) << 8) | + ((rs2 & 0x0000ff00) << 8) | + ((rs1 & 0x0000ff00) << 16) | + ((rs2 & 0x00ff0000) << 16) | + ((rs1 & 0x00ff0000) << 24) | + ((rs2 & 0xff000000) << 24) | + ((rs1 & 0xff000000) << 32)); + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + }; +} + +static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val; + + switch (opf) { + case FMUL8x16_OPF: { + unsigned long byte; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; + s16 src2 = (rs2 >> (byte * 16)) & 0xffff; + u32 prod = src1 * src2; + u16 scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMUL8x16AU_OPF: + case FMUL8x16AL_OPF: { + unsigned long byte; + s16 src2; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + /* Pick the shift count first, then shift: AU multiplies by + * bits 31:16 of rs2, AL by bits 15:0. + */ + src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); + for (byte = 0; byte < 4; byte++) { + u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; + u32 prod = src1 * src2; + u16 scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. 
*/ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMUL8SUx16_OPF: + case FMUL8ULx16_OPF: { + unsigned long byte, ushift; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; + for (byte = 0; byte < 4; byte++) { + u16 src1; + s16 src2; + u32 prod; + u16 scaled; + + src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); + src2 = ((rs2 >> (16 * byte)) & 0xffff); + prod = src1 * src2; + scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMULD8SUx16_OPF: + case FMULD8ULx16_OPF: { + unsigned long byte, ushift; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; + for (byte = 0; byte < 2; byte++) { + u16 src1; + s16 src2; + u32 prod; + u16 scaled; + + src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); + src2 = ((rs2 >> (16 * byte)) & 0xffff); + prod = src1 * src2; + scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. 
*/ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << + ((byte * 32UL) + 7UL)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + }; +} + +/* Partitioned compare: bit i of the value stored to integer register + * rd is set when lane i of rs1 and rs2 satisfies the relation selected + * by opf. The 32-bit variants keep all 32 bits of each lane so the + * signed comparison sees the full value. + */ +static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val, i; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + + switch (opf) { + case FCMPGT16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a > b) + rd_val |= 1 << i; + } + break; + + case FCMPGT32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffffffff; + s32 b = (rs2 >> (i * 32)) & 0xffffffff; + + if (a > b) + rd_val |= 1 << i; + } + break; + + case FCMPLE16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a <= b) + rd_val |= 1 << i; + } + break; + + case FCMPLE32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffffffff; + s32 b = (rs2 >> (i * 32)) & 0xffffffff; + + if (a <= b) + rd_val |= 1 << i; + } + break; + + case FCMPNE16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a != b) + rd_val |= 1 << i; + } + break; + + case FCMPNE32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffffffff; + s32 b = (rs2 >> (i * 32)) & 0xffffffff; + + if (a != b) + rd_val |= 1 << i; + } + break; + + case FCMPEQ16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a == b) + rd_val |= 1 << i; + } + break; + + case FCMPEQ32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffffffff; + s32 b = (rs2 >> (i * 32)) & 0xffffffff; + + if (a == b) + rd_val |= 1 << i; + } + break; + }; + + maybe_flush_windows(0, 0, RD(insn), 0); + store_reg(regs, rd_val, RD(insn)); +} + +/* Emulate the VIS instructions which are not implemented in + * hardware on Niagara. 
+ */ +int vis_emul(struct pt_regs *regs, unsigned int insn) +{ + unsigned long pc = regs->tpc; + unsigned int opf; + + BUG_ON(regs->tstate & TSTATE_PRIV); + + if (test_thread_flag(TIF_32BIT)) + pc = (u32)pc; + + if (get_user(insn, (u32 __user *) pc)) + return -EFAULT; + + if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL) + return -EINVAL; + + opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; + switch (opf) { + default: + return -EINVAL; + + /* Pixel Formatting Instructions. */ + case FPACK16_OPF: + case FPACK32_OPF: + case FPACKFIX_OPF: + case FEXPAND_OPF: + case FPMERGE_OPF: + pformat(regs, insn, opf); + break; + + /* Partitioned Multiply Instructions */ + case FMUL8x16_OPF: + case FMUL8x16AU_OPF: + case FMUL8x16AL_OPF: + case FMUL8SUx16_OPF: + case FMUL8ULx16_OPF: + case FMULD8SUx16_OPF: + case FMULD8ULx16_OPF: + pmul(regs, insn, opf); + break; + + /* Pixel Compare Instructions */ + case FCMPGT16_OPF: + case FCMPGT32_OPF: + case FCMPLE16_OPF: + case FCMPLE32_OPF: + case FCMPNE16_OPF: + case FCMPNE32_OPF: + case FCMPEQ16_OPF: + case FCMPEQ32_OPF: + pcmp(regs, insn, opf); + break; + + /* Edge Handling Instructions */ + case EDGE8_OPF: + case EDGE8N_OPF: + case EDGE8L_OPF: + case EDGE8LN_OPF: + case EDGE16_OPF: + case EDGE16N_OPF: + case EDGE16L_OPF: + case EDGE16LN_OPF: + case EDGE32_OPF: + case EDGE32N_OPF: + case EDGE32L_OPF: + case EDGE32LN_OPF: + edge(regs, insn, opf); + break; + + /* Pixel Component Distance */ + case PDIST_OPF: + pdist(regs, insn); + break; + + /* Three-Dimensional Array Addressing Instructions */ + case ARRAY8_OPF: + case ARRAY16_OPF: + case ARRAY32_OPF: + array(regs, insn, opf); + break; + + /* Byte Mask and Shuffle Instructions */ + case BMASK_OPF: + bmask(regs, insn); + break; + + case BSHUFFLE_OPF: + bshuffle(regs, insn); + break; + }; + + regs->tpc = regs->tnpc; + regs->tnpc += 4; + return 0; +} diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 467d13a0d5c1..b097379a49a8 100644 --- 
a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -70,6 +70,22 @@ SECTIONS .con_initcall.init : { *(.con_initcall.init) } __con_initcall_end = .; SECURITY_INIT + . = ALIGN(4); + __tsb_ldquad_phys_patch = .; + .tsb_ldquad_phys_patch : { *(.tsb_ldquad_phys_patch) } + __tsb_ldquad_phys_patch_end = .; + __tsb_phys_patch = .; + .tsb_phys_patch : { *(.tsb_phys_patch) } + __tsb_phys_patch_end = .; + __cpuid_patch = .; + .cpuid_patch : { *(.cpuid_patch) } + __cpuid_patch_end = .; + __sun4v_1insn_patch = .; + .sun4v_1insn_patch : { *(.sun4v_1insn_patch) } + __sun4v_1insn_patch_end = .; + __sun4v_2insn_patch = .; + .sun4v_2insn_patch : { *(.sun4v_2insn_patch) } + __sun4v_2insn_patch_end = .; . = ALIGN(8192); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index 39160926267b..c4aa110a10e5 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -1,8 +1,6 @@ -/* $Id: winfixup.S,v 1.30 2002/02/09 19:49:30 davem Exp $ +/* winfixup.S: Handle cases where user stack pointer is found to be bogus. * - * winfixup.S: Handle cases where user stack pointer is found to be bogus. - * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 2006 David S. Miller (davem@davemloft.net) */ #include <asm/asi.h> @@ -15,374 +13,144 @@ .text -set_pcontext: - sethi %hi(sparc64_kern_pri_context), %l1 - ldx [%l1 + %lo(sparc64_kern_pri_context)], %l1 - mov PRIMARY_CONTEXT, %g1 - stxa %l1, [%g1] ASI_DMMU - flush %g6 - retl - nop + /* It used to be the case that these register window fault + * handlers could run via the save and restore instructions + * done by the trap entry and exit code. They now do the + * window spill/fill by hand, so that case no longer can occur. + */ .align 32 - - /* Here are the rules, pay attention. 
- * - * The kernel is disallowed from touching user space while - * the trap level is greater than zero, except for from within - * the window spill/fill handlers. This must be followed - * so that we can easily detect the case where we tried to - * spill/fill with a bogus (or unmapped) user stack pointer. - * - * These are layed out in a special way for cache reasons, - * don't touch... - */ - .globl fill_fixup, spill_fixup fill_fixup: - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 - or %g4, FAULT_CODE_WINFIXUP, %g4 - be,pt %xcc, window_scheisse_from_user_common - and %g1, TSTATE_CWP, %g1 - - /* This is the extremely complex case, but it does happen from - * time to time if things are just right. Essentially the restore - * done in rtrap right before going back to user mode, with tl=1 - * and that levels trap stack registers all setup, took a fill trap, - * the user stack was not mapped in the tlb, and tlb miss occurred, - * the pte found was not valid, and a simple ref bit watch update - * could not satisfy the miss, so we got here. - * - * We must carefully unwind the state so we get back to tl=0, preserve - * all the register values we were going to give to the user. Luckily - * most things are where they need to be, we also have the address - * which triggered the fault handy as well. - * - * Also note that we must preserve %l5 and %l6. If the user was - * returning from a system call, we must make it look this way - * after we process the fill fault on the users stack. - * - * First, get into the window where the original restore was executed. - */ - - rdpr %wstate, %g2 ! Grab user mode wstate. - wrpr %g1, %cwp ! Get into the right window. - sll %g2, 3, %g2 ! NORMAL-->OTHER - - wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - wrpr %g2, 0x0, %wstate ! This must be consistent. - wrpr %g0, 0x0, %otherwin ! We know this. - call set_pcontext ! Change contexts... 
+ TRAP_LOAD_THREAD_REG(%g6, %g1) + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap_clr_l6 nop - rdpr %pstate, %l1 ! Prepare to change globals. - mov %g6, %o7 ! Get current. - - andn %l1, PSTATE_MM, %l1 ! We want to be in RMO - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] - wrpr %g0, 0x0, %tl ! Out of trap levels. - wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate - mov %o7, %g6 - ldx [%g6 + TI_TASK], %g4 -#ifdef CONFIG_SMP - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif - /* This is the same as below, except we handle this a bit special - * since we must preserve %l5 and %l6, see comment above. - */ - call do_sparc64_fault - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop ! yes, nop is correct - - /* Be very careful about usage of the alternate globals here. - * You cannot touch %g4/%g5 as that has the fault information - * should this be from usermode. Also be careful for the case - * where we get here from the save instruction in etrap.S when - * coming from either user or kernel (does not matter which, it - * is the same problem in both cases). Essentially this means - * do not touch %g7 or %g2 so we handle the two cases fine. + /* Be very careful about usage of the trap globals here. + * You cannot touch %g5 as that has the fault information. 
*/ spill_fixup: - ldx [%g6 + TI_FLAGS], %g1 - andcc %g1, _TIF_32BIT, %g0 - ldub [%g6 + TI_WSAVED], %g1 - - sll %g1, 3, %g3 - add %g6, %g3, %g3 - stx %sp, [%g3 + TI_RWIN_SPTRS] - sll %g1, 7, %g3 - bne,pt %xcc, 1f - add %g6, %g3, %g3 - stx %l0, [%g3 + TI_REG_WINDOW + 0x00] - stx %l1, [%g3 + TI_REG_WINDOW + 0x08] - - stx %l2, [%g3 + TI_REG_WINDOW + 0x10] - stx %l3, [%g3 + TI_REG_WINDOW + 0x18] - stx %l4, [%g3 + TI_REG_WINDOW + 0x20] - stx %l5, [%g3 + TI_REG_WINDOW + 0x28] - stx %l6, [%g3 + TI_REG_WINDOW + 0x30] - stx %l7, [%g3 + TI_REG_WINDOW + 0x38] - stx %i0, [%g3 + TI_REG_WINDOW + 0x40] - stx %i1, [%g3 + TI_REG_WINDOW + 0x48] - - stx %i2, [%g3 + TI_REG_WINDOW + 0x50] - stx %i3, [%g3 + TI_REG_WINDOW + 0x58] - stx %i4, [%g3 + TI_REG_WINDOW + 0x60] - stx %i5, [%g3 + TI_REG_WINDOW + 0x68] - stx %i6, [%g3 + TI_REG_WINDOW + 0x70] - b,pt %xcc, 2f - stx %i7, [%g3 + TI_REG_WINDOW + 0x78] -1: stw %l0, [%g3 + TI_REG_WINDOW + 0x00] - - stw %l1, [%g3 + TI_REG_WINDOW + 0x04] - stw %l2, [%g3 + TI_REG_WINDOW + 0x08] - stw %l3, [%g3 + TI_REG_WINDOW + 0x0c] - stw %l4, [%g3 + TI_REG_WINDOW + 0x10] - stw %l5, [%g3 + TI_REG_WINDOW + 0x14] - stw %l6, [%g3 + TI_REG_WINDOW + 0x18] - stw %l7, [%g3 + TI_REG_WINDOW + 0x1c] - stw %i0, [%g3 + TI_REG_WINDOW + 0x20] - - stw %i1, [%g3 + TI_REG_WINDOW + 0x24] - stw %i2, [%g3 + TI_REG_WINDOW + 0x28] - stw %i3, [%g3 + TI_REG_WINDOW + 0x2c] - stw %i4, [%g3 + TI_REG_WINDOW + 0x30] - stw %i5, [%g3 + TI_REG_WINDOW + 0x34] - stw %i6, [%g3 + TI_REG_WINDOW + 0x38] - stw %i7, [%g3 + TI_REG_WINDOW + 0x3c] -2: add %g1, 1, %g1 - - stb %g1, [%g6 + TI_WSAVED] - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 +spill_fixup_mna: +spill_fixup_dax: + TRAP_LOAD_THREAD_REG(%g6, %g1) + ldx [%g6 + TI_FLAGS], %g1 + andcc %g1, _TIF_32BIT, %g0 + ldub [%g6 + TI_WSAVED], %g1 + sll %g1, 3, %g3 + add %g6, %g3, %g3 + stx %sp, [%g3 + TI_RWIN_SPTRS] + sll %g1, 7, %g3 + bne,pt %xcc, 1f + add %g6, %g3, %g3 + stx %l0, [%g3 + TI_REG_WINDOW + 0x00] + stx %l1, [%g3 + TI_REG_WINDOW + 
0x08] + stx %l2, [%g3 + TI_REG_WINDOW + 0x10] + stx %l3, [%g3 + TI_REG_WINDOW + 0x18] + stx %l4, [%g3 + TI_REG_WINDOW + 0x20] + stx %l5, [%g3 + TI_REG_WINDOW + 0x28] + stx %l6, [%g3 + TI_REG_WINDOW + 0x30] + stx %l7, [%g3 + TI_REG_WINDOW + 0x38] + stx %i0, [%g3 + TI_REG_WINDOW + 0x40] + stx %i1, [%g3 + TI_REG_WINDOW + 0x48] + stx %i2, [%g3 + TI_REG_WINDOW + 0x50] + stx %i3, [%g3 + TI_REG_WINDOW + 0x58] + stx %i4, [%g3 + TI_REG_WINDOW + 0x60] + stx %i5, [%g3 + TI_REG_WINDOW + 0x68] + stx %i6, [%g3 + TI_REG_WINDOW + 0x70] + ba,pt %xcc, 2f + stx %i7, [%g3 + TI_REG_WINDOW + 0x78] +1: stw %l0, [%g3 + TI_REG_WINDOW + 0x00] + stw %l1, [%g3 + TI_REG_WINDOW + 0x04] + stw %l2, [%g3 + TI_REG_WINDOW + 0x08] + stw %l3, [%g3 + TI_REG_WINDOW + 0x0c] + stw %l4, [%g3 + TI_REG_WINDOW + 0x10] + stw %l5, [%g3 + TI_REG_WINDOW + 0x14] + stw %l6, [%g3 + TI_REG_WINDOW + 0x18] + stw %l7, [%g3 + TI_REG_WINDOW + 0x1c] + stw %i0, [%g3 + TI_REG_WINDOW + 0x20] + stw %i1, [%g3 + TI_REG_WINDOW + 0x24] + stw %i2, [%g3 + TI_REG_WINDOW + 0x28] + stw %i3, [%g3 + TI_REG_WINDOW + 0x2c] + stw %i4, [%g3 + TI_REG_WINDOW + 0x30] + stw %i5, [%g3 + TI_REG_WINDOW + 0x34] + stw %i6, [%g3 + TI_REG_WINDOW + 0x38] + stw %i7, [%g3 + TI_REG_WINDOW + 0x3c] +2: add %g1, 1, %g1 + stb %g1, [%g6 + TI_WSAVED] + rdpr %tstate, %g1 + andcc %g1, TSTATE_PRIV, %g0 saved - and %g1, TSTATE_CWP, %g1 - be,pn %xcc, window_scheisse_from_user_common - mov FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4 + be,pn %xcc, 1f + and %g1, TSTATE_CWP, %g1 retry +1: mov FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 -window_scheisse_from_user_common: - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] - wrpr %g1, %cwp - ba,pt %xcc, etrap - rd %pc, %g7 - call do_sparc64_fault - add %sp, PTREGS_OFF, %o0 - 
ba,a,pt %xcc, rtrap_clr_l6 - - .globl winfix_mna, fill_fixup_mna, spill_fixup_mna winfix_mna: - andn %g3, 0x7f, %g3 - add %g3, 0x78, %g3 - wrpr %g3, %tnpc + andn %g3, 0x7f, %g3 + add %g3, 0x78, %g3 + wrpr %g3, %tnpc done -fill_fixup_mna: - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 - be,pt %xcc, window_mna_from_user_common - and %g1, TSTATE_CWP, %g1 - /* Please, see fill_fixup commentary about why we must preserve - * %l5 and %l6 to preserve absolute correct semantics. - */ - rdpr %wstate, %g2 ! Grab user mode wstate. - wrpr %g1, %cwp ! Get into the right window. - sll %g2, 3, %g2 ! NORMAL-->OTHER - wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - - wrpr %g2, 0x0, %wstate ! This must be consistent. - wrpr %g0, 0x0, %otherwin ! We know this. - call set_pcontext ! Change contexts... +fill_fixup_mna: + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + sethi %hi(tlb_type), %g1 + lduw [%g1 + %lo(tlb_type)], %g1 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + mov %l4, %o2 + call sun4v_do_mna + mov %l5, %o1 + ba,a,pt %xcc, rtrap_clr_l6 +1: mov %l4, %o1 + mov %l5, %o2 + call mem_address_unaligned nop - rdpr %pstate, %l1 ! Prepare to change globals. - mov %g4, %o2 ! Setup args for - mov %g5, %o1 ! final call to mem_address_unaligned. - andn %l1, PSTATE_MM, %l1 ! We want to be in RMO + ba,a,pt %xcc, rtrap_clr_l6 - mov %g6, %o7 ! Stash away current. - wrpr %g0, 0x0, %tl ! Out of trap levels. - wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate - mov %o7, %g6 ! Get current back. - ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif - call mem_address_unaligned - add %sp, PTREGS_OFF, %o0 - - b,pt %xcc, rtrap - nop ! 
yes, the nop is correct -spill_fixup_mna: - ldx [%g6 + TI_FLAGS], %g1 - andcc %g1, _TIF_32BIT, %g0 - ldub [%g6 + TI_WSAVED], %g1 - sll %g1, 3, %g3 - add %g6, %g3, %g3 - stx %sp, [%g3 + TI_RWIN_SPTRS] - - sll %g1, 7, %g3 - bne,pt %xcc, 1f - add %g6, %g3, %g3 - stx %l0, [%g3 + TI_REG_WINDOW + 0x00] - stx %l1, [%g3 + TI_REG_WINDOW + 0x08] - stx %l2, [%g3 + TI_REG_WINDOW + 0x10] - stx %l3, [%g3 + TI_REG_WINDOW + 0x18] - stx %l4, [%g3 + TI_REG_WINDOW + 0x20] - - stx %l5, [%g3 + TI_REG_WINDOW + 0x28] - stx %l6, [%g3 + TI_REG_WINDOW + 0x30] - stx %l7, [%g3 + TI_REG_WINDOW + 0x38] - stx %i0, [%g3 + TI_REG_WINDOW + 0x40] - stx %i1, [%g3 + TI_REG_WINDOW + 0x48] - stx %i2, [%g3 + TI_REG_WINDOW + 0x50] - stx %i3, [%g3 + TI_REG_WINDOW + 0x58] - stx %i4, [%g3 + TI_REG_WINDOW + 0x60] - - stx %i5, [%g3 + TI_REG_WINDOW + 0x68] - stx %i6, [%g3 + TI_REG_WINDOW + 0x70] - stx %i7, [%g3 + TI_REG_WINDOW + 0x78] - b,pt %xcc, 2f - add %g1, 1, %g1 -1: std %l0, [%g3 + TI_REG_WINDOW + 0x00] - std %l2, [%g3 + TI_REG_WINDOW + 0x08] - std %l4, [%g3 + TI_REG_WINDOW + 0x10] - - std %l6, [%g3 + TI_REG_WINDOW + 0x18] - std %i0, [%g3 + TI_REG_WINDOW + 0x20] - std %i2, [%g3 + TI_REG_WINDOW + 0x28] - std %i4, [%g3 + TI_REG_WINDOW + 0x30] - std %i6, [%g3 + TI_REG_WINDOW + 0x38] - add %g1, 1, %g1 -2: stb %g1, [%g6 + TI_WSAVED] - rdpr %tstate, %g1 - - andcc %g1, TSTATE_PRIV, %g0 - saved - be,pn %xcc, window_mna_from_user_common - and %g1, TSTATE_CWP, %g1 - retry -window_mna_from_user_common: - wrpr %g1, %cwp - sethi %hi(109f), %g7 - ba,pt %xcc, etrap -109: or %g7, %lo(109b), %g7 - mov %l4, %o2 - mov %l5, %o1 - call mem_address_unaligned - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 - - /* These are only needed for 64-bit mode processes which - * put their stack pointer into the VPTE area and there - * happens to be a VPTE tlb entry mapped there during - * a spill/fill trap to that stack frame. 
- */ - .globl winfix_dax, fill_fixup_dax, spill_fixup_dax winfix_dax: - andn %g3, 0x7f, %g3 - add %g3, 0x74, %g3 - wrpr %g3, %tnpc + andn %g3, 0x7f, %g3 + add %g3, 0x74, %g3 + wrpr %g3, %tnpc done -fill_fixup_dax: - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 - be,pt %xcc, window_dax_from_user_common - and %g1, TSTATE_CWP, %g1 - - /* Please, see fill_fixup commentary about why we must preserve - * %l5 and %l6 to preserve absolute correct semantics. - */ - rdpr %wstate, %g2 ! Grab user mode wstate. - wrpr %g1, %cwp ! Get into the right window. - sll %g2, 3, %g2 ! NORMAL-->OTHER - wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - wrpr %g2, 0x0, %wstate ! This must be consistent. - wrpr %g0, 0x0, %otherwin ! We know this. - call set_pcontext ! Change contexts... +fill_fixup_dax: + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + sethi %hi(tlb_type), %g1 + mov %l4, %o1 + lduw [%g1 + %lo(tlb_type)], %g1 + mov %l5, %o2 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + call sun4v_data_access_exception nop - rdpr %pstate, %l1 ! Prepare to change globals. - mov %g4, %o1 ! Setup args for - mov %g5, %o2 ! final call to spitfire_data_access_exception. - andn %l1, PSTATE_MM, %l1 ! We want to be in RMO - - mov %g6, %o7 ! Stash away current. - wrpr %g0, 0x0, %tl ! Out of trap levels. - wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate - mov %o7, %g6 ! Get current back. - ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif - call spitfire_data_access_exception - add %sp, PTREGS_OFF, %o0 - - b,pt %xcc, rtrap - nop ! 
yes, the nop is correct -spill_fixup_dax: - ldx [%g6 + TI_FLAGS], %g1 - andcc %g1, _TIF_32BIT, %g0 - ldub [%g6 + TI_WSAVED], %g1 - sll %g1, 3, %g3 - add %g6, %g3, %g3 - stx %sp, [%g3 + TI_RWIN_SPTRS] - - sll %g1, 7, %g3 - bne,pt %xcc, 1f - add %g6, %g3, %g3 - stx %l0, [%g3 + TI_REG_WINDOW + 0x00] - stx %l1, [%g3 + TI_REG_WINDOW + 0x08] - stx %l2, [%g3 + TI_REG_WINDOW + 0x10] - stx %l3, [%g3 + TI_REG_WINDOW + 0x18] - stx %l4, [%g3 + TI_REG_WINDOW + 0x20] - - stx %l5, [%g3 + TI_REG_WINDOW + 0x28] - stx %l6, [%g3 + TI_REG_WINDOW + 0x30] - stx %l7, [%g3 + TI_REG_WINDOW + 0x38] - stx %i0, [%g3 + TI_REG_WINDOW + 0x40] - stx %i1, [%g3 + TI_REG_WINDOW + 0x48] - stx %i2, [%g3 + TI_REG_WINDOW + 0x50] - stx %i3, [%g3 + TI_REG_WINDOW + 0x58] - stx %i4, [%g3 + TI_REG_WINDOW + 0x60] - - stx %i5, [%g3 + TI_REG_WINDOW + 0x68] - stx %i6, [%g3 + TI_REG_WINDOW + 0x70] - stx %i7, [%g3 + TI_REG_WINDOW + 0x78] - b,pt %xcc, 2f - add %g1, 1, %g1 -1: std %l0, [%g3 + TI_REG_WINDOW + 0x00] - std %l2, [%g3 + TI_REG_WINDOW + 0x08] - std %l4, [%g3 + TI_REG_WINDOW + 0x10] - - std %l6, [%g3 + TI_REG_WINDOW + 0x18] - std %i0, [%g3 + TI_REG_WINDOW + 0x20] - std %i2, [%g3 + TI_REG_WINDOW + 0x28] - std %i4, [%g3 + TI_REG_WINDOW + 0x30] - std %i6, [%g3 + TI_REG_WINDOW + 0x38] - add %g1, 1, %g1 -2: stb %g1, [%g6 + TI_WSAVED] - rdpr %tstate, %g1 - - andcc %g1, TSTATE_PRIV, %g0 - saved - be,pn %xcc, window_dax_from_user_common - and %g1, TSTATE_CWP, %g1 - retry -window_dax_from_user_common: - wrpr %g1, %cwp - sethi %hi(109f), %g7 - ba,pt %xcc, etrap -109: or %g7, %lo(109b), %g7 - mov %l4, %o1 - mov %l5, %o2 - call spitfire_data_access_exception - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 + ba,a,pt %xcc, rtrap_clr_l6 +1: call spitfire_data_access_exception + nop + ba,a,pt %xcc, rtrap_clr_l6 diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index c295806500f7..8812ded19f01 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -11,6 +11,8 @@ lib-y := 
PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ VISsave.o atomic.o bitops.o \ U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ + NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ + NGpage.o NGbzero.o \ copy_in_user.o user_fixup.o memmove.o \ mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o diff --git a/arch/sparc64/lib/NGbzero.S b/arch/sparc64/lib/NGbzero.S new file mode 100644 index 000000000000..e86baece5cc8 --- /dev/null +++ b/arch/sparc64/lib/NGbzero.S @@ -0,0 +1,163 @@ +/* NGbzero.S: Niagara optimized memset/clear_user. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ +#include <asm/asi.h> + +#define EX_ST(x,y) \ +98: x,y; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov %o1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + + .text + + .globl NGmemset + .type NGmemset, #function +NGmemset: /* %o0=buf, %o1=pat, %o2=len */ + and %o1, 0xff, %o3 + mov %o2, %o1 + sllx %o3, 8, %g1 + or %g1, %o3, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %xcc, 1f + or %g1, %o2, %o2 + + .globl NGbzero + .type NGbzero, #function +NGbzero: + clr %o2 +1: brz,pn %o1, NGbzero_return + mov %o0, %o3 + + /* %o5: saved %asi, restored at NGbzero_done + * %g7: store-init %asi to use + * %o4: non-store-init %asi to use + */ + rd %asi, %o5 + mov ASI_BLK_INIT_QUAD_LDD_P, %g7 + mov ASI_P, %o4 + wr %o4, 0x0, %asi + +NGbzero_from_clear_user: + cmp %o1, 15 + bl,pn %icc, NGbzero_tiny + andcc %o0, 0x7, %g1 + be,pt %xcc, 2f + mov 8, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: EX_ST(stba %o2, [%o0 + 0x00] %asi) + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %o0, 1, %o0 +2: cmp %o1, 128 + bl,pn %icc, NGbzero_medium + andcc %o0, (64 - 1), %g1 + be,pt %xcc, NGbzero_pre_loop + mov 64, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %xcc, 1b + add %o0, 8, %o0 + 
+NGbzero_pre_loop: + wr %g7, 0x0, %asi + andn %o1, (64 - 1), %g1 + sub %o1, %g1, %o1 +NGbzero_loop: + EX_ST(stxa %o2, [%o0 + 0x00] %asi) + EX_ST(stxa %o2, [%o0 + 0x08] %asi) + EX_ST(stxa %o2, [%o0 + 0x10] %asi) + EX_ST(stxa %o2, [%o0 + 0x18] %asi) + EX_ST(stxa %o2, [%o0 + 0x20] %asi) + EX_ST(stxa %o2, [%o0 + 0x28] %asi) + EX_ST(stxa %o2, [%o0 + 0x30] %asi) + EX_ST(stxa %o2, [%o0 + 0x38] %asi) + subcc %g1, 64, %g1 + bne,pt %xcc, NGbzero_loop + add %o0, 64, %o0 + + wr %o4, 0x0, %asi + brz,pn %o1, NGbzero_done +NGbzero_medium: + andncc %o1, 0x7, %g1 + be,pn %xcc, 2f + sub %o1, %g1, %o1 +1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %xcc, 1b + add %o0, 8, %o0 +2: brz,pt %o1, NGbzero_done + nop + +NGbzero_tiny: +1: EX_ST(stba %o2, [%o0 + 0x00] %asi) + subcc %o1, 1, %o1 + bne,pt %icc, 1b + add %o0, 1, %o0 + + /* fallthrough */ + +NGbzero_done: + wr %o5, 0x0, %asi + +NGbzero_return: + retl + mov %o3, %o0 + .size NGbzero, .-NGbzero + .size NGmemset, .-NGmemset + + .globl NGclear_user + .type NGclear_user, #function +NGclear_user: /* %o0=buf, %o1=len */ + rd %asi, %o5 + brz,pn %o1, NGbzero_done + clr %o3 + cmp %o5, ASI_AIUS + bne,pn %icc, NGbzero + clr %o2 + mov ASI_BLK_INIT_QUAD_LDD_AIUS, %g7 + ba,pt %xcc, NGbzero_from_clear_user + mov ASI_AIUS, %o4 + .size NGclear_user, .-NGclear_user + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara_patch_bzero + .type niagara_patch_bzero,#function +niagara_patch_bzero: + NG_DO_PATCH(memset, NGmemset) + NG_DO_PATCH(__bzero, NGbzero) + NG_DO_PATCH(__clear_user, NGclear_user) + 
NG_DO_PATCH(tsb_init, NGtsb_init) + retl + nop + .size niagara_patch_bzero,.-niagara_patch_bzero diff --git a/arch/sparc64/lib/NGcopy_from_user.S b/arch/sparc64/lib/NGcopy_from_user.S new file mode 100644 index 000000000000..2d93456f76dd --- /dev/null +++ b/arch/sparc64/lib/NGcopy_from_user.S @@ -0,0 +1,37 @@ +/* NGcopy_from_user.S: Niagara optimized copy from userspace. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME NGcopy_from_user +#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest +#define LOAD_TWIN(addr_reg,dest0,dest1) \ + ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0 +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NGmemcpy.S" diff --git a/arch/sparc64/lib/NGcopy_to_user.S b/arch/sparc64/lib/NGcopy_to_user.S new file mode 100644 index 000000000000..34112d5054ef --- /dev/null +++ b/arch/sparc64/lib/NGcopy_to_user.S @@ -0,0 +1,40 @@ +/* NGcopy_to_user.S: Niagara optimized copy to userspace. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME NGcopy_to_user +#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. 
+ */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NGmemcpy.S" diff --git a/arch/sparc64/lib/NGmemcpy.S b/arch/sparc64/lib/NGmemcpy.S new file mode 100644 index 000000000000..8e522b3dc095 --- /dev/null +++ b/arch/sparc64/lib/NGmemcpy.S @@ -0,0 +1,368 @@ +/* NGmemcpy.S: Niagara optimized memcpy. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#include <asm/asi.h> +#include <asm/thread_info.h> +#define GLOBAL_SPARE %g7 +#define RESTORE_ASI(TMP) \ + ldub [%g6 + TI_CURRENT_DS], TMP; \ + wr TMP, 0x0, %asi; +#else +#define GLOBAL_SPARE %g5 +#define RESTORE_ASI(TMP) \ + wr %g0, ASI_PNF, %asi +#endif + +#ifndef STORE_ASI +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#ifndef MEMCPY_DEBUG +#define LOAD(type,addr,dest) type [addr], dest +#else +#define LOAD(type,addr,dest) type##a [addr] 0x80, dest +#endif +#endif + +#ifndef LOAD_TWIN +#define LOAD_TWIN(addr_reg,dest0,dest1) \ + ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0 +#endif + +#ifndef STORE +#define STORE(type,src,addr) type src, [addr] +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr) stxa src, [addr] %asi +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME NGmemcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME + .type FUNC_NAME,#function +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %xcc, 5 + PREAMBLE + mov %o0, GLOBAL_SPARE + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + /* 2 blocks (128 bytes) is the minimum we can do the block + * copy with. 
We need to ensure that we'll iterate at least + * once in the block copy loop. At worst we'll need to align + * the destination to a 64-byte boundary which can chew up + * to (64 - 1) bytes from the length before we perform the + * block copy loop. + */ + cmp %o2, (2 * 64) + blu,pt %XCC, 70f + andcc %o3, 0x7, %g0 + + /* %o0: dst + * %o1: src + * %o2: len (known to be >= 128) + * + * The block copy loops will use %o4/%o5,%g2/%g3 as + * temporaries while copying the data. + */ + + LOAD(prefetch, %o1, #one_read) + wr %g0, STORE_ASI, %asi + + /* Align destination on 64-byte boundary. */ + andcc %o0, (64 - 1), %o4 + be,pt %XCC, 2f + sub %o4, 64, %o4 + sub %g0, %o4, %o4 ! bytes to align dst + sub %o2, %o4, %o2 +1: subcc %o4, 1, %o4 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o0)) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + + /* If the source is on a 16-byte boundary we can do + * the direct block copy loop. If it is 8-byte aligned + * we can do the 16-byte loads offset by -8 bytes and the + * init stores offset by one register. + * + * If the source is not even 8-byte aligned, we need to do + * shifting and masking (basically integer faligndata). + * + * The careful bit with init stores is that if we store + * to any part of the cache line we have to store the whole + * cacheline else we can end up with corrupt L2 cache line + * contents. Since the loop works on 64-bytes of 64-byte + * aligned store data at a time, this is easy to ensure. + */ +2: + andcc %o1, (16 - 1), %o4 + andn %o2, (64 - 1), %g1 ! block copy loop iterator + sub %o2, %g1, %o2 ! final sub-block copy bytes + be,pt %XCC, 50f + cmp %o4, 8 + be,a,pt %XCC, 10f + sub %o1, 0x8, %o1 + + /* Neither 8-byte nor 16-byte aligned, shift and mask. 
*/ + mov %g1, %o4 + and %o1, 0x7, %g1 + sll %g1, 3, %g1 + mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + sllx %g2, %g1, %g2 + +#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ + EX_LD(LOAD(ldx, SRC, TMP1)); \ + srlx TMP1, PRE_SHIFT, TMP2; \ + or TMP2, PRE_VAL, TMP2; \ + EX_ST(STORE_INIT(TMP2, DST)); \ + sllx TMP1, POST_SHIFT, PRE_VAL; + +1: add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) + add %o1, 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32 - 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) + subcc %o4, 64, %o4 + bne,pt %XCC, 1b + add %o0, 64, %o0 + +#undef SWIVEL_ONE_DWORD + + srl %g1, 3, %g1 + ba,pt %XCC, 60f + add %o1, %g1, %o1 + +10: /* Destination is 64-byte aligned, source was only 8-byte + * aligned but it has been subtracted by 8 and we perform + * one twin load ahead, then add 8 back into source when + * we finish the loop. + */ + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) +1: add %o1, 16, %o1 + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! 
initializes cache line + EX_ST(STORE_INIT(%g2, %o0 + 0x08)) + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%g3, %o0 + 0x10)) + EX_ST(STORE_INIT(%o4, %o0 + 0x18)) + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%o5, %o0 + 0x20)) + EX_ST(STORE_INIT(%g2, %o0 + 0x28)) + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + EX_ST(STORE_INIT(%g3, %o0 + 0x30)) + EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + + ba,pt %XCC, 60f + add %o1, 0x8, %o1 + +50: /* Destination is 64-byte aligned, and source is 16-byte + * aligned. + */ +1: EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + add %o1, 16, %o1 + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08)) + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%g2, %o0 + 0x10)) + EX_ST(STORE_INIT(%g3, %o0 + 0x18)) + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%o4, %o0 + 0x20)) + EX_ST(STORE_INIT(%o5, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x30)) + EX_ST(STORE_INIT(%g3, %o0 + 0x38)) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + /* fall through */ + +60: + /* %o2 contains any final bytes still needed to be copied + * over. If anything is left, we copy it one byte at a time. 
+ */ + RESTORE_ASI(%o3) + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +70: /* 16 < len <= 64 */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + EX_LD(LOAD(ldx, %o1, %o5)) + add %o1, 0x08, %o1 + EX_LD(LOAD(ldx, %o1, %g1)) + sub %o1, 0x08, %o1 + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 + EX_ST(STORE(stx, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + EX_LD(LOAD(ldx, %o1, %o5)) + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + EX_LD(LOAD(lduw, %o1, %o5)) + EX_ST(STORE(stw, %o5, %o1 + %o3)) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop + +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + EX_LD(LOAD(ldub, %o1, %o5)) + EX_ST(STORE(stb, %o5, %o1 + %o3)) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %xcc, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: add %o1, 0x8, %o1 + EX_LD(LOAD(ldx, %o1, %g3)) + subcc %o4, 0x8, %o4 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + EX_ST(STORE(stx, %o5, %o0)) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %xcc, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + EX_LD(LOAD(lduw, %o1, %g1)) + EX_ST(STORE(stw, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + EX_LD(LOAD(ldub, %o1, 
%g1)) + EX_ST(STORE(stb, %g1, %o1 + %o3)) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/NGpage.S b/arch/sparc64/lib/NGpage.S new file mode 100644 index 000000000000..7d7c3bb8dcbf --- /dev/null +++ b/arch/sparc64/lib/NGpage.S @@ -0,0 +1,96 @@ +/* NGpage.S: Niagara optimize clear and copy page. + * + * Copyright (C) 2006 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + + .text + .align 32 + + /* This is heavily simplified from the sun4u variants + * because Niagara does not have any D-cache aliasing issues + * and also we don't need to use the FPU in order to implement + * an optimal page copy/clear. + */ + +NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + prefetch [%o1 + 0x00], #one_read + mov 8, %g1 + mov 16, %g2 + mov 24, %g3 + set PAGE_SIZE, %g7 + +1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 + ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 + prefetch [%o1 + 0x40], #one_read + add %o1, 32, %o1 + stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 + add %o1, 32, %o1 + add %o0, 32, %o0 + stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + subcc %g7, 64, %g7 + bne,pt %xcc, 1b + add %o0, 32, %o0 + retl + nop + +NGclear_page: /* %o0=dest */ +NGclear_user_page: /* %o0=dest, %o1=vaddr */ + mov 8, %g1 + mov 16, %g2 + mov 24, %g3 + set PAGE_SIZE, %g7 + +1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 32, %o0 + stxa %g0, 
[%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + subcc %g7, 64, %g7 + bne,pt %xcc, 1b + add %o0, 32, %o0 + retl + nop + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara_patch_pageops + .type niagara_patch_pageops,#function +niagara_patch_pageops: + NG_DO_PATCH(copy_user_page, NGcopy_user_page) + NG_DO_PATCH(_clear_page, NGclear_page) + NG_DO_PATCH(clear_user_page, NGclear_user_page) + retl + nop + .size niagara_patch_pageops,.-niagara_patch_pageops diff --git a/arch/sparc64/lib/NGpatch.S b/arch/sparc64/lib/NGpatch.S new file mode 100644 index 000000000000..3b0674fc3366 --- /dev/null +++ b/arch/sparc64/lib/NGpatch.S @@ -0,0 +1,33 @@ +/* NGpatch.S: Patch Ultra-I routines with Niagara variant. + * + * Copyright (C) 2006 David S. 
Miller <davem@davemloft.net> + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara_patch_copyops + .type niagara_patch_copyops,#function +niagara_patch_copyops: + NG_DO_PATCH(memcpy, NGmemcpy) + NG_DO_PATCH(___copy_from_user, NGcopy_from_user) + NG_DO_PATCH(___copy_to_user, NGcopy_to_user) + retl + nop + .size niagara_patch_copyops,.-niagara_patch_copyops diff --git a/arch/sparc64/lib/U3patch.S b/arch/sparc64/lib/U3patch.S index e2b6c5e4b95a..ecc302619a6e 100644 --- a/arch/sparc64/lib/U3patch.S +++ b/arch/sparc64/lib/U3patch.S @@ -12,7 +12,8 @@ or %g2, %lo(OLD), %g2; \ sub %g1, %g2, %g1; \ sethi %hi(BRANCH_ALWAYS), %g3; \ - srl %g1, 2, %g1; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ or %g3, %lo(BRANCH_ALWAYS), %g3; \ or %g3, %g1, %g3; \ stw %g3, [%g2]; \ diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S index 1d2abcfa4e52..c7bbae8c590f 100644 --- a/arch/sparc64/lib/bzero.S +++ b/arch/sparc64/lib/bzero.S @@ -98,12 +98,12 @@ __bzero_done: .text; \ .align 4; - .globl __bzero_noasi - .type __bzero_noasi, #function -__bzero_noasi: /* %o0=buf, %o1=len */ - brz,pn %o1, __bzero_noasi_done + .globl __clear_user + .type __clear_user, #function +__clear_user: /* %o0=buf, %o1=len */ + brz,pn %o1, __clear_user_done cmp %o1, 16 - bl,pn %icc, __bzero_noasi_tiny + bl,pn %icc, __clear_user_tiny EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes) andcc %o0, 0x3, %g0 be,pt %icc, 2f @@ -145,14 +145,14 @@ __bzero_noasi: /* %o0=buf, %o1=len */ subcc %g1, 8, %g1 bne,pt %icc, 5b add %o0, 0x8, %o0 -6: brz,pt %o1, __bzero_noasi_done +6: 
brz,pt %o1, __clear_user_done nop -__bzero_noasi_tiny: +__clear_user_tiny: 1: EX_ST(stba %g0, [%o0 + 0x00] %asi) subcc %o1, 1, %o1 bne,pt %icc, 1b add %o0, 1, %o0 -__bzero_noasi_done: +__clear_user_done: retl clr %o0 - .size __bzero_noasi, .-__bzero_noasi + .size __clear_user, .-__clear_user diff --git a/arch/sparc64/lib/clear_page.S b/arch/sparc64/lib/clear_page.S index b59884ef051d..77e531f6c2a7 100644 --- a/arch/sparc64/lib/clear_page.S +++ b/arch/sparc64/lib/clear_page.S @@ -9,6 +9,7 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/spitfire.h> +#include <asm/head.h> /* What we used to do was lock a TLB entry into a specific * TLB slot, clear the page with interrupts disabled, then @@ -22,9 +23,6 @@ * disable preemption during the clear. */ -#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) -#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) - .text .globl _clear_page @@ -43,12 +41,11 @@ clear_user_page: /* %o0=dest, %o1=vaddr */ sethi %hi(PAGE_SIZE), %o4 sllx %g2, 32, %g2 - sethi %uhi(TTE_BITS_TOP), %g3 + sethi %hi(PAGE_KERNEL_LOCKED), %g3 - sllx %g3, 32, %g3 + ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 sub %o0, %g2, %g1 ! paddr - or %g3, TTE_BITS_BOTTOM, %g3 and %o1, %o4, %o0 ! vaddr D-cache alias bit or %g1, %g3, %g1 ! TTE data @@ -66,7 +63,8 @@ clear_user_page: /* %o0=dest, %o1=vaddr */ wrpr %o4, PSTATE_IE, %pstate stxa %o0, [%g3] ASI_DMMU stxa %g1, [%g0] ASI_DTLB_DATA_IN - flush %g6 + sethi %hi(KERNBASE), %g1 + flush %g1 wrpr %o4, 0x0, %pstate mov 1, %o4 diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S index feebb14fd27a..37460666a5c3 100644 --- a/arch/sparc64/lib/copy_page.S +++ b/arch/sparc64/lib/copy_page.S @@ -23,8 +23,6 @@ * disable preemption during the clear. 
*/ -#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) -#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) #define DCACHE_SIZE (PAGE_SIZE * 2) #if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19) @@ -52,13 +50,12 @@ copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ sethi %hi(PAGE_SIZE), %o3 sllx %g2, 32, %g2 - sethi %uhi(TTE_BITS_TOP), %g3 + sethi %hi(PAGE_KERNEL_LOCKED), %g3 - sllx %g3, 32, %g3 + ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 sub %o0, %g2, %g1 ! dest paddr sub %o1, %g2, %g2 ! src paddr - or %g3, TTE_BITS_BOTTOM, %g3 and %o2, %o3, %o0 ! vaddr D-cache alias bit or %g1, %g3, %g1 ! dest TTE data diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c index e8808727617a..fb27e54a03ee 100644 --- a/arch/sparc64/lib/delay.c +++ b/arch/sparc64/lib/delay.c @@ -1,6 +1,6 @@ /* delay.c: Delay loops for sparc64 * - * Copyright (C) 2004 David S. Miller <davem@redhat.com> + * Copyright (C) 2004, 2006 David S. Miller <davem@davemloft.net> * * Based heavily upon x86 variant which is: * Copyright (C) 1993 Linus Torvalds @@ -8,19 +8,16 @@ */ #include <linux/delay.h> +#include <asm/timer.h> void __delay(unsigned long loops) { - __asm__ __volatile__( -" b,pt %%xcc, 1f\n" -" cmp %0, 0\n" -" .align 32\n" -"1:\n" -" bne,pt %%xcc, 1b\n" -" subcc %0, 1, %0\n" - : "=&r" (loops) - : "0" (loops) - : "cc"); + unsigned long bclock, now; + + bclock = tick_ops->get_tick(); + do { + now = tick_ops->get_tick(); + } while ((now-bclock) < loops); } /* We used to multiply by HZ after shifting down by 32 bits diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S index 4cd5d2be1ae1..a79c8888170d 100644 --- a/arch/sparc64/lib/xor.S +++ b/arch/sparc64/lib/xor.S @@ -2,9 +2,10 @@ * arch/sparc64/lib/xor.S * * High speed xor_block operation for RAID4/5 utilizing the - * UltraSparc Visual Instruction Set. + * UltraSparc Visual Instruction Set and Niagara store-init/twin-load. 
* * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> */ #include <asm/visasm.h> @@ -19,6 +20,8 @@ */ .text .align 32 + + /* VIS versions. */ .globl xor_vis_2 .type xor_vis_2,#function xor_vis_2: @@ -352,3 +355,298 @@ xor_vis_5: ret restore .size xor_vis_5, .-xor_vis_5 + + /* Niagara versions. */ + .globl xor_niagara_2 + .type xor_niagara_2,#function +xor_niagara_2: /* %o0=bytes, %o1=dest, %o2=src */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + rd %asi, %g7 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov %i1, %i0 + mov %i2, %i1 +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src + 0x00 */ + ldda [%i1 + 0x10] %asi, %i4 /* %i4/%i5 = src + 0x10 */ + ldda [%i1 + 0x20] %asi, %g2 /* %g2/%g3 = src + 0x20 */ + ldda [%i1 + 0x30] %asi, %l0 /* %l0/%l1 = src + 0x30 */ + prefetch [%i1 + 0x40], #one_read + ldda [%i0 + 0x00] %asi, %o0 /* %o0/%o1 = dest + 0x00 */ + ldda [%i0 + 0x10] %asi, %o2 /* %o2/%o3 = dest + 0x10 */ + ldda [%i0 + 0x20] %asi, %o4 /* %o4/%o5 = dest + 0x20 */ + ldda [%i0 + 0x30] %asi, %l2 /* %l2/%l3 = dest + 0x30 */ + prefetch [%i0 + 0x40], #n_writes + xor %o0, %i2, %o0 + xor %o1, %i3, %o1 + stxa %o0, [%i0 + 0x00] %asi + stxa %o1, [%i0 + 0x08] %asi + xor %o2, %i4, %o2 + xor %o3, %i5, %o3 + stxa %o2, [%i0 + 0x10] %asi + stxa %o3, [%i0 + 0x18] %asi + xor %o4, %g2, %o4 + xor %o5, %g3, %o5 + stxa %o4, [%i0 + 0x20] %asi + stxa %o5, [%i0 + 0x28] %asi + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x30] %asi + stxa %l3, [%i0 + 0x38] %asi + add %i0, 0x40, %i0 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %i1, 0x40, %i1 + membar #Sync + wr %g7, 0x0, %asi + ret + restore + .size xor_niagara_2, .-xor_niagara_2 + + .globl xor_niagara_3 + .type xor_niagara_3,#function +xor_niagara_3: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + prefetch [%i3], #one_read + rd %asi, %g7 + 
wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov %i1, %i0 + mov %i2, %i1 + mov %i3, %l7 +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */ + ldda [%i1 + 0x10] %asi, %i4 /* %i4/%i5 = src1 + 0x10 */ + ldda [%l7 + 0x00] %asi, %g2 /* %g2/%g3 = src2 + 0x00 */ + ldda [%l7 + 0x10] %asi, %l0 /* %l0/%l1 = src2 + 0x10 */ + ldda [%i0 + 0x00] %asi, %o0 /* %o0/%o1 = dest + 0x00 */ + ldda [%i0 + 0x10] %asi, %o2 /* %o2/%o3 = dest + 0x10 */ + xor %g2, %i2, %g2 + xor %g3, %i3, %g3 + xor %o0, %g2, %o0 + xor %o1, %g3, %o1 + stxa %o0, [%i0 + 0x00] %asi + stxa %o1, [%i0 + 0x08] %asi + ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */ + ldda [%l7 + 0x20] %asi, %g2 /* %g2/%g3 = src2 + 0x20 */ + ldda [%i0 + 0x20] %asi, %o0 /* %o0/%o1 = dest + 0x20 */ + xor %l0, %i4, %l0 + xor %l1, %i5, %l1 + xor %o2, %l0, %o2 + xor %o3, %l1, %o3 + stxa %o2, [%i0 + 0x10] %asi + stxa %o3, [%i0 + 0x18] %asi + ldda [%i1 + 0x30] %asi, %i4 /* %i4/%i5 = src1 + 0x30 */ + ldda [%l7 + 0x30] %asi, %l0 /* %l0/%l1 = src2 + 0x30 */ + ldda [%i0 + 0x30] %asi, %o2 /* %o2/%o3 = dest + 0x30 */ + prefetch [%i1 + 0x40], #one_read + prefetch [%l7 + 0x40], #one_read + prefetch [%i0 + 0x40], #n_writes + xor %g2, %i2, %g2 + xor %g3, %i3, %g3 + xor %o0, %g2, %o0 + xor %o1, %g3, %o1 + stxa %o0, [%i0 + 0x20] %asi + stxa %o1, [%i0 + 0x28] %asi + xor %l0, %i4, %l0 + xor %l1, %i5, %l1 + xor %o2, %l0, %o2 + xor %o3, %l1, %o3 + stxa %o2, [%i0 + 0x30] %asi + stxa %o3, [%i0 + 0x38] %asi + add %i0, 0x40, %i0 + add %i1, 0x40, %i1 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %l7, 0x40, %l7 + membar #Sync + wr %g7, 0x0, %asi + ret + restore + .size xor_niagara_3, .-xor_niagara_3 + + .globl xor_niagara_4 + .type xor_niagara_4,#function +xor_niagara_4: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + prefetch [%i3], #one_read + prefetch [%i4], #one_read + rd %asi, %g7 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov 
%i1, %i0 + mov %i2, %i1 + mov %i3, %l7 /* %l7 = src2 pointer */ + mov %i4, %l6 /* %l6 = src3 pointer */ +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */ + ldda [%l7 + 0x00] %asi, %i4 /* %i4/%i5 = src2 + 0x00 */ + ldda [%l6 + 0x00] %asi, %g2 /* %g2/%g3 = src3 + 0x00 */ + ldda [%i0 + 0x00] %asi, %l0 /* %l0/%l1 = dest + 0x00 */ + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x00] %asi + stxa %l1, [%i0 + 0x08] %asi + ldda [%l6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */ + ldda [%i0 + 0x10] %asi, %l0 /* %l0/%l1 = dest + 0x10 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x10] %asi + stxa %l1, [%i0 + 0x18] %asi + ldda [%l6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */ + ldda [%i0 + 0x20] %asi, %l0 /* %l0/%l1 = dest + 0x20 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x20] %asi + stxa %l1, [%i0 + 0x28] %asi + ldda [%l6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */ + ldda [%i0 + 0x30] %asi, %l0 /* %l0/%l1 = dest + 0x30 */ + + prefetch [%i1 + 0x40], #one_read + prefetch [%l7 + 0x40], #one_read + prefetch [%l6 + 0x40], #one_read + prefetch [%i0 + 0x40], #n_writes + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x30] %asi + stxa %l1, [%i0 + 0x38] %asi + + add %i0, 0x40, %i0 + add %i1, 0x40, %i1 + add %l7, 0x40, %l7 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %l6, 0x40, %l6 + membar #Sync + wr
%g7, 0x0, %asi + ret + restore + .size xor_niagara_4, .-xor_niagara_4 + + .globl xor_niagara_5 + .type xor_niagara_5,#function +xor_niagara_5: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + prefetch [%i3], #one_read + prefetch [%i4], #one_read + prefetch [%i5], #one_read + rd %asi, %g7 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov %i1, %i0 + mov %i2, %i1 + mov %i3, %l7 + mov %i4, %l6 + mov %i5, %l5 +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */ + ldda [%l7 + 0x00] %asi, %i4 /* %i4/%i5 = src2 + 0x00 */ + ldda [%l6 + 0x00] %asi, %g2 /* %g2/%g3 = src3 + 0x00 */ + ldda [%l5 + 0x00] %asi, %l0 /* %l0/%l1 = src4 + 0x00 */ + ldda [%i0 + 0x00] %asi, %l2 /* %l2/%l3 = dest + 0x00 */ + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + ldda [%l6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */ + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x00] %asi + stxa %l3, [%i0 + 0x08] %asi + ldda [%l5 + 0x10] %asi, %l0 /* %l0/%l1 = src4 + 0x10 */ + ldda [%i0 + 0x10] %asi, %l2 /* %l2/%l3 = dest + 0x10 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + ldda [%l6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */ + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x10] %asi + stxa %l3, [%i0 + 0x18] %asi + ldda [%l5 + 0x20] %asi, %l0 /* %l0/%l1 = src4 + 0x20 */ + ldda [%i0 + 0x20] %asi, %l2 /* %l2/%l3 = dest + 0x20 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x30] %asi, 
%i4 /* %i4/%i5 = src2 + 0x30 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + ldda [%l6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */ + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x20] %asi + stxa %l3, [%i0 + 0x28] %asi + ldda [%l5 + 0x30] %asi, %l0 /* %l0/%l1 = src4 + 0x30 */ + ldda [%i0 + 0x30] %asi, %l2 /* %l2/%l3 = dest + 0x30 */ + + prefetch [%i1 + 0x40], #one_read + prefetch [%l7 + 0x40], #one_read + prefetch [%l6 + 0x40], #one_read + prefetch [%l5 + 0x40], #one_read + prefetch [%i0 + 0x40], #n_writes + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x30] %asi + stxa %l3, [%i0 + 0x38] %asi + + add %i0, 0x40, %i0 + add %i1, 0x40, %i1 + add %l7, 0x40, %l7 + add %l6, 0x40, %l6 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %l5, 0x40, %l5 + membar #Sync + wr %g7, 0x0, %asi + ret + restore + .size xor_niagara_5, .-xor_niagara_5 diff --git a/arch/sparc64/math-emu/math.c b/arch/sparc64/math-emu/math.c index 2ae05cd7b773..6ee496c2864a 100644 --- a/arch/sparc64/math-emu/math.c +++ b/arch/sparc64/math-emu/math.c @@ -206,9 +206,29 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f) case FSTOQ: TYPE(3,3,1,1,1,0,0); break; case FDTOQ: TYPE(3,3,1,2,1,0,0); break; case FQTOI: TYPE(3,1,0,3,1,0,0); break; + + /* We can get either unimplemented or unfinished + * for these cases. Pre-Niagara systems generate + * unfinished fpop for SUBNORMAL cases, and Niagara + * always gives unimplemented fpop for fsqrt{s,d}. 
+ */ + case FSQRTS: { + unsigned long x = current_thread_info()->xfsr[0]; + + x = (x >> 14) & 0xf; + TYPE(x,1,1,1,1,0,0); + break; + } + + case FSQRTD: { + unsigned long x = current_thread_info()->xfsr[0]; + + x = (x >> 14) & 0xf; + TYPE(x,2,1,2,1,0,0); + break; + } + /* SUBNORMAL - ftt == 2 */ - case FSQRTS: TYPE(2,1,1,1,1,0,0); break; - case FSQRTD: TYPE(2,2,1,2,1,0,0); break; case FADDD: case FSUBD: case FMULD: diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile index 9d0960e69f48..e415bf942bcd 100644 --- a/arch/sparc64/mm/Makefile +++ b/arch/sparc64/mm/Makefile @@ -5,6 +5,6 @@ EXTRA_AFLAGS := -ansi EXTRA_CFLAGS := -Werror -obj-y := ultra.o tlb.o fault.o init.o generic.o +obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 6f0539aa44d0..63b6cc0cd5d5 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -29,6 +29,7 @@ #include <asm/lsu.h> #include <asm/sections.h> #include <asm/kdebug.h> +#include <asm/mmu_context.h> /* * To debug kernel to catch accesses to certain virtual/physical addresses. @@ -91,12 +92,13 @@ static void __kprobes unhandled_fault(unsigned long address, die_if_kernel("Oops", regs); } -static void bad_kernel_pc(struct pt_regs *regs) +static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) { unsigned long *ksp; printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", regs->tpc); + printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); __asm__("mov %%sp, %0" : "=r" (ksp)); show_stack(current, ksp); unhandled_fault(regs->tpc, current, regs); @@ -137,7 +139,7 @@ static unsigned int get_user_insn(unsigned long tpc) if (!pte_present(pte)) goto out; - pa = (pte_val(pte) & _PAGE_PADDR); + pa = (pte_pfn(pte) << PAGE_SHIFT); pa += (tpc & ~PAGE_MASK); /* Use phys bypass so we don't pollute dtlb/dcache. 
*/ @@ -257,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) struct vm_area_struct *vma; unsigned int insn = 0; int si_code, fault_code; - unsigned long address; + unsigned long address, mm_rss; fault_code = get_thread_fault_code(); @@ -280,7 +282,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) (tpc >= MODULES_VADDR && tpc < MODULES_END)) { /* Valid, no problems... */ } else { - bad_kernel_pc(regs); + bad_kernel_pc(regs, address); return; } } @@ -406,6 +408,11 @@ good_area: } up_read(&mm->mmap_sem); + + mm_rss = get_mm_rss(mm); + if (unlikely(mm_rss >= mm->context.tsb_rss_limit)) + tsb_grow(mm, mm_rss); + return; /* diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c index 580b63da836b..5fc5c579e35e 100644 --- a/arch/sparc64/mm/generic.c +++ b/arch/sparc64/mm/generic.c @@ -15,15 +15,6 @@ #include <asm/page.h> #include <asm/tlbflush.h> -static inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space) -{ - pte_t pte; - pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E) & - ~(unsigned long)_PAGE_CACHE); - pte_val(pte) |= (((unsigned long)space) << 32); - return pte; -} - /* Remap IO memory, the same way as remap_pfn_range(), but use * the obio memory space. 
* @@ -48,24 +39,29 @@ static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, pte_t entry; unsigned long curend = address + PAGE_SIZE; - entry = mk_pte_io(offset, prot, space); + entry = mk_pte_io(offset, prot, space, PAGE_SIZE); if (!(address & 0xffff)) { - if (!(address & 0x3fffff) && !(offset & 0x3ffffe) && end >= address + 0x400000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ4MB), - space); + if (PAGE_SIZE < (4 * 1024 * 1024) && + !(address & 0x3fffff) && + !(offset & 0x3ffffe) && + end >= address + 0x400000) { + entry = mk_pte_io(offset, prot, space, + 4 * 1024 * 1024); curend = address + 0x400000; offset += 0x400000; - } else if (!(address & 0x7ffff) && !(offset & 0x7fffe) && end >= address + 0x80000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ512K), - space); + } else if (PAGE_SIZE < (512 * 1024) && + !(address & 0x7ffff) && + !(offset & 0x7fffe) && + end >= address + 0x80000) { + entry = mk_pte_io(offset, prot, space, + 512 * 1024); /* 512K mapping, matching the 0x80000 stride below */ curend = address + 0x80000; offset += 0x80000; - } else if (!(offset & 0xfffe) && end >= address + 0x10000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ64K), - space); + } else if (PAGE_SIZE < (64 * 1024) && + !(offset & 0xfffe) && + end >= address + 0x10000) { + entry = mk_pte_io(offset, prot, space, + 64 * 1024); curend = address + 0x10000; offset += 0x10000; } else diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 625cbb336a23..a7a24869d045 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -1,7 +1,7 @@ /* * SPARC64 Huge TLB page support. * - * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com) + * Copyright (C) 2002, 2003, 2006 David S.
Miller (davem@davemloft.net) */ #include <linux/config.h> @@ -22,6 +22,175 @@ #include <asm/cacheflush.h> #include <asm/mmu_context.h> +/* Slightly simplified from the non-hugepage variant because by + * definition we don't have to worry about any page coloring stuff + */ +#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) +#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma; + unsigned long task_size = TASK_SIZE; + unsigned long start_addr; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + if (unlikely(len >= VA_EXCLUDE_START)) + return -ENOMEM; + + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + + task_size -= len; + +full_search: + addr = ALIGN(addr, HPAGE_SIZE); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). 
*/ + if (addr < VA_EXCLUDE_START && + (addr + len) >= VA_EXCLUDE_START) { + addr = VA_EXCLUDE_END; + vma = find_vma(mm, VA_EXCLUDE_END); + } + if (unlikely(task_size < addr)) { + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (likely(!vma || addr + len <= vma->vm_start)) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + } +} + +static unsigned long +hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + + /* This should only ever run for 32-bit processes. */ + BUG_ON(!test_thread_flag(TIF_32BIT)); + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache & HPAGE_MASK; + + /* make sure it can fit in the remaining address space */ + if (likely(addr > len)) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + } + + if (unlikely(mm->mmap_base < len)) + goto bottomup; + + addr = (mm->mmap_base-len) & HPAGE_MASK; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (likely(!vma || addr+len <= vma->vm_start)) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = 
addr); + } + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = (vma->vm_start-len) & HPAGE_MASK; + } while (likely(len < vma->vm_start)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long task_size = TASK_SIZE; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + + if (len & ~HPAGE_MASK) + return -EINVAL; + if (len > task_size) + return -ENOMEM; + + if (addr) { + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -48,12 +217,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pmd_t *pmd; pte_t *pte = NULL; + addr &= HPAGE_MASK; + pgd = pgd_offset(mm, addr); - if (pgd) { + if (!pgd_none(*pgd)) { pud = pud_offset(pgd, addr); - if (pud) { + if (!pud_none(*pud)) { pmd = pmd_offset(pud, addr); - if (pmd) + if (!pmd_none(*pmd)) 
pte = pte_offset_map(pmd, addr); } } diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 1e44ee26cee8..c2b556106fc1 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -6,6 +6,7 @@ */ #include <linux/config.h> +#include <linux/module.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> @@ -39,9 +40,27 @@ #include <asm/tlb.h> #include <asm/spitfire.h> #include <asm/sections.h> +#include <asm/tsb.h> +#include <asm/hypervisor.h> extern void device_scan(void); +#define MAX_PHYS_ADDRESS (1UL << 42UL) +#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) +#define KPTE_BITMAP_BYTES \ + ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8) + +unsigned long kern_linear_pte_xor[2] __read_mostly; + +/* A bitmap, one bit for every 256MB of physical memory. If the bit + * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else + * if set we should use a 256MB page (via kern_linear_pte_xor[1]). + */ +unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; + +/* A special kernel TSB for 4MB and 256MB linear mappings. */ +struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; + #define MAX_BANKS 32 static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; @@ -111,11 +130,9 @@ static void __init read_obp_memory(const char *property, unsigned long *sparc64_valid_addr_bitmap __read_mostly; -/* Ugly, but necessary... -DaveM */ -unsigned long phys_base __read_mostly; +/* Kernel physical address base and size in bytes. */ unsigned long kern_base __read_mostly; unsigned long kern_size __read_mostly; -unsigned long pfn_base __read_mostly; /* get_new_mmu_context() uses "cache + 1". */ DEFINE_SPINLOCK(ctx_alloc_lock); @@ -141,24 +158,28 @@ unsigned long sparc64_kern_sec_context __read_mostly; int bigkernel = 0; -/* XXX Tune this... 
*/ -#define PGT_CACHE_LOW 25 -#define PGT_CACHE_HIGH 50 +kmem_cache_t *pgtable_cache __read_mostly; + +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +{ + clear_page(addr); +} + +extern void tsb_cache_init(void); -void check_pgt_cache(void) +void pgtable_cache_init(void) { - preempt_disable(); - if (pgtable_cache_size > PGT_CACHE_HIGH) { - do { - if (pgd_quicklist) - free_pgd_slow(get_pgd_fast()); - if (pte_quicklist[0]) - free_pte_slow(pte_alloc_one_fast(NULL, 0)); - if (pte_quicklist[1]) - free_pte_slow(pte_alloc_one_fast(NULL, 1 << (PAGE_SHIFT + 10))); - } while (pgtable_cache_size > PGT_CACHE_LOW); + pgtable_cache = kmem_cache_create("pgtable_cache", + PAGE_SIZE, PAGE_SIZE, + SLAB_HWCACHE_ALIGN | + SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (!pgtable_cache) { + prom_printf("Could not create pgtable_cache\n"); + prom_halt(); } - preempt_enable(); + tsb_cache_init(); } #ifdef CONFIG_DEBUG_DCFLUSH @@ -168,8 +189,9 @@ atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0); #endif #endif -__inline__ void flush_dcache_page_impl(struct page *page) +inline void flush_dcache_page_impl(struct page *page) { + BUG_ON(tlb_type == hypervisor); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes); #endif @@ -186,8 +208,8 @@ __inline__ void flush_dcache_page_impl(struct page *page) } #define PG_dcache_dirty PG_arch_1 -#define PG_dcache_cpu_shift 24 -#define PG_dcache_cpu_mask (256 - 1) +#define PG_dcache_cpu_shift 24UL +#define PG_dcache_cpu_mask (256UL - 1UL) #if NR_CPUS > 256 #error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus @@ -243,32 +265,61 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c : "g1", "g7"); } +static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte) +{ + unsigned long tsb_addr = (unsigned long) ent; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + tsb_addr = __pa(tsb_addr); + + __tsb_insert(tsb_addr, tag, pte); +} + 
+unsigned long _PAGE_ALL_SZ_BITS __read_mostly; +unsigned long _PAGE_SZBITS __read_mostly; + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { - struct page *page; - unsigned long pfn; - unsigned long pg_flags; - - pfn = pte_pfn(pte); - if (pfn_valid(pfn) && - (page = pfn_to_page(pfn), page_mapping(page)) && - ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { - int cpu = ((pg_flags >> PG_dcache_cpu_shift) & - PG_dcache_cpu_mask); - int this_cpu = get_cpu(); - - /* This is just to optimize away some function calls - * in the SMP case. - */ - if (cpu == this_cpu) - flush_dcache_page_impl(page); - else - smp_flush_dcache_page_impl(page, cpu); + struct mm_struct *mm; + struct tsb *tsb; + unsigned long tag, flags; + + if (tlb_type != hypervisor) { + unsigned long pfn = pte_pfn(pte); + unsigned long pg_flags; + struct page *page; + + if (pfn_valid(pfn) && + (page = pfn_to_page(pfn), page_mapping(page)) && + ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { + int cpu = ((pg_flags >> PG_dcache_cpu_shift) & + PG_dcache_cpu_mask); + int this_cpu = get_cpu(); + + /* This is just to optimize away some function calls + * in the SMP case. + */ + if (cpu == this_cpu) + flush_dcache_page_impl(page); + else + smp_flush_dcache_page_impl(page, cpu); - clear_dcache_dirty_cpu(page, cpu); + clear_dcache_dirty_cpu(page, cpu); - put_cpu(); + put_cpu(); + } } + + mm = vma->vm_mm; + + spin_lock_irqsave(&mm->context.lock, flags); + + tsb = &mm->context.tsb[(address >> PAGE_SHIFT) & + (mm->context.tsb_nentries - 1UL)]; + tag = (address >> 22UL); + tsb_insert(tsb, tag, pte_val(pte)); + + spin_unlock_irqrestore(&mm->context.lock, flags); } void flush_dcache_page(struct page *page) @@ -276,6 +327,9 @@ void flush_dcache_page(struct page *page) struct address_space *mapping; int this_cpu; + if (tlb_type == hypervisor) + return; + /* Do not bother with the expensive D-cache flush if it * is merely the zero page. 
The 'bigcore' testcase in GDB * causes this case to run millions of times. @@ -311,7 +365,7 @@ out: void __kprobes flush_icache_range(unsigned long start, unsigned long end) { - /* Cheetah has coherent I-cache. */ + /* Cheetah and Hypervisor platform cpus have coherent I-cache. */ if (tlb_type == spitfire) { unsigned long kaddr; @@ -320,16 +374,6 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end) } } -unsigned long page_to_pfn(struct page *page) -{ - return (unsigned long) ((page - mem_map) + pfn_base); -} - -struct page *pfn_to_page(unsigned long pfn) -{ - return (mem_map + (pfn - pfn_base)); -} - void show_mem(void) { printk("Mem-info:\n"); @@ -338,7 +382,6 @@ void show_mem(void) nr_swap_pages << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", num_physpages); printk("%d free pages\n", nr_free_pages()); - printk("%d pages in page table cache\n",pgtable_cache_size); } void mmu_info(struct seq_file *m) @@ -349,6 +392,8 @@ void mmu_info(struct seq_file *m) seq_printf(m, "MMU Type\t: Cheetah+\n"); else if (tlb_type == spitfire) seq_printf(m, "MMU Type\t: Spitfire\n"); + else if (tlb_type == hypervisor) + seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n"); else seq_printf(m, "MMU Type\t: ???\n"); @@ -371,45 +416,13 @@ struct linux_prom_translation { /* Exported for kernel TLB miss handling in ktlb.S */ struct linux_prom_translation prom_trans[512] __read_mostly; unsigned int prom_trans_ents __read_mostly; -unsigned int swapper_pgd_zero __read_mostly; - -extern unsigned long prom_boot_page; -extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle); -extern int prom_get_mmu_ihandle(void); -extern void register_prom_callbacks(void); /* Exported for SMP bootup purposes. */ unsigned long kern_locked_tte_data; -/* - * Translate PROM's mapping we capture at boot time into physical address. - * The second parameter is only set from prom_callback() invocations. 
- */ -unsigned long prom_virt_to_phys(unsigned long promva, int *error) -{ - int i; - - for (i = 0; i < prom_trans_ents; i++) { - struct linux_prom_translation *p = &prom_trans[i]; - - if (promva >= p->virt && - promva < (p->virt + p->size)) { - unsigned long base = p->data & _PAGE_PADDR; - - if (error) - *error = 0; - return base + (promva & (8192 - 1)); - } - } - if (error) - *error = 1; - return 0UL; -} - /* The obp translations are saved based on 8k pagesize, since obp can * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> - * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte - * scheme (also, see rant in inherit_locked_prom_mappings()). + * HI_OBP_ADDRESS range are handled in ktlb.S. */ static inline int in_obp_range(unsigned long vaddr) { @@ -490,6 +503,36 @@ static void __init read_obp_translations(void) } } +static void __init hypervisor_tlb_lock(unsigned long vaddr, + unsigned long pte, + unsigned long mmu) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + register unsigned long arg2 asm("%o2"); + register unsigned long arg3 asm("%o3"); + + func = HV_FAST_MMU_MAP_PERM_ADDR; + arg0 = vaddr; + arg1 = 0; + arg2 = pte; + arg3 = mmu; + __asm__ __volatile__("ta 0x80" + : "=&r" (func), "=&r" (arg0), + "=&r" (arg1), "=&r" (arg2), + "=&r" (arg3) + : "0" (func), "1" (arg0), "2" (arg1), + "3" (arg2), "4" (arg3)); + if (arg0 != 0) { + prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: " + "errors with %lx\n", vaddr, 0, pte, mmu, arg0); + prom_halt(); + } +} + +static unsigned long kern_large_tte(unsigned long paddr); + static void __init remap_kernel(void) { unsigned long phys_page, tte_vaddr, tte_data; @@ -497,25 +540,34 @@ static void __init remap_kernel(void) tte_vaddr = (unsigned long) KERNBASE; phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; - tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB | - _PAGE_CP | _PAGE_CV | _PAGE_P | - _PAGE_L | 
_PAGE_W)); + tte_data = kern_large_tte(phys_page); kern_locked_tte_data = tte_data; - /* Now lock us into the TLBs via OBP. */ - prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); - prom_itlb_load(tlb_ent, tte_data, tte_vaddr); - if (bigkernel) { - tlb_ent -= 1; - prom_dtlb_load(tlb_ent, - tte_data + 0x400000, - tte_vaddr + 0x400000); - prom_itlb_load(tlb_ent, - tte_data + 0x400000, - tte_vaddr + 0x400000); + /* Now lock us into the TLBs via Hypervisor or OBP. */ + if (tlb_type == hypervisor) { + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); + if (bigkernel) { + tte_vaddr += 0x400000; + tte_data += 0x400000; + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); + } + } else { + prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); + prom_itlb_load(tlb_ent, tte_data, tte_vaddr); + if (bigkernel) { + tlb_ent -= 1; + prom_dtlb_load(tlb_ent, + tte_data + 0x400000, + tte_vaddr + 0x400000); + prom_itlb_load(tlb_ent, + tte_data + 0x400000, + tte_vaddr + 0x400000); + } + sparc64_highest_unlocked_tlb_ent = tlb_ent - 1; } - sparc64_highest_unlocked_tlb_ent = tlb_ent - 1; if (tlb_type == cheetah_plus) { sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 | CTX_CHEETAH_PLUS_NUC); @@ -533,372 +585,14 @@ static void __init inherit_prom_mappings(void) prom_printf("Remapping the kernel... "); remap_kernel(); prom_printf("done.\n"); - - prom_printf("Registering callbacks... "); - register_prom_callbacks(); - prom_printf("done.\n"); -} - -/* The OBP specifications for sun4u mark 0xfffffffc00000000 and - * upwards as reserved for use by the firmware (I wonder if this - * will be the same on Cheetah...). We use this virtual address - * range for the VPTE table mappings of the nucleus so we need - * to zap them when we enter the PROM. 
-DaveM - */ -static void __flush_nucleus_vptes(void) -{ - unsigned long prom_reserved_base = 0xfffffffc00000000UL; - int i; - - /* Only DTLB must be checked for VPTE entries. */ - if (tlb_type == spitfire) { - for (i = 0; i < 63; i++) { - unsigned long tag; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no cheetah+ - * page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - tag = spitfire_get_dtlb_tag(i); - if (((tag & ~(PAGE_MASK)) == 0) && - ((tag & (PAGE_MASK)) >= prom_reserved_base)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - spitfire_put_dtlb_data(i, 0x0UL); - } - } - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - for (i = 0; i < 512; i++) { - unsigned long tag = cheetah_get_dtlb_tag(i, 2); - - if ((tag & ~PAGE_MASK) == 0 && - (tag & PAGE_MASK) >= prom_reserved_base) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - cheetah_put_dtlb_data(i, 0x0UL, 2); - } - - if (tlb_type != cheetah_plus) - continue; - - tag = cheetah_get_dtlb_tag(i, 3); - - if ((tag & ~PAGE_MASK) == 0 && - (tag & PAGE_MASK) >= prom_reserved_base) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - cheetah_put_dtlb_data(i, 0x0UL, 3); - } - } - } else { - /* Implement me :-) */ - BUG(); - } } -static int prom_ditlb_set; -struct prom_tlb_entry { - int tlb_ent; - unsigned long tlb_tag; - unsigned long tlb_data; -}; -struct prom_tlb_entry prom_itlb[16], prom_dtlb[16]; - void prom_world(int enter) { - unsigned long pstate; - int i; - if (!enter) set_fs((mm_segment_t) { get_thread_current_ds() }); - if (!prom_ditlb_set) - return; - - /* Make sure the following runs atomically. 
*/ - __asm__ __volatile__("flushw\n\t" - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate" - : "=r" (pstate) - : "i" (PSTATE_IE)); - - if (enter) { - /* Kick out nucleus VPTEs. */ - __flush_nucleus_vptes(); - - /* Install PROM world. */ - for (i = 0; i < 16; i++) { - if (prom_dtlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS), - "i" (ASI_DMMU)); - if (tlb_type == spitfire) - spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - } - if (prom_itlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_itlb[i].tlb_tag), - "r" (TLB_TAG_ACCESS), - "i" (ASI_IMMU)); - if (tlb_type == spitfire) - spitfire_put_itlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - cheetah_put_litlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - } - } - } else { - for (i = 0; i < 16; i++) { - if (prom_dtlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - if (tlb_type == spitfire) - spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, 0x0UL); - else - cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, 0x0UL); - } - if (prom_itlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), - "i" (ASI_IMMU)); - if (tlb_type == spitfire) - spitfire_put_itlb_data(prom_itlb[i].tlb_ent, 0x0UL); - else - cheetah_put_litlb_data(prom_itlb[i].tlb_ent, 0x0UL); - } - } - } - __asm__ __volatile__("wrpr %0, 0, %%pstate" - : : "r" (pstate)); -} - -void inherit_locked_prom_mappings(int save_p) -{ - int i; - int dtlb_seen = 0; - int itlb_seen = 0; - - /* Fucking losing PROM has more mappings in the TLB, but - * it (conveniently) fails 
to mention any of these in the - * translations property. The only ones that matter are - * the locked PROM tlb entries, so we impose the following - * irrecovable rule on the PROM, it is allowed 8 locked - * entries in the ITLB and 8 in the DTLB. - * - * Supposedly the upper 16GB of the address space is - * reserved for OBP, BUT I WISH THIS WAS DOCUMENTED - * SOMEWHERE!!!!!!!!!!!!!!!!! Furthermore the entire interface - * used between the client program and the firmware on sun5 - * systems to coordinate mmu mappings is also COMPLETELY - * UNDOCUMENTED!!!!!! Thanks S(t)un! - */ - if (save_p) { - for (i = 0; i < 16; i++) { - prom_itlb[i].tlb_ent = -1; - prom_dtlb[i].tlb_ent = -1; - } - } - if (tlb_type == spitfire) { - int high = sparc64_highest_unlocked_tlb_ent; - for (i = 0; i <= high; i++) { - unsigned long data; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no cheetah+ - * page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - data = spitfire_get_dtlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. 
- */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - tag = spitfire_get_dtlb_tag(i); - if (save_p) { - prom_dtlb[dtlb_seen].tlb_ent = i; - prom_dtlb[dtlb_seen].tlb_tag = tag; - prom_dtlb[dtlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - spitfire_put_dtlb_data(i, 0x0UL); - - dtlb_seen++; - if (dtlb_seen > 15) - break; - } - } - - for (i = 0; i < high; i++) { - unsigned long data; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - data = spitfire_get_itlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. 
- */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - tag = spitfire_get_itlb_tag(i); - if (save_p) { - prom_itlb[itlb_seen].tlb_ent = i; - prom_itlb[itlb_seen].tlb_tag = tag; - prom_itlb[itlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); - spitfire_put_itlb_data(i, 0x0UL); - - itlb_seen++; - if (itlb_seen > 15) - break; - } - } - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - int high = sparc64_highest_unlocked_tlb_ent; - - for (i = 0; i <= high; i++) { - unsigned long data; - - data = cheetah_get_ldtlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - tag = cheetah_get_ldtlb_tag(i); - if (save_p) { - prom_dtlb[dtlb_seen].tlb_ent = i; - prom_dtlb[dtlb_seen].tlb_tag = tag; - prom_dtlb[dtlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - cheetah_put_ldtlb_data(i, 0x0UL); - - dtlb_seen++; - if (dtlb_seen > 15) - break; - } - } - - for (i = 0; i < high; i++) { - unsigned long data; - - data = cheetah_get_litlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - tag = cheetah_get_litlb_tag(i); - if (save_p) { - prom_itlb[itlb_seen].tlb_ent = i; - prom_itlb[itlb_seen].tlb_tag = tag; - prom_itlb[itlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); - cheetah_put_litlb_data(i, 0x0UL); - - itlb_seen++; - if (itlb_seen > 15) - break; - } - } - } else { - /* Implement me :-) */ - BUG(); - } - if (save_p) - prom_ditlb_set = 1; -} - -/* Give PROM back his world, done during reboots... 
*/ -void prom_reload_locked(void) -{ - int i; - - for (i = 0; i < 16; i++) { - if (prom_dtlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS), - "i" (ASI_DMMU)); - if (tlb_type == spitfire) - spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - } - - if (prom_itlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_itlb[i].tlb_tag), - "r" (TLB_TAG_ACCESS), - "i" (ASI_IMMU)); - if (tlb_type == spitfire) - spitfire_put_itlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - else - cheetah_put_litlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - } - } + __asm__ __volatile__("flushw"); } #ifdef DCACHE_ALIASING_POSSIBLE @@ -914,7 +608,7 @@ void __flush_dcache_range(unsigned long start, unsigned long end) if (++n >= 512) break; } - } else { + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { start = __pa(start); end = __pa(end); for (va = start; va < end; va += 32) @@ -927,63 +621,6 @@ void __flush_dcache_range(unsigned long start, unsigned long end) } #endif /* DCACHE_ALIASING_POSSIBLE */ -/* If not locked, zap it. */ -void __flush_tlb_all(void) -{ - unsigned long pstate; - int i; - - __asm__ __volatile__("flushw\n\t" - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate" - : "=r" (pstate) - : "i" (PSTATE_IE)); - if (tlb_type == spitfire) { - for (i = 0; i < 64; i++) { - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. 
- */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - if (!(spitfire_get_dtlb_data(i) & _PAGE_L)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - spitfire_put_dtlb_data(i, 0x0UL); - } - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - if (!(spitfire_get_itlb_data(i) & _PAGE_L)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); - spitfire_put_itlb_data(i, 0x0UL); - } - } - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - cheetah_flush_dtlb_all(); - cheetah_flush_itlb_all(); - } - __asm__ __volatile__("wrpr %0, 0, %%pstate" - : : "r" (pstate)); -} - /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. * @@ -991,17 +628,21 @@ void __flush_tlb_all(void) * let the user have CTX 0 (nucleus) or we ever use a CTX * version of zero (and thus NO_CONTEXT would not be caught * by version mis-match tests in mmu_context.h). + * + * Always invoked with interrupts disabled. 
*/ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - + unsigned long flags; + int new_version; - spin_lock(&ctx_alloc_lock); + spin_lock_irqsave(&ctx_alloc_lock, flags); orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); + new_version = 0; if (new_ctx >= (1 << CTX_NR_BITS)) { new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); if (new_ctx >= ctx) { @@ -1024,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm) mmu_context_bmap[i + 2] = 0; mmu_context_bmap[i + 3] = 0; } + new_version = 1; goto out; } } @@ -1032,79 +674,10 @@ void get_new_mmu_context(struct mm_struct *mm) out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; - spin_unlock(&ctx_alloc_lock); -} - -#ifndef CONFIG_SMP -struct pgtable_cache_struct pgt_quicklists; -#endif - -/* OK, we have to color these pages. The page tables are accessed - * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S - * code, as well as by PAGE_OFFSET range direct-mapped addresses by - * other parts of the kernel. By coloring, we make sure that the tlbmiss - * fast handlers do not get data from old/garbage dcache lines that - * correspond to an old/stale virtual address (user/kernel) that - * previously mapped the pagetable page while accessing vpte range - * addresses. The idea is that if the vpte color and PAGE_OFFSET range - * color is the same, then when the kernel initializes the pagetable - * using the later address range, accesses with the first address - * range will see the newly initialized data rather than the garbage. 
- */ -#ifdef DCACHE_ALIASING_POSSIBLE -#define DC_ALIAS_SHIFT 1 -#else -#define DC_ALIAS_SHIFT 0 -#endif -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - struct page *page; - unsigned long color; - - { - pte_t *ptep = pte_alloc_one_fast(mm, address); - - if (ptep) - return ptep; - } + spin_unlock_irqrestore(&ctx_alloc_lock, flags); - color = VPTE_COLOR(address); - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, DC_ALIAS_SHIFT); - if (page) { - unsigned long *to_free; - unsigned long paddr; - pte_t *pte; - -#ifdef DCACHE_ALIASING_POSSIBLE - set_page_count(page, 1); - ClearPageCompound(page); - - set_page_count((page + 1), 1); - ClearPageCompound(page + 1); -#endif - paddr = (unsigned long) page_address(page); - memset((char *)paddr, 0, (PAGE_SIZE << DC_ALIAS_SHIFT)); - - if (!color) { - pte = (pte_t *) paddr; - to_free = (unsigned long *) (paddr + PAGE_SIZE); - } else { - pte = (pte_t *) (paddr + PAGE_SIZE); - to_free = (unsigned long *) paddr; - } - -#ifdef DCACHE_ALIASING_POSSIBLE - /* Now free the other one up, adjust cache size. */ - preempt_disable(); - *to_free = (unsigned long) pte_quicklist[color ^ 0x1]; - pte_quicklist[color ^ 0x1] = to_free; - pgtable_cache_size++; - preempt_enable(); -#endif - - return pte; - } - return NULL; + if (unlikely(new_version)) + smp_new_mmu_context_version(); } void sparc_ultra_dump_itlb(void) @@ -1196,9 +769,78 @@ void sparc_ultra_dump_dtlb(void) extern unsigned long cmdline_memory_size; -unsigned long __init bootmem_init(unsigned long *pages_avail) +/* Find a free area for the bootmem map, avoiding the kernel image + * and the initial ramdisk. 
+ */ +static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn, + unsigned long end_pfn) +{ + unsigned long avoid_start, avoid_end, bootmap_size; + int i; + + bootmap_size = ((end_pfn - start_pfn) + 7) / 8; + bootmap_size = ALIGN(bootmap_size, sizeof(long)); + + avoid_start = avoid_end = 0; +#ifdef CONFIG_BLK_DEV_INITRD + avoid_start = initrd_start; + avoid_end = PAGE_ALIGN(initrd_end); +#endif + +#ifdef CONFIG_DEBUG_BOOTMEM + prom_printf("choose_bootmap_pfn: kern[%lx:%lx] avoid[%lx:%lx]\n", + kern_base, PAGE_ALIGN(kern_base + kern_size), + avoid_start, avoid_end); +#endif + for (i = 0; i < pavail_ents; i++) { + unsigned long start, end; + + start = pavail[i].phys_addr; + end = start + pavail[i].reg_size; + + while (start < end) { + if (start >= kern_base && + start < PAGE_ALIGN(kern_base + kern_size)) { + start = PAGE_ALIGN(kern_base + kern_size); + continue; + } + if (start >= avoid_start && start < avoid_end) { + start = avoid_end; + continue; + } + + if ((end - start) < bootmap_size) + break; + + if (start < kern_base && + (start + bootmap_size) > kern_base) { + start = PAGE_ALIGN(kern_base + kern_size); + continue; + } + + if (start < avoid_start && + (start + bootmap_size) > avoid_start) { + start = avoid_end; + continue; + } + + /* OK, it doesn't overlap anything, use it. 
*/ +#ifdef CONFIG_DEBUG_BOOTMEM + prom_printf("choose_bootmap_pfn: Using %lx [%lx]\n", + start >> PAGE_SHIFT, start); +#endif + return start >> PAGE_SHIFT; + } + } + + prom_printf("Cannot find free area for bootmap, aborting.\n"); + prom_halt(); +} + +static unsigned long __init bootmem_init(unsigned long *pages_avail, + unsigned long phys_base) { - unsigned long bootmap_size, start_pfn, end_pfn; + unsigned long bootmap_size, end_pfn; unsigned long end_of_phys_memory = 0UL; unsigned long bootmap_pfn, bytes_avail, size; int i; @@ -1236,14 +878,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) *pages_avail = bytes_avail >> PAGE_SHIFT; - /* Start with page aligned address of last symbol in kernel - * image. The kernel is hard mapped below PAGE_OFFSET in a - * 4MB locked TLB translation. - */ - start_pfn = PAGE_ALIGN(kern_base + kern_size) >> PAGE_SHIFT; - - bootmap_pfn = start_pfn; - end_pfn = end_of_phys_memory >> PAGE_SHIFT; #ifdef CONFIG_BLK_DEV_INITRD @@ -1260,23 +894,22 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) "(0x%016lx > 0x%016lx)\ndisabling initrd\n", initrd_end, end_of_phys_memory); initrd_start = 0; - } - if (initrd_start) { - if (initrd_start >= (start_pfn << PAGE_SHIFT) && - initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE) - bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT; + initrd_end = 0; } } #endif /* Initialize the boot-time allocator. */ max_pfn = max_low_pfn = end_pfn; - min_low_pfn = pfn_base; + min_low_pfn = (phys_base >> PAGE_SHIFT); + + bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn); #ifdef CONFIG_DEBUG_BOOTMEM prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n", min_low_pfn, bootmap_pfn, max_low_pfn); #endif - bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn); + bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, + min_low_pfn, end_pfn); /* Now register the available physical memory with the * allocator. 
@@ -1324,9 +957,26 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size); *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; + for (i = 0; i < pavail_ents; i++) { + unsigned long start_pfn, end_pfn; + + start_pfn = pavail[i].phys_addr >> PAGE_SHIFT; + end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT)); +#ifdef CONFIG_DEBUG_BOOTMEM + prom_printf("memory_present(0, %lx, %lx)\n", + start_pfn, end_pfn); +#endif + memory_present(0, start_pfn, end_pfn); + } + + sparse_init(); + return end_pfn; } +static struct linux_prom64_registers pall[MAX_BANKS] __initdata; +static int pall_ents __initdata; + #ifdef CONFIG_DEBUG_PAGEALLOC static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot) { @@ -1382,14 +1032,44 @@ static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, return alloc_bytes; } -static struct linux_prom64_registers pall[MAX_BANKS] __initdata; -static int pall_ents __initdata; - extern unsigned int kvmap_linear_patch[1]; +#endif /* CONFIG_DEBUG_PAGEALLOC */ + +static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +{ + const unsigned long shift_256MB = 28; + const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL); + const unsigned long size_256MB = (1UL << shift_256MB); + + while (start < end) { + long remains; + + remains = end - start; + if (remains < size_256MB) + break; + + if (start & mask_256MB) { + start = (start + size_256MB) & ~mask_256MB; + continue; + } + + while (remains >= size_256MB) { + unsigned long index = start >> shift_256MB; + + __set_bit(index, kpte_linear_bitmap); + + start += size_256MB; + remains -= size_256MB; + } + } +} static void __init kernel_physical_mapping_init(void) { - unsigned long i, mem_alloced = 0UL; + unsigned long i; +#ifdef CONFIG_DEBUG_PAGEALLOC + unsigned long mem_alloced = 0UL; +#endif read_obp_memory("reg", &pall[0], &pall_ents); @@ -1398,10 +1078,16 @@ static void 
__init kernel_physical_mapping_init(void) phys_start = pall[i].phys_addr; phys_end = phys_start + pall[i].reg_size; + + mark_kpte_bitmap(phys_start, phys_end); + +#ifdef CONFIG_DEBUG_PAGEALLOC mem_alloced += kernel_map_range(phys_start, phys_end, PAGE_KERNEL); +#endif } +#ifdef CONFIG_DEBUG_PAGEALLOC printk("Allocated %ld bytes for kernel page tables.\n", mem_alloced); @@ -1409,8 +1095,10 @@ static void __init kernel_physical_mapping_init(void) flushi(&kvmap_linear_patch[0]); __flush_tlb_all(); +#endif } +#ifdef CONFIG_DEBUG_PAGEALLOC void kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT; @@ -1419,6 +1107,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable) kernel_map_range(phys_start, phys_end, (enable ? PAGE_KERNEL : __pgprot(0))); + flush_tsb_kernel_range(PAGE_OFFSET + phys_start, + PAGE_OFFSET + phys_end); + /* we should perform an IPI and flush all tlbs, * but that can deadlock->flush only current cpu. */ @@ -1439,18 +1130,150 @@ unsigned long __init find_ecache_flush_span(unsigned long size) return ~0UL; } +static void __init tsb_phys_patch(void) +{ + struct tsb_ldquad_phys_patch_entry *pquad; + struct tsb_phys_patch_entry *p; + + pquad = &__tsb_ldquad_phys_patch; + while (pquad < &__tsb_ldquad_phys_patch_end) { + unsigned long addr = pquad->addr; + + if (tlb_type == hypervisor) + *(unsigned int *) addr = pquad->sun4v_insn; + else + *(unsigned int *) addr = pquad->sun4u_insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); + + pquad++; + } + + p = &__tsb_phys_patch; + while (p < &__tsb_phys_patch_end) { + unsigned long addr = p->addr; + + *(unsigned int *) addr = p->insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); + + p++; + } +} + +/* Don't mark as init, we give this to the Hypervisor. 
*/ +static struct hv_tsb_descr ktsb_descr[2]; +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +static void __init sun4v_ktsb_init(void) +{ + unsigned long ktsb_pa; + + /* First KTSB for PAGE_SIZE mappings. */ + ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE); + + switch (PAGE_SIZE) { + case 8 * 1024: + default: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K; + break; + + case 64 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K; + break; + + case 512 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K; + break; + + case 4 * 1024 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB; + break; + }; + + ktsb_descr[0].assoc = 1; + ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES; + ktsb_descr[0].ctx_idx = 0; + ktsb_descr[0].tsb_base = ktsb_pa; + ktsb_descr[0].resv = 0; + + /* Second KTSB for 4MB/256MB mappings. 
*/ + ktsb_pa = (kern_base + + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); + + ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; + ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB | + HV_PGSZ_MASK_256MB); + ktsb_descr[1].assoc = 1; + ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; + ktsb_descr[1].ctx_idx = 0; + ktsb_descr[1].tsb_base = ktsb_pa; + ktsb_descr[1].resv = 0; +} + +void __cpuinit sun4v_ktsb_register(void) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + unsigned long pa; + + pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE); + + func = HV_FAST_MMU_TSB_CTX0; + arg0 = 2; + arg1 = pa; + __asm__ __volatile__("ta %6" + : "=&r" (func), "=&r" (arg0), "=&r" (arg1) + : "0" (func), "1" (arg0), "2" (arg1), + "i" (HV_FAST_TRAP)); +} + /* paging_init() sets up the page tables */ extern void cheetah_ecache_flush_init(void); +extern void sun4v_patch_tlb_handlers(void); static unsigned long last_valid_pfn; pgd_t swapper_pg_dir[2048]; +static void sun4u_pgprot_init(void); +static void sun4v_pgprot_init(void); + void __init paging_init(void) { - unsigned long end_pfn, pages_avail, shift; + unsigned long end_pfn, pages_avail, shift, phys_base; unsigned long real_end, i; + kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; + + /* Invalidate both kernel TSBs. */ + memset(swapper_tsb, 0x40, sizeof(swapper_tsb)); + memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); + + if (tlb_type == hypervisor) + sun4v_pgprot_init(); + else + sun4u_pgprot_init(); + + if (tlb_type == cheetah_plus || + tlb_type == hypervisor) + tsb_phys_patch(); + + if (tlb_type == hypervisor) { + sun4v_patch_tlb_handlers(); + sun4v_ktsb_init(); + } + /* Find available physical memory... 
*/ read_obp_memory("available", &pavail[0], &pavail_ents); @@ -1458,11 +1281,6 @@ void __init paging_init(void) for (i = 0; i < pavail_ents; i++) phys_base = min(phys_base, pavail[i].phys_addr); - pfn_base = phys_base >> PAGE_SHIFT; - - kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; - kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; - set_bit(0, mmu_context_bmap); shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); @@ -1486,47 +1304,38 @@ void __init paging_init(void) pud_set(pud_offset(&swapper_pg_dir[0], 0), swapper_low_pmd_dir + (shift / sizeof(pgd_t))); - swapper_pgd_zero = pgd_val(swapper_pg_dir[0]); - inherit_prom_mappings(); - /* Ok, we can use our TLB miss and window trap handlers safely. - * We need to do a quick peek here to see if we are on StarFire - * or not, so setup_tba can setup the IRQ globals correctly (it - * needs to get the hard smp processor id correctly). - */ - { - extern void setup_tba(int); - setup_tba(this_is_starfire); - } - - inherit_locked_prom_mappings(1); + /* Ok, we can use our TLB miss and window trap handlers safely. */ + setup_tba(); __flush_tlb_all(); + if (tlb_type == hypervisor) + sun4v_ktsb_register(); + /* Setup bootmem... 
*/ pages_avail = 0; - last_valid_pfn = end_pfn = bootmem_init(&pages_avail); + last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); + + max_mapnr = last_valid_pfn; -#ifdef CONFIG_DEBUG_PAGEALLOC kernel_physical_mapping_init(); -#endif { unsigned long zones_size[MAX_NR_ZONES]; unsigned long zholes_size[MAX_NR_ZONES]; - unsigned long npages; int znum; for (znum = 0; znum < MAX_NR_ZONES; znum++) zones_size[znum] = zholes_size[znum] = 0; - npages = end_pfn - pfn_base; - zones_size[ZONE_DMA] = npages; - zholes_size[ZONE_DMA] = npages - pages_avail; + zones_size[ZONE_DMA] = end_pfn; + zholes_size[ZONE_DMA] = end_pfn - pages_avail; free_area_init_node(0, &contig_page_data, zones_size, - phys_base >> PAGE_SHIFT, zholes_size); + __pa(PAGE_OFFSET) >> PAGE_SHIFT, + zholes_size); } device_scan(); @@ -1596,7 +1405,6 @@ void __init mem_init(void) taint_real_pages(); - max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(last_valid_pfn << PAGE_SHIFT); #ifdef CONFIG_DEBUG_BOOTMEM @@ -1676,3 +1484,342 @@ void free_initrd_mem(unsigned long start, unsigned long end) } } #endif + +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + +pgprot_t PAGE_KERNEL __read_mostly; +EXPORT_SYMBOL(PAGE_KERNEL); + +pgprot_t PAGE_KERNEL_LOCKED __read_mostly; +pgprot_t PAGE_COPY __read_mostly; + +pgprot_t PAGE_SHARED __read_mostly; +EXPORT_SYMBOL(PAGE_SHARED); + +pgprot_t PAGE_EXEC __read_mostly; +unsigned long pg_iobits __read_mostly; + +unsigned long _PAGE_IE __read_mostly; + +unsigned long _PAGE_E __read_mostly; +EXPORT_SYMBOL(_PAGE_E); + +unsigned long _PAGE_CACHE __read_mostly; +EXPORT_SYMBOL(_PAGE_CACHE); + +static void 
prot_init_common(unsigned long page_none, + unsigned long page_shared, + unsigned long page_copy, + unsigned long page_readonly, + unsigned long page_exec_bit) +{ + PAGE_COPY = __pgprot(page_copy); + PAGE_SHARED = __pgprot(page_shared); + + protection_map[0x0] = __pgprot(page_none); + protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x4] = __pgprot(page_readonly); + protection_map[0x5] = __pgprot(page_readonly); + protection_map[0x6] = __pgprot(page_copy); + protection_map[0x7] = __pgprot(page_copy); + protection_map[0x8] = __pgprot(page_none); + protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit); + protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit); + protection_map[0xc] = __pgprot(page_readonly); + protection_map[0xd] = __pgprot(page_readonly); + protection_map[0xe] = __pgprot(page_shared); + protection_map[0xf] = __pgprot(page_shared); +} + +static void __init sun4u_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, page_readonly; + unsigned long page_exec_bit; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U); + PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U | _PAGE_L_4U); + PAGE_EXEC = __pgprot(_PAGE_EXEC_4U); + + _PAGE_IE = _PAGE_IE_4U; + _PAGE_E = _PAGE_E_4U; + _PAGE_CACHE = _PAGE_CACHE_4U; + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U | + __ACCESS_BITS_4U | _PAGE_E_4U); + + kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ + 0xfffff80000000000; + kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | + _PAGE_P_4U | _PAGE_W_4U); + + /* XXX Should use 256MB on Panther. 
XXX */ + kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; + + _PAGE_SZBITS = _PAGE_SZBITS_4U; + _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | + _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | + _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); + + + page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + + page_exec_bit = _PAGE_EXEC_4U; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +static void __init sun4v_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, page_readonly; + unsigned long page_exec_bit; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | + _PAGE_CACHE_4V | _PAGE_P_4V | + __ACCESS_BITS_4V | __DIRTY_BITS_4V | + _PAGE_EXEC_4V); + PAGE_KERNEL_LOCKED = PAGE_KERNEL; + PAGE_EXEC = __pgprot(_PAGE_EXEC_4V); + + _PAGE_IE = _PAGE_IE_4V; + _PAGE_E = _PAGE_E_4V; + _PAGE_CACHE = _PAGE_CACHE_4V; + + kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ + 0xfffff80000000000; + kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ + 0xfffff80000000000; + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | + __ACCESS_BITS_4V | _PAGE_E_4V); + + _PAGE_SZBITS = _PAGE_SZBITS_4V; + _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | + _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | + _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | + _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); + + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + 
__ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + + page_exec_bit = _PAGE_EXEC_4V; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +unsigned long pte_sz_bits(unsigned long sz) +{ + if (tlb_type == hypervisor) { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4V; + case 64 * 1024: + return _PAGE_SZ64K_4V; + case 512 * 1024: + return _PAGE_SZ512K_4V; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4V; + }; + } else { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4U; + case 64 * 1024: + return _PAGE_SZ64K_4U; + case 512 * 1024: + return _PAGE_SZ512K_4U; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4U; + }; + } +} + +pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size) +{ + pte_t pte; + + pte_val(pte) = page | pgprot_val(pgprot_noncached(prot)); + pte_val(pte) |= (((unsigned long)space) << 32); + pte_val(pte) |= pte_sz_bits(page_size); + + return pte; +} + +static unsigned long kern_large_tte(unsigned long paddr) +{ + unsigned long val; + + val = (_PAGE_VALID | _PAGE_SZ4MB_4U | + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | + _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); + if (tlb_type == hypervisor) + val = (_PAGE_VALID | _PAGE_SZ4MB_4V | + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + _PAGE_EXEC_4V | _PAGE_W_4V); + + return val | paddr; +} + +/* + * Translate PROM's mapping we capture at boot time into physical address. + * The second parameter is only set from prom_callback() invocations. 
+ */ +unsigned long prom_virt_to_phys(unsigned long promva, int *error) +{ + unsigned long mask; + int i; + + mask = _PAGE_PADDR_4U; + if (tlb_type == hypervisor) + mask = _PAGE_PADDR_4V; + + for (i = 0; i < prom_trans_ents; i++) { + struct linux_prom_translation *p = &prom_trans[i]; + + if (promva >= p->virt && + promva < (p->virt + p->size)) { + unsigned long base = p->data & mask; + + if (error) + *error = 0; + return base + (promva & (8192 - 1)); + } + } + if (error) + *error = 1; + return 0UL; +} + +/* XXX We should kill off this ugly thing at some point. XXX */ +unsigned long sun4u_get_pte(unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long mask = _PAGE_PADDR_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_PADDR_4V; + + if (addr >= PAGE_OFFSET) + return addr & mask; + + if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS)) + return prom_virt_to_phys(addr, NULL); + + pgdp = pgd_offset_k(addr); + pudp = pud_offset(pgdp, addr); + pmdp = pmd_offset(pudp, addr); + ptep = pte_offset_kernel(pmdp, addr); + + return pte_val(*ptep) & mask; +} + +/* If not locked, zap it. */ +void __flush_tlb_all(void) +{ + unsigned long pstate; + int i; + + __asm__ __volatile__("flushw\n\t" + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + if (tlb_type == spitfire) { + for (i = 0; i < 64; i++) { + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings.
+ */ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "flush %%g6" + : /* No outputs */ + : "r" (0), + "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + + if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); + spitfire_put_itlb_data(i, 0x0UL); + } + } + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { + cheetah_flush_dtlb_all(); + cheetah_flush_itlb_all(); + } + __asm__ __volatile__("wrpr %0, 0, %%pstate" + : : "r" (pstate)); +} + +#ifdef CONFIG_MEMORY_HOTPLUG + +void online_page(struct page *page) +{ + ClearPageReserved(page); + set_page_count(page, 0); + free_cold_page(page); + totalram_pages++; + num_physpages++; +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c index 8b104be4662b..a079cf42505e 100644 --- a/arch/sparc64/mm/tlb.c +++ b/arch/sparc64/mm/tlb.c @@ -25,6 +25,8 @@ void flush_tlb_pending(void) struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); if (mp->tlb_nr) { + flush_tsb_user(mp); + if (CTX_VALID(mp->mm->context)) { #ifdef CONFIG_SMP smp_flush_tlb_pending(mp->mm, mp->tlb_nr, @@ -47,7 +49,8 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t if (pte_exec(orig)) vaddr |= 0x1UL; - if (pte_dirty(orig)) { + if (tlb_type != hypervisor && + pte_dirty(orig)) { unsigned long paddr, pfn = pte_pfn(orig); struct address_space *mapping; struct page *page; @@ -89,62 +92,3 @@ no_cache_flush: if (nr >= TLB_BATCH_NR) flush_tlb_pending(); } - -void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) -{ - struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); - unsigned long nr = mp->tlb_nr; - long s = start, e = end, vpte_base; - - if (mp->fullmm) - return; - - /* If start is greater than end, that is a real problem. 
*/ - BUG_ON(start > end); - - /* However, straddling the VA space hole is quite normal. */ - s &= PMD_MASK; - e = (e + PMD_SIZE - 1) & PMD_MASK; - - vpte_base = (tlb_type == spitfire ? - VPTE_BASE_SPITFIRE : - VPTE_BASE_CHEETAH); - - if (unlikely(nr != 0 && mm != mp->mm)) { - flush_tlb_pending(); - nr = 0; - } - - if (nr == 0) - mp->mm = mm; - - start = vpte_base + (s >> (PAGE_SHIFT - 3)); - end = vpte_base + (e >> (PAGE_SHIFT - 3)); - - /* If the request straddles the VA space hole, we - * need to swap start and end. The reason this - * occurs is that "vpte_base" is the center of - * the linear page table mapping area. Thus, - * high addresses with the sign bit set map to - * addresses below vpte_base and non-sign bit - * addresses map to addresses above vpte_base. - */ - if (end < start) { - unsigned long tmp = start; - - start = end; - end = tmp; - } - - while (start < end) { - mp->vaddrs[nr] = start; - mp->tlb_nr = ++nr; - if (nr >= TLB_BATCH_NR) { - flush_tlb_pending(); - nr = 0; - } - start += PAGE_SIZE; - } - if (nr) - flush_tlb_pending(); -} diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c new file mode 100644 index 000000000000..b2064e2a44d6 --- /dev/null +++ b/arch/sparc64/mm/tsb.c @@ -0,0 +1,440 @@ +/* arch/sparc64/mm/tsb.c + * + * Copyright (C) 2006 David S. 
Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <asm/system.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/tsb.h> +#include <asm/oplib.h> + +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries) +{ + vaddr >>= PAGE_SHIFT; + return vaddr & (nentries - 1); +} + +static inline int tag_compare(unsigned long tag, unsigned long vaddr) +{ + return (tag == (vaddr >> 22)); +} + +/* TSB flushes need only occur on the processor initiating the address + * space modification, not on each cpu the address space has run on. + * Only the TLB flush needs that treatment. + */ + +void flush_tsb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long v; + + for (v = start; v < end; v += PAGE_SIZE) { + unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES); + struct tsb *ent = &swapper_tsb[hash]; + + if (tag_compare(ent->tag, v)) { + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + membar_storeload_storestore(); + } + } +} + +void flush_tsb_user(struct mmu_gather *mp) +{ + struct mm_struct *mm = mp->mm; + unsigned long nentries, base, flags; + struct tsb *tsb; + int i; + + spin_lock_irqsave(&mm->context.lock, flags); + + tsb = mm->context.tsb; + nentries = mm->context.tsb_nentries; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(tsb); + else + base = (unsigned long) tsb; + + for (i = 0; i < mp->tlb_nr; i++) { + unsigned long v = mp->vaddrs[i]; + unsigned long tag, ent, hash; + + v &= ~0x1UL; + + hash = tsb_hash(v, nentries); + ent = base + (hash * sizeof(struct tsb)); + tag = (v >> 22UL); + + tsb_flush(ent, tag); + } + + spin_unlock_irqrestore(&mm->context.lock, flags); +} + +static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) +{ + unsigned long tsb_reg, base, tsb_paddr; + unsigned long page_sz, tte; + + 
mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb); + + base = TSBMAP_BASE; + tte = pgprot_val(PAGE_KERNEL_LOCKED); + tsb_paddr = __pa(mm->context.tsb); + BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); + + /* Use the smallest page size that can map the whole TSB + * in one TLB entry. + */ + switch (tsb_bytes) { + case 8192 << 0: + tsb_reg = 0x0UL; +#ifdef DCACHE_ALIASING_POSSIBLE + base += (tsb_paddr & 8192); +#endif + page_sz = 8192; + break; + + case 8192 << 1: + tsb_reg = 0x1UL; + page_sz = 64 * 1024; + break; + + case 8192 << 2: + tsb_reg = 0x2UL; + page_sz = 64 * 1024; + break; + + case 8192 << 3: + tsb_reg = 0x3UL; + page_sz = 64 * 1024; + break; + + case 8192 << 4: + tsb_reg = 0x4UL; + page_sz = 512 * 1024; + break; + + case 8192 << 5: + tsb_reg = 0x5UL; + page_sz = 512 * 1024; + break; + + case 8192 << 6: + tsb_reg = 0x6UL; + page_sz = 512 * 1024; + break; + + case 8192 << 7: + tsb_reg = 0x7UL; + page_sz = 4 * 1024 * 1024; + break; + + default: + BUG(); + }; + tte |= pte_sz_bits(page_sz); + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) { + /* Physical mapping, no locked TLB entry for TSB. */ + tsb_reg |= tsb_paddr; + + mm->context.tsb_reg_val = tsb_reg; + mm->context.tsb_map_vaddr = 0; + mm->context.tsb_map_pte = 0; + } else { + tsb_reg |= base; + tsb_reg |= (tsb_paddr & (page_sz - 1UL)); + tte |= (tsb_paddr & ~(page_sz - 1UL)); + + mm->context.tsb_reg_val = tsb_reg; + mm->context.tsb_map_vaddr = base; + mm->context.tsb_map_pte = tte; + } + + /* Setup the Hypervisor TSB descriptor. 
*/ + if (tlb_type == hypervisor) { + struct hv_tsb_descr *hp = &mm->context.tsb_descr; + + switch (PAGE_SIZE) { + case 8192: + default: + hp->pgsz_idx = HV_PGSZ_IDX_8K; + break; + + case 64 * 1024: + hp->pgsz_idx = HV_PGSZ_IDX_64K; + break; + + case 512 * 1024: + hp->pgsz_idx = HV_PGSZ_IDX_512K; + break; + + case 4 * 1024 * 1024: + hp->pgsz_idx = HV_PGSZ_IDX_4MB; + break; + }; + hp->assoc = 1; + hp->num_ttes = tsb_bytes / 16; + hp->ctx_idx = 0; + switch (PAGE_SIZE) { + case 8192: + default: + hp->pgsz_mask = HV_PGSZ_MASK_8K; + break; + + case 64 * 1024: + hp->pgsz_mask = HV_PGSZ_MASK_64K; + break; + + case 512 * 1024: + hp->pgsz_mask = HV_PGSZ_MASK_512K; + break; + + case 4 * 1024 * 1024: + hp->pgsz_mask = HV_PGSZ_MASK_4MB; + break; + }; + hp->tsb_base = tsb_paddr; + hp->resv = 0; + } +} + +static kmem_cache_t *tsb_caches[8] __read_mostly; + +static const char *tsb_cache_names[8] = { + "tsb_8KB", + "tsb_16KB", + "tsb_32KB", + "tsb_64KB", + "tsb_128KB", + "tsb_256KB", + "tsb_512KB", + "tsb_1MB", +}; + +void __init tsb_cache_init(void) +{ + unsigned long i; + + for (i = 0; i < 8; i++) { + unsigned long size = 8192 << i; + const char *name = tsb_cache_names[i]; + + tsb_caches[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN | + SLAB_MUST_HWCACHE_ALIGN, + NULL, NULL); + if (!tsb_caches[i]) { + prom_printf("Could not create %s cache\n", name); + prom_halt(); + } + } +} + +/* When the RSS of an address space exceeds mm->context.tsb_rss_limit, + * do_sparc64_fault() invokes this routine to try and grow the TSB. + * + * When we reach the maximum TSB size supported, we stick ~0UL into + * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache() + * will not trigger any longer. + * + * The TSB can be anywhere from 8K to 1MB in size, in increasing powers + * of two. The TSB must be aligned to its size, so f.e. a 512K TSB + * must be 512K aligned. It also must be physically contiguous, so we + * cannot use vmalloc().
+ * + * The idea here is to grow the TSB when the RSS of the process approaches + * the number of entries that the current TSB can hold at once. Currently, + * we trigger when the RSS hits 3/4 of the TSB capacity. + */ +void tsb_grow(struct mm_struct *mm, unsigned long rss) +{ + unsigned long max_tsb_size = 1 * 1024 * 1024; + unsigned long new_size, old_size, flags; + struct tsb *old_tsb, *new_tsb; + unsigned long new_cache_index, old_cache_index; + unsigned long new_rss_limit; + gfp_t gfp_flags; + + if (max_tsb_size > (PAGE_SIZE << MAX_ORDER)) + max_tsb_size = (PAGE_SIZE << MAX_ORDER); + + new_cache_index = 0; + for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) { + unsigned long n_entries = new_size / sizeof(struct tsb); + + n_entries = (n_entries * 3) / 4; + if (n_entries > rss) + break; + + new_cache_index++; + } + + if (new_size == max_tsb_size) + new_rss_limit = ~0UL; + else + new_rss_limit = ((new_size / sizeof(struct tsb)) * 3) / 4; + +retry_tsb_alloc: + gfp_flags = GFP_KERNEL; + if (new_size > (PAGE_SIZE * 2)) + gfp_flags |= __GFP_NOWARN | __GFP_NORETRY; /* OR in: keep the GFP_KERNEL base */ + + new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags); + if (unlikely(!new_tsb)) { + /* Not being able to fork due to a high-order TSB + * allocation failure is very bad behavior. Just back + * down to a 0-order allocation and force no TSB + * growing for this address space. + */ + if (mm->context.tsb == NULL && new_cache_index > 0) { + new_cache_index = 0; + new_size = 8192; + new_rss_limit = ~0UL; + goto retry_tsb_alloc; + } + + /* If we failed on a TSB grow, we are under serious + * memory pressure so don't try to grow any more. + */ + if (mm->context.tsb != NULL) + mm->context.tsb_rss_limit = ~0UL; + return; + } + + /* Mark all tags as invalid. */ + tsb_init(new_tsb, new_size); + + /* Ok, we are about to commit the changes. If we are + * growing an existing TSB the locking is very tricky, + * so WATCH OUT!
+ * + * We have to hold mm->context.lock while committing to the + * new TSB, this synchronizes us with processors in + * flush_tsb_user() and switch_mm() for this address space. + * + * But even with that lock held, processors run asynchronously + * accessing the old TSB via TLB miss handling. This is OK + * because those actions are just propagating state from the + * Linux page tables into the TSB, page table mappings are not + * being changed. If a real fault occurs, the processor will + * synchronize with us when it hits flush_tsb_user(), this is + * also true for the case where vmscan is modifying the page + * tables. The only thing we need to be careful with is to + * skip any locked TSB entries during copy_tsb(). + * + * When we finish committing to the new TSB, we have to drop + * the lock and ask all other cpus running this address space + * to run tsb_context_switch() to see the new TSB table. + */ + spin_lock_irqsave(&mm->context.lock, flags); + + old_tsb = mm->context.tsb; + old_cache_index = (mm->context.tsb_reg_val & 0x7UL); + old_size = mm->context.tsb_nentries * sizeof(struct tsb); + + + /* Handle multiple threads trying to grow the TSB at the same time. + * One will get in here first, and bump the size and the RSS limit. + * The others will get in here next and hit this check. 
+ */ + if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) { + spin_unlock_irqrestore(&mm->context.lock, flags); + + kmem_cache_free(tsb_caches[new_cache_index], new_tsb); + return; + } + + mm->context.tsb_rss_limit = new_rss_limit; + + if (old_tsb) { + extern void copy_tsb(unsigned long old_tsb_base, + unsigned long old_tsb_size, + unsigned long new_tsb_base, + unsigned long new_tsb_size); + unsigned long old_tsb_base = (unsigned long) old_tsb; + unsigned long new_tsb_base = (unsigned long) new_tsb; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) { + old_tsb_base = __pa(old_tsb_base); + new_tsb_base = __pa(new_tsb_base); + } + copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); + } + + mm->context.tsb = new_tsb; + setup_tsb_params(mm, new_size); + + spin_unlock_irqrestore(&mm->context.lock, flags); + + /* If old_tsb is NULL, we're being invoked for the first time + * from init_new_context(). + */ + if (old_tsb) { + /* Reload it on the local cpu. */ + tsb_context_switch(mm); + + /* Now force other processors to do the same. */ + smp_tsb_sync(mm); + + /* Now it is safe to free the old tsb. */ + kmem_cache_free(tsb_caches[old_cache_index], old_tsb); + } +} + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + spin_lock_init(&mm->context.lock); + + mm->context.sparc64_ctx_val = 0UL; + + /* copy_mm() copies over the parent's mm_struct before calling + * us, so we need to zero out the TSB pointer or else tsb_grow() + * will be confused and think there is an older TSB to free up. + */ + mm->context.tsb = NULL; + + /* If this is fork, inherit the parent's TSB size. We would + * grow it to that size on the first page fault anyways. 
+ */ + tsb_grow(mm, get_mm_rss(mm)); + + if (unlikely(!mm->context.tsb)) + return -ENOMEM; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + unsigned long flags, cache_index; + + cache_index = (mm->context.tsb_reg_val & 0x7UL); + kmem_cache_free(tsb_caches[cache_index], mm->context.tsb); + + /* We can remove these later, but for now it's useful + * to catch any bogus post-destroy_context() references + * to the TSB. + */ + mm->context.tsb = NULL; + mm->context.tsb_reg_val = 0UL; + + spin_lock_irqsave(&ctx_alloc_lock, flags); + + if (CTX_VALID(mm->context)) { + unsigned long nr = CTX_NRBITS(mm->context); + mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); + } + + spin_unlock_irqrestore(&ctx_alloc_lock, flags); +} diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index e4c9151fa116..f8479fad4047 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -15,6 +15,7 @@ #include <asm/head.h> #include <asm/thread_info.h> #include <asm/cacheflush.h> +#include <asm/hypervisor.h> /* Basically, most of the Spitfire vs. 
Cheetah madness * has to do with the fact that Cheetah does not support @@ -29,16 +30,18 @@ .text .align 32 .globl __flush_tlb_mm -__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ +__flush_tlb_mm: /* 18 insns */ + /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ ldxa [%o1] ASI_DMMU, %g2 cmp %g2, %o0 bne,pn %icc, __spitfire_flush_tlb_mm_slow mov 0x50, %g3 stxa %g0, [%g3] ASI_DMMU_DEMAP stxa %g0, [%g3] ASI_IMMU_DEMAP + sethi %hi(KERNBASE), %g3 + flush %g3 retl - flush %g6 - nop + nop nop nop nop @@ -51,7 +54,7 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ .align 32 .globl __flush_tlb_pending -__flush_tlb_pending: +__flush_tlb_pending: /* 26 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -72,7 +75,8 @@ __flush_tlb_pending: brnz,pt %o1, 1b nop stxa %g2, [%o4] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o4 + flush %o4 retl wrpr %g7, 0x0, %pstate nop @@ -82,7 +86,8 @@ __flush_tlb_pending: .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* %o0=start, %o1=end */ +__flush_tlb_kernel_range: /* 16 insns */ + /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f sethi %hi(PAGE_SIZE), %o4 @@ -94,8 +99,11 @@ __flush_tlb_kernel_range: /* %o0=start, %o1=end */ membar #Sync brnz,pt %o3, 1b sub %o3, %o4, %o3 -2: retl - flush %g6 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -105,7 +113,8 @@ __spitfire_flush_tlb_mm_slow: stxa %g0, [%g3] ASI_IMMU_DEMAP flush %g6 stxa %g2, [%o1] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o1 + flush %o1 retl wrpr %g1, 0, %pstate @@ -181,7 +190,7 @@ __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ .previous /* Cheetah specific versions, patched at boot time. 
*/ -__cheetah_flush_tlb_mm: /* 18 insns */ +__cheetah_flush_tlb_mm: /* 19 insns */ rdpr %pstate, %g7 andn %g7, PSTATE_IE, %g2 wrpr %g2, 0x0, %pstate @@ -196,12 +205,13 @@ __cheetah_flush_tlb_mm: /* 18 insns */ stxa %g0, [%g3] ASI_DMMU_DEMAP stxa %g0, [%g3] ASI_IMMU_DEMAP stxa %g2, [%o2] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o2 + flush %o2 wrpr %g0, 0, %tl retl wrpr %g7, 0x0, %pstate -__cheetah_flush_tlb_pending: /* 26 insns */ +__cheetah_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -225,7 +235,8 @@ __cheetah_flush_tlb_pending: /* 26 insns */ brnz,pt %o1, 1b nop stxa %g2, [%o4] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o4 + flush %o4 wrpr %g0, 0, %tl retl wrpr %g7, 0x0, %pstate @@ -245,7 +256,76 @@ __cheetah_flush_dcache_page: /* 11 insns */ nop #endif /* DCACHE_ALIASING_POSSIBLE */ -cheetah_patch_one: + /* Hypervisor specific versions, patched at boot time. */ +__hypervisor_tlb_tl0_error: + save %sp, -192, %sp + mov %i0, %o0 + call hypervisor_tlbop_error + mov %i1, %o1 + ret + restore + +__hypervisor_flush_tlb_mm: /* 10 insns */ + mov %o0, %o2 /* ARG2: mmu context */ + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop + +__hypervisor_flush_tlb_pending: /* 16 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ + sllx %o1, 3, %g1 + mov %o2, %g2 + mov %o0, %g3 +1: sub %g1, (1 << 3), %g1 + ldx [%g2 + %g1], %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g3, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g1, 1b + nop + retl + nop + +__hypervisor_flush_tlb_kernel_range: /* 16 insns 
*/ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sethi %hi(PAGE_SIZE), %g3 + mov %o0, %g1 + sub %o1, %g1, %g2 + sub %g2, %g3, %g2 +1: add %g1, %g2, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g2, 1b + sub %g2, %g3, %g2 +2: retl + nop + +#ifdef DCACHE_ALIASING_POSSIBLE + /* XXX Niagara and friends have an 8K cache, so no aliasing is + * XXX possible, but nothing explicit in the Hypervisor API + * XXX guarantees this. + */ +__hypervisor_flush_dcache_page: /* 2 insns */ + retl + nop +#endif + +tlb_patch_one: 1: lduw [%o1], %g1 stw %g1, [%o0] flush %o0 @@ -264,22 +344,22 @@ cheetah_patch_cachetlbops: or %o0, %lo(__flush_tlb_mm), %o0 sethi %hi(__cheetah_flush_tlb_mm), %o1 or %o1, %lo(__cheetah_flush_tlb_mm), %o1 - call cheetah_patch_one - mov 18, %o2 + call tlb_patch_one + mov 19, %o2 sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__cheetah_flush_tlb_pending), %o1 or %o1, %lo(__cheetah_flush_tlb_pending), %o1 - call cheetah_patch_one - mov 26, %o2 + call tlb_patch_one + mov 27, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 or %o0, %lo(__flush_dcache_page), %o0 sethi %hi(__cheetah_flush_dcache_page), %o1 or %o1, %lo(__cheetah_flush_dcache_page), %o1 - call cheetah_patch_one + call tlb_patch_one mov 11, %o2 #endif /* DCACHE_ALIASING_POSSIBLE */ @@ -295,16 +375,14 @@ cheetah_patch_cachetlbops: * %g1 address arg 1 (tlb page and range flushes) * %g7 address arg 2 (tlb range flush only) * - * %g6 ivector table, don't touch - * %g2 scratch 1 - * %g3 scratch 2 - * %g4 scratch 3 - * - * TODO: Make xcall TLB range flushes use the tricks above... 
-DaveM + * %g6 scratch 1 + * %g2 scratch 2 + * %g3 scratch 3 + * %g4 scratch 4 */ .align 32 .globl xcall_flush_tlb_mm -xcall_flush_tlb_mm: +xcall_flush_tlb_mm: /* 21 insns */ mov PRIMARY_CONTEXT, %g2 ldxa [%g2] ASI_DMMU, %g3 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -316,9 +394,19 @@ xcall_flush_tlb_mm: stxa %g0, [%g4] ASI_IMMU_DEMAP stxa %g3, [%g2] ASI_DMMU retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop .globl xcall_flush_tlb_pending -xcall_flush_tlb_pending: +xcall_flush_tlb_pending: /* 21 insns */ /* %g5=context, %g1=nr, %g7=vaddrs[] */ sllx %g1, 3, %g1 mov PRIMARY_CONTEXT, %g4 @@ -341,9 +429,10 @@ xcall_flush_tlb_pending: nop stxa %g2, [%g4] ASI_DMMU retry + nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: +xcall_flush_tlb_kernel_range: /* 25 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 @@ -360,14 +449,30 @@ xcall_flush_tlb_kernel_range: retry nop nop + nop + nop + nop + nop + nop + nop + nop + nop + nop /* This runs in a very controlled environment, so we do * not need to worry about BH races etc. */ .globl xcall_sync_tick xcall_sync_tick: - rdpr %pstate, %g2 + +661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %pil, %g2 wrpr %g0, 15, %pil sethi %hi(109f), %g7 @@ -390,8 +495,15 @@ xcall_sync_tick: */ .globl xcall_report_regs xcall_report_regs: - rdpr %pstate, %g2 + +661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %pil, %g2 wrpr %g0, 15, %pil sethi %hi(109f), %g7 @@ -453,62 +565,96 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address nop nop - .data - -errata32_hwbug: - .xword 0 - - .text - - /* These two are not performance critical... */ - .globl xcall_flush_tlb_all_spitfire -xcall_flush_tlb_all_spitfire: - /* Spitfire Errata #32 workaround. 
*/ - sethi %hi(errata32_hwbug), %g4 - stx %g0, [%g4 + %lo(errata32_hwbug)] - - clr %g2 - clr %g3 -1: ldxa [%g3] ASI_DTLB_DATA_ACCESS, %g4 - and %g4, _PAGE_L, %g5 - brnz,pn %g5, 2f - mov TLB_TAG_ACCESS, %g7 - - stxa %g0, [%g7] ASI_DMMU - membar #Sync - stxa %g0, [%g3] ASI_DTLB_DATA_ACCESS + /* %g5: error + * %g6: tlb op + */ +__hypervisor_tlb_xcall_error: + mov %g5, %g4 + mov %g6, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o0 + call hypervisor_tlbop_error_xcall + mov %l5, %o1 + ba,a,pt %xcc, rtrap_clr_l6 + + .globl __hypervisor_xcall_flush_tlb_mm +__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ + /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ + mov %o0, %g2 + mov %o1, %g3 + mov %o2, %g4 + mov %o3, %g1 + mov %o5, %g7 + clr %o0 /* ARG0: CPU lists unimplemented */ + clr %o1 /* ARG1: CPU lists unimplemented */ + mov %g5, %o2 /* ARG2: mmu context */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov HV_FAST_MMU_DEMAP_CTX, %g6 + brnz,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + mov %g2, %o0 + mov %g3, %o1 + mov %g4, %o2 + mov %g1, %o3 + mov %g7, %o5 membar #Sync + retry - /* Spitfire Errata #32 workaround. 
*/ - sethi %hi(errata32_hwbug), %g4 - stx %g0, [%g4 + %lo(errata32_hwbug)] - -2: ldxa [%g3] ASI_ITLB_DATA_ACCESS, %g4 - and %g4, _PAGE_L, %g5 - brnz,pn %g5, 2f - mov TLB_TAG_ACCESS, %g7 - - stxa %g0, [%g7] ASI_IMMU - membar #Sync - stxa %g0, [%g3] ASI_ITLB_DATA_ACCESS + .globl __hypervisor_xcall_flush_tlb_pending +__hypervisor_xcall_flush_tlb_pending: /* 21 insns */ + /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */ + sllx %g1, 3, %g1 + mov %o0, %g2 + mov %o1, %g3 + mov %o2, %g4 +1: sub %g1, (1 << 3), %g1 + ldx [%g7 + %g1], %o0 /* ARG0: virtual address */ + mov %g5, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 + brnz,a,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + brnz,pt %g1, 1b + nop + mov %g2, %o0 + mov %g3, %o1 + mov %g4, %o2 membar #Sync - - /* Spitfire Errata #32 workaround. */ - sethi %hi(errata32_hwbug), %g4 - stx %g0, [%g4 + %lo(errata32_hwbug)] - -2: add %g2, 1, %g2 - cmp %g2, SPITFIRE_HIGHEST_LOCKED_TLBENT - ble,pt %icc, 1b - sll %g2, 3, %g3 - flush %g6 retry - .globl xcall_flush_tlb_all_cheetah -xcall_flush_tlb_all_cheetah: - mov 0x80, %g2 - stxa %g0, [%g2] ASI_DMMU_DEMAP - stxa %g0, [%g2] ASI_IMMU_DEMAP + .globl __hypervisor_xcall_flush_tlb_kernel_range +__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ + /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + add %g2, 1, %g2 + sub %g3, %g2, %g3 + mov %o0, %g2 + mov %o1, %g4 + mov %o2, %g7 +1: add %g1, %g3, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 + brnz,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + sethi %hi(PAGE_SIZE), %o2 + brnz,pt %g3, 1b + sub %g3, %o2, %g3 + mov %g2, %o0 + 
mov %g4, %o1 + mov %g7, %o2 + membar #Sync retry /* These just get rescheduled to PIL vectors. */ @@ -527,4 +673,70 @@ xcall_capture: wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint retry + .globl xcall_new_mmu_context_version +xcall_new_mmu_context_version: + wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint + retry + #endif /* CONFIG_SMP */ + + + .globl hypervisor_patch_cachetlbops +hypervisor_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__hypervisor_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 + call tlb_patch_one + mov 10, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__hypervisor_flush_tlb_pending), %o1 + or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 + call tlb_patch_one + mov 16, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 16, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__hypervisor_flush_dcache_page), %o1 + or %o1, %lo(__hypervisor_flush_dcache_page), %o1 + call tlb_patch_one + mov 2, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_mm), %o0 + or %o0, %lo(xcall_flush_tlb_mm), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 + call tlb_patch_one + mov 21, %o2 + + sethi %hi(xcall_flush_tlb_pending), %o0 + or %o0, %lo(xcall_flush_tlb_pending), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1 + call tlb_patch_one + mov 21, %o2 + + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + 
or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 25, %o2 +#endif /* CONFIG_SMP */ + + ret + restore diff --git a/arch/sparc64/prom/cif.S b/arch/sparc64/prom/cif.S index 29d0ae74aed8..5f27ad779c0c 100644 --- a/arch/sparc64/prom/cif.S +++ b/arch/sparc64/prom/cif.S @@ -1,10 +1,12 @@ /* cif.S: PROM entry/exit assembler trampolines. * - * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 2005 David S. Miller <davem@davemloft.net> + * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 2005, 2006 David S. Miller <davem@davemloft.net> */ #include <asm/pstate.h> +#include <asm/cpudata.h> +#include <asm/thread_info.h> .text .globl prom_cif_interface @@ -12,78 +14,16 @@ prom_cif_interface: sethi %hi(p1275buf), %o0 or %o0, %lo(p1275buf), %o0 ldx [%o0 + 0x010], %o1 ! prom_cif_stack - save %o1, -0x190, %sp + save %o1, -192, %sp ldx [%i0 + 0x008], %l2 ! prom_cif_handler - rdpr %pstate, %l4 - wrpr %g0, 0x15, %pstate ! save alternate globals - stx %g1, [%sp + 2047 + 0x0b0] - stx %g2, [%sp + 2047 + 0x0b8] - stx %g3, [%sp + 2047 + 0x0c0] - stx %g4, [%sp + 2047 + 0x0c8] - stx %g5, [%sp + 2047 + 0x0d0] - stx %g6, [%sp + 2047 + 0x0d8] - stx %g7, [%sp + 2047 + 0x0e0] - wrpr %g0, 0x814, %pstate ! save interrupt globals - stx %g1, [%sp + 2047 + 0x0e8] - stx %g2, [%sp + 2047 + 0x0f0] - stx %g3, [%sp + 2047 + 0x0f8] - stx %g4, [%sp + 2047 + 0x100] - stx %g5, [%sp + 2047 + 0x108] - stx %g6, [%sp + 2047 + 0x110] - stx %g7, [%sp + 2047 + 0x118] - wrpr %g0, 0x14, %pstate ! save normal globals - stx %g1, [%sp + 2047 + 0x120] - stx %g2, [%sp + 2047 + 0x128] - stx %g3, [%sp + 2047 + 0x130] - stx %g4, [%sp + 2047 + 0x138] - stx %g5, [%sp + 2047 + 0x140] - stx %g6, [%sp + 2047 + 0x148] - stx %g7, [%sp + 2047 + 0x150] - wrpr %g0, 0x414, %pstate ! 
save mmu globals - stx %g1, [%sp + 2047 + 0x158] - stx %g2, [%sp + 2047 + 0x160] - stx %g3, [%sp + 2047 + 0x168] - stx %g4, [%sp + 2047 + 0x170] - stx %g5, [%sp + 2047 + 0x178] - stx %g6, [%sp + 2047 + 0x180] - stx %g7, [%sp + 2047 + 0x188] - mov %g1, %l0 ! also save to locals, so we can handle - mov %g2, %l1 ! tlb faults later on, when accessing - mov %g3, %l3 ! the stack. - mov %g7, %l5 - wrpr %l4, PSTATE_IE, %pstate ! turn off interrupts + mov %g4, %l0 + mov %g5, %l1 + mov %g6, %l3 call %l2 add %i0, 0x018, %o0 ! prom_args - wrpr %g0, 0x414, %pstate ! restore mmu globals - mov %l0, %g1 - mov %l1, %g2 - mov %l3, %g3 - mov %l5, %g7 - wrpr %g0, 0x14, %pstate ! restore normal globals - ldx [%sp + 2047 + 0x120], %g1 - ldx [%sp + 2047 + 0x128], %g2 - ldx [%sp + 2047 + 0x130], %g3 - ldx [%sp + 2047 + 0x138], %g4 - ldx [%sp + 2047 + 0x140], %g5 - ldx [%sp + 2047 + 0x148], %g6 - ldx [%sp + 2047 + 0x150], %g7 - wrpr %g0, 0x814, %pstate ! restore interrupt globals - ldx [%sp + 2047 + 0x0e8], %g1 - ldx [%sp + 2047 + 0x0f0], %g2 - ldx [%sp + 2047 + 0x0f8], %g3 - ldx [%sp + 2047 + 0x100], %g4 - ldx [%sp + 2047 + 0x108], %g5 - ldx [%sp + 2047 + 0x110], %g6 - ldx [%sp + 2047 + 0x118], %g7 - wrpr %g0, 0x15, %pstate ! restore alternate globals - ldx [%sp + 2047 + 0x0b0], %g1 - ldx [%sp + 2047 + 0x0b8], %g2 - ldx [%sp + 2047 + 0x0c0], %g3 - ldx [%sp + 2047 + 0x0c8], %g4 - ldx [%sp + 2047 + 0x0d0], %g5 - ldx [%sp + 2047 + 0x0d8], %g6 - ldx [%sp + 2047 + 0x0e0], %g7 - wrpr %l4, 0, %pstate ! restore original pstate + mov %l0, %g4 + mov %l1, %g5 + mov %l3, %g6 ret restore @@ -91,135 +31,18 @@ prom_cif_interface: prom_cif_callback: sethi %hi(p1275buf), %o1 or %o1, %lo(p1275buf), %o1 - save %sp, -0x270, %sp - rdpr %pstate, %l4 - wrpr %g0, 0x15, %pstate ! 
save PROM alternate globals - stx %g1, [%sp + 2047 + 0x0b0] - stx %g2, [%sp + 2047 + 0x0b8] - stx %g3, [%sp + 2047 + 0x0c0] - stx %g4, [%sp + 2047 + 0x0c8] - stx %g5, [%sp + 2047 + 0x0d0] - stx %g6, [%sp + 2047 + 0x0d8] - stx %g7, [%sp + 2047 + 0x0e0] - ! restore Linux alternate globals - ldx [%sp + 2047 + 0x190], %g1 - ldx [%sp + 2047 + 0x198], %g2 - ldx [%sp + 2047 + 0x1a0], %g3 - ldx [%sp + 2047 + 0x1a8], %g4 - ldx [%sp + 2047 + 0x1b0], %g5 - ldx [%sp + 2047 + 0x1b8], %g6 - ldx [%sp + 2047 + 0x1c0], %g7 - wrpr %g0, 0x814, %pstate ! save PROM interrupt globals - stx %g1, [%sp + 2047 + 0x0e8] - stx %g2, [%sp + 2047 + 0x0f0] - stx %g3, [%sp + 2047 + 0x0f8] - stx %g4, [%sp + 2047 + 0x100] - stx %g5, [%sp + 2047 + 0x108] - stx %g6, [%sp + 2047 + 0x110] - stx %g7, [%sp + 2047 + 0x118] - ! restore Linux interrupt globals - ldx [%sp + 2047 + 0x1c8], %g1 - ldx [%sp + 2047 + 0x1d0], %g2 - ldx [%sp + 2047 + 0x1d8], %g3 - ldx [%sp + 2047 + 0x1e0], %g4 - ldx [%sp + 2047 + 0x1e8], %g5 - ldx [%sp + 2047 + 0x1f0], %g6 - ldx [%sp + 2047 + 0x1f8], %g7 - wrpr %g0, 0x14, %pstate ! save PROM normal globals - stx %g1, [%sp + 2047 + 0x120] - stx %g2, [%sp + 2047 + 0x128] - stx %g3, [%sp + 2047 + 0x130] - stx %g4, [%sp + 2047 + 0x138] - stx %g5, [%sp + 2047 + 0x140] - stx %g6, [%sp + 2047 + 0x148] - stx %g7, [%sp + 2047 + 0x150] - ! restore Linux normal globals - ldx [%sp + 2047 + 0x200], %g1 - ldx [%sp + 2047 + 0x208], %g2 - ldx [%sp + 2047 + 0x210], %g3 - ldx [%sp + 2047 + 0x218], %g4 - ldx [%sp + 2047 + 0x220], %g5 - ldx [%sp + 2047 + 0x228], %g6 - ldx [%sp + 2047 + 0x230], %g7 - wrpr %g0, 0x414, %pstate ! save PROM mmu globals - stx %g1, [%sp + 2047 + 0x158] - stx %g2, [%sp + 2047 + 0x160] - stx %g3, [%sp + 2047 + 0x168] - stx %g4, [%sp + 2047 + 0x170] - stx %g5, [%sp + 2047 + 0x178] - stx %g6, [%sp + 2047 + 0x180] - stx %g7, [%sp + 2047 + 0x188] - ! 
restore Linux mmu globals - ldx [%sp + 2047 + 0x238], %o0 - ldx [%sp + 2047 + 0x240], %o1 - ldx [%sp + 2047 + 0x248], %l2 - ldx [%sp + 2047 + 0x250], %l3 - ldx [%sp + 2047 + 0x258], %l5 - ldx [%sp + 2047 + 0x260], %l6 - ldx [%sp + 2047 + 0x268], %l7 - ! switch to Linux tba - sethi %hi(sparc64_ttable_tl0), %l1 - rdpr %tba, %l0 ! save PROM tba - mov %o0, %g1 - mov %o1, %g2 - mov %l2, %g3 - mov %l3, %g4 - mov %l5, %g5 - mov %l6, %g6 - mov %l7, %g7 - wrpr %l1, %tba ! install Linux tba - wrpr %l4, 0, %pstate ! restore PSTATE + save %sp, -192, %sp + TRAP_LOAD_THREAD_REG(%g6, %g1) + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %o0) + ldx [%g6 + TI_TASK], %g4 call prom_world - mov %g0, %o0 + mov 0, %o0 ldx [%i1 + 0x000], %l2 call %l2 mov %i0, %o0 mov %o0, %l1 call prom_world - or %g0, 1, %o0 - wrpr %g0, 0x14, %pstate ! interrupts off - ! restore PROM mmu globals - ldx [%sp + 2047 + 0x158], %o0 - ldx [%sp + 2047 + 0x160], %o1 - ldx [%sp + 2047 + 0x168], %l2 - ldx [%sp + 2047 + 0x170], %l3 - ldx [%sp + 2047 + 0x178], %l5 - ldx [%sp + 2047 + 0x180], %l6 - ldx [%sp + 2047 + 0x188], %l7 - wrpr %g0, 0x414, %pstate ! restore PROM mmu globals - mov %o0, %g1 - mov %o1, %g2 - mov %l2, %g3 - mov %l3, %g4 - mov %l5, %g5 - mov %l6, %g6 - mov %l7, %g7 - wrpr %l0, %tba ! restore PROM tba - wrpr %g0, 0x14, %pstate ! restore PROM normal globals - ldx [%sp + 2047 + 0x120], %g1 - ldx [%sp + 2047 + 0x128], %g2 - ldx [%sp + 2047 + 0x130], %g3 - ldx [%sp + 2047 + 0x138], %g4 - ldx [%sp + 2047 + 0x140], %g5 - ldx [%sp + 2047 + 0x148], %g6 - ldx [%sp + 2047 + 0x150], %g7 - wrpr %g0, 0x814, %pstate ! restore PROM interrupt globals - ldx [%sp + 2047 + 0x0e8], %g1 - ldx [%sp + 2047 + 0x0f0], %g2 - ldx [%sp + 2047 + 0x0f8], %g3 - ldx [%sp + 2047 + 0x100], %g4 - ldx [%sp + 2047 + 0x108], %g5 - ldx [%sp + 2047 + 0x110], %g6 - ldx [%sp + 2047 + 0x118], %g7 - wrpr %g0, 0x15, %pstate ! 
restore PROM alternate globals - ldx [%sp + 2047 + 0x0b0], %g1 - ldx [%sp + 2047 + 0x0b8], %g2 - ldx [%sp + 2047 + 0x0c0], %g3 - ldx [%sp + 2047 + 0x0c8], %g4 - ldx [%sp + 2047 + 0x0d0], %g5 - ldx [%sp + 2047 + 0x0d8], %g6 - ldx [%sp + 2047 + 0x0e0], %g7 - wrpr %l4, 0, %pstate + mov 1, %o0 ret restore %l1, 0, %o0 diff --git a/arch/sparc64/prom/console.c b/arch/sparc64/prom/console.c index ac6d035dd150..7c25c54cefdc 100644 --- a/arch/sparc64/prom/console.c +++ b/arch/sparc64/prom/console.c @@ -102,6 +102,9 @@ prom_query_input_device(void) if (!strncmp (propb, "rsc", 3)) return PROMDEV_IRSC; + if (!strncmp (propb, "virtual-console", 3)) + return PROMDEV_IVCONS; + if (strncmp (propb, "tty", 3) || !propb[3]) return PROMDEV_I_UNK; @@ -143,6 +146,9 @@ prom_query_output_device(void) if (!strncmp (propb, "rsc", 3)) return PROMDEV_ORSC; + if (!strncmp (propb, "virtual-console", 3)) + return PROMDEV_OVCONS; + if (strncmp (propb, "tty", 3) || !propb[3]) return PROMDEV_O_UNK; diff --git a/arch/sparc64/prom/init.c b/arch/sparc64/prom/init.c index f3cc2d8578b2..1c0db842a6f4 100644 --- a/arch/sparc64/prom/init.c +++ b/arch/sparc64/prom/init.c @@ -14,11 +14,10 @@ #include <asm/openprom.h> #include <asm/oplib.h> -enum prom_major_version prom_vers; -unsigned int prom_rev, prom_prev; +/* OBP version string. */ +char prom_version[80]; /* The root node of the prom device tree. 
*/ -int prom_root_node; int prom_stdin, prom_stdout; int prom_chosen_node; @@ -31,68 +30,25 @@ extern void prom_cif_init(void *, void *); void __init prom_init(void *cif_handler, void *cif_stack) { - char buffer[80], *p; - int ints[3]; int node; - int i = 0; - int bufadjust; - - prom_vers = PROM_P1275; prom_cif_init(cif_handler, cif_stack); - prom_root_node = prom_getsibling(0); - if((prom_root_node == 0) || (prom_root_node == -1)) - prom_halt(); - prom_chosen_node = prom_finddevice(prom_chosen_path); if (!prom_chosen_node || prom_chosen_node == -1) prom_halt(); - prom_stdin = prom_getint (prom_chosen_node, "stdin"); - prom_stdout = prom_getint (prom_chosen_node, "stdout"); + prom_stdin = prom_getint(prom_chosen_node, "stdin"); + prom_stdout = prom_getint(prom_chosen_node, "stdout"); node = prom_finddevice("/openprom"); if (!node || node == -1) prom_halt(); - prom_getstring (node, "version", buffer, sizeof (buffer)); - - prom_printf ("\n"); - - if (strncmp (buffer, "OBP ", 4)) - goto strange_version; - - /* - * Version field is expected to be 'OBP xx.yy.zz date...' - * However, Sun can't stick to this format very well, so - * we need to check for 'OBP xx.yy.zz date...' and adjust - * accordingly. -spot - */ - - if (strncmp (buffer, "OBP ", 5)) - bufadjust = 4; - else - bufadjust = 5; - - p = buffer + bufadjust; - while (p && isdigit(*p) && i < 3) { - ints[i++] = simple_strtoul(p, NULL, 0); - if ((p = strchr(p, '.')) != NULL) - p++; - } - if (i != 3) - goto strange_version; - - prom_rev = ints[1]; - prom_prev = (ints[0] << 16) | (ints[1] << 8) | ints[2]; - - printk ("PROMLIB: Sun IEEE Boot Prom %s\n", buffer + bufadjust); + prom_getstring(node, "version", prom_version, sizeof(prom_version)); - /* Initialization successful. 
*/ - return; + prom_printf("\n"); -strange_version: - prom_printf ("Strange OBP version `%s'.\n", buffer); - prom_halt (); + printk("PROMLIB: Sun IEEE Boot Prom '%s'\n", prom_version); + printk("PROMLIB: Root node compatible: %s\n", prom_root_compatible); } diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index 87f5cfce23bb..577bde8b6647 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -112,28 +112,20 @@ unsigned char prom_get_idprom(char *idbuf, int num_bytes) return 0xff; } -/* Get the major prom version number. */ -int prom_version(void) -{ - return PROM_P1275; -} - -/* Get the prom plugin-revision. */ -int prom_getrev(void) -{ - return prom_rev; -} - -/* Get the prom firmware print revision. */ -int prom_getprev(void) +/* Install Linux trap table so PROM uses that instead of its own. */ +void prom_set_trap_table(unsigned long tba) { - return prom_prev; + p1275_cmd("SUNW,set-trap-table", + (P1275_ARG(0, P1275_ARG_IN_64B) | + P1275_INOUT(1, 0)), tba); } -/* Install Linux trap table so PROM uses that instead of its own. 
*/ -void prom_set_trap_table(unsigned long tba) +void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa) { - p1275_cmd("SUNW,set-trap-table", P1275_INOUT(1, 0), tba); + p1275_cmd("SUNW,set-trap-table", + (P1275_ARG(0, P1275_ARG_IN_64B) | + P1275_ARG(1, P1275_ARG_IN_64B) | + P1275_INOUT(2, 0)), tba, mmfsa); } int prom_get_mmu_ihandle(void) @@ -303,9 +295,21 @@ int prom_wakeupsystem(void) } #ifdef CONFIG_SMP -void prom_startcpu(int cpunode, unsigned long pc, unsigned long o0) +void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg) +{ + p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, arg); +} + +void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg) +{ + p1275_cmd("SUNW,start-cpu-by-cpuid", P1275_INOUT(3, 0), + cpuid, pc, arg); +} + +void prom_stopcpu_cpuid(int cpuid) { - p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, o0); + p1275_cmd("SUNW,stop-cpu-by-cpuid", P1275_INOUT(1, 0), + cpuid); } void prom_stopself(void) diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c index a5a7c5712028..2b32c489860c 100644 --- a/arch/sparc64/prom/p1275.c +++ b/arch/sparc64/prom/p1275.c @@ -30,16 +30,6 @@ extern void prom_world(int); extern void prom_cif_interface(void); extern void prom_cif_callback(void); -static inline unsigned long spitfire_get_primary_context(void) -{ - unsigned long ctx; - - __asm__ __volatile__("ldxa [%1] %2, %0" - : "=r" (ctx) - : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - return ctx; -} - /* * This provides SMP safety on the p1275buf. prom_callback() drops this lock * to allow recursuve acquisition. @@ -55,7 +45,6 @@ long p1275_cmd(const char *service, long fmt, ...) 
long attrs, x; p = p1275buf.prom_buffer; - BUG_ON((spitfire_get_primary_context() & CTX_NR_MASK) != 0); spin_lock_irqsave(&prom_entry_lock, flags); diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c index b1ff9e87dcc6..49075abd7cbc 100644 --- a/arch/sparc64/prom/tree.c +++ b/arch/sparc64/prom/tree.c @@ -51,7 +51,7 @@ prom_getparent(int node) __inline__ int __prom_getsibling(int node) { - return p1275_cmd ("peer", P1275_INOUT(1, 1), node); + return p1275_cmd(prom_peer_name, P1275_INOUT(1, 1), node); } __inline__ int @@ -59,9 +59,12 @@ prom_getsibling(int node) { int sibnode; - if(node == -1) return 0; + if (node == -1) + return 0; sibnode = __prom_getsibling(node); - if(sibnode == -1) return 0; + if (sibnode == -1) + return 0; + return sibnode; } diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c index 3ab4677395f2..5284996780a7 100644 --- a/arch/sparc64/solaris/misc.c +++ b/arch/sparc64/solaris/misc.c @@ -90,7 +90,7 @@ static u32 do_solaris_mmap(u32 addr, u32 len, u32 prot, u32 flags, u32 fd, u64 o len = PAGE_ALIGN(len); if(!(flags & MAP_FIXED)) addr = 0; - else if (len > 0xf0000000UL || addr > 0xf0000000UL - len) + else if (len > STACK_TOP32 || addr > STACK_TOP32 - len) goto out_putf; ret_type = flags & _MAP_NEW; flags &= ~_MAP_NEW; @@ -102,7 +102,7 @@ static u32 do_solaris_mmap(u32 addr, u32 len, u32 prot, u32 flags, u32 fd, u64 o (unsigned long) prot, (unsigned long) flags, off); up_write(&current->mm->mmap_sem); if(!ret_type) - retval = ((retval < 0xf0000000) ? 0 : retval); + retval = ((retval < STACK_TOP32) ? 0 : retval); out_putf: if (file) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index bd49b25fba6b..6c6c5498899f 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -9097,6 +9097,10 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) tp->phy_id = PHY_ID_INVALID; tp->led_ctrl = LED_CTRL_MODE_PHY_1; + /* Do not even try poking around in here on Sun parts. 
*/ + if (tp->tg3_flags2 & TG3_FLG2_SUN_570X) + return; + tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val); if (val == NIC_SRAM_DATA_SIG_MAGIC) { u32 nic_cfg, led_cfg; diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 1c8b612d8234..3e156e005f2e 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -440,7 +440,8 @@ static int __init bbc_i2c_init(void) struct linux_ebus_device *edev = NULL; int err, index = 0; - if (tlb_type != cheetah || !bbc_present()) + if ((tlb_type != cheetah && tlb_type != cheetah_plus) || + !bbc_present()) return -ENODEV; for_each_ebus(ebus) { @@ -486,3 +487,4 @@ static void bbc_i2c_cleanup(void) module_init(bbc_i2c_init); module_exit(bbc_i2c_cleanup); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index b3c561abe3f6..89e5413cc2a3 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -582,6 +582,13 @@ config SERIAL_SUNSAB_CONSOLE on your Sparc system as the console, you can do so by answering Y to this option. +config SERIAL_SUNHV + bool "Sun4v Hypervisor Console support" + depends on SPARC64 + help + This driver supports the console device found on SUN4V Sparc + systems. Say Y if you want to be able to use this device. 
+ config SERIAL_IP22_ZILOG tristate "IP22 Zilog8530 serial support" depends on SGI_IP22 diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index eaf8e01db198..50c221af9e6d 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_SERIAL_PXA) += pxa.o obj-$(CONFIG_SERIAL_SA1100) += sa1100.o obj-$(CONFIG_SERIAL_S3C2410) += s3c2410.o obj-$(CONFIG_SERIAL_SUNCORE) += suncore.o +obj-$(CONFIG_SERIAL_SUNHV) += sunhv.o obj-$(CONFIG_SERIAL_SUNZILOG) += sunzilog.o obj-$(CONFIG_SERIAL_IP22_ZILOG) += ip22zilog.o obj-$(CONFIG_SERIAL_SUNSU) += sunsu.o diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c new file mode 100644 index 000000000000..f137804b3133 --- /dev/null +++ b/drivers/serial/sunhv.c @@ -0,0 +1,550 @@ +/* sunhv.c: Serial driver for SUN4V hypervisor console. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/major.h> +#include <linux/circ_buf.h> +#include <linux/serial.h> +#include <linux/sysrq.h> +#include <linux/console.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/init.h> + +#include <asm/hypervisor.h> +#include <asm/spitfire.h> +#include <asm/vdev.h> +#include <asm/oplib.h> +#include <asm/irq.h> + +#if defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include <linux/serial_core.h> + +#include "suncore.h" + +#define CON_BREAK ((long)-1) +#define CON_HUP ((long)-2) + +static inline long hypervisor_con_getchar(long *status) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + + func = HV_FAST_CONS_GETCHAR; + arg0 = 0; + arg1 = 0; + __asm__ __volatile__("ta %6" + : "=&r" (func), "=&r" (arg0), "=&r" (arg1) + : "0" (func), "1" (arg0), "2" (arg1), + "i" (HV_FAST_TRAP)); + + *status = arg0; + + return 
(long) arg1; +} + +static inline long hypervisor_con_putchar(long ch) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + + func = HV_FAST_CONS_PUTCHAR; + arg0 = ch; + __asm__ __volatile__("ta %4" + : "=&r" (func), "=&r" (arg0) + : "0" (func), "1" (arg0), "i" (HV_FAST_TRAP)); + + return (long) arg0; +} + +#define IGNORE_BREAK 0x1 +#define IGNORE_ALL 0x2 + +static int hung_up = 0; + +static struct tty_struct *receive_chars(struct uart_port *port, struct pt_regs *regs) +{ + struct tty_struct *tty = NULL; + int saw_console_brk = 0; + int limit = 10000; + + if (port->info != NULL) /* Unopened serial console */ + tty = port->info->tty; + + while (limit-- > 0) { + long status; + long c = hypervisor_con_getchar(&status); + unsigned char flag; + + if (status == HV_EWOULDBLOCK) + break; + + if (c == CON_BREAK) { + if (uart_handle_break(port)) + continue; + saw_console_brk = 1; + c = 0; + } + + if (c == CON_HUP) { + hung_up = 1; + uart_handle_dcd_change(port, 0); + } else if (hung_up) { + hung_up = 0; + uart_handle_dcd_change(port, 1); + } + + if (tty == NULL) { + uart_handle_sysrq_char(port, c, regs); + continue; + } + + flag = TTY_NORMAL; + port->icount.rx++; + if (c == CON_BREAK) { + port->icount.brk++; + if (uart_handle_break(port)) + continue; + flag = TTY_BREAK; + } + + if (uart_handle_sysrq_char(port, c, regs)) + continue; + + if ((port->ignore_status_mask & IGNORE_ALL) || + ((port->ignore_status_mask & IGNORE_BREAK) && + (c == CON_BREAK))) + continue; + + tty_insert_flip_char(tty, c, flag); + } + + if (saw_console_brk) + sun_do_break(); + + return tty; +} + +static void transmit_chars(struct uart_port *port) +{ + struct circ_buf *xmit; + + if (!port->info) + return; + + xmit = &port->info->xmit; + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) + return; + + while (!uart_circ_empty(xmit)) { + long status = hypervisor_con_putchar(xmit->buf[xmit->tail]); + + if (status != HV_EOK) + break; + + xmit->tail = (xmit->tail + 1) & 
(UART_XMIT_SIZE - 1); + port->icount.tx++; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); +} + +static irqreturn_t sunhv_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct uart_port *port = dev_id; + struct tty_struct *tty; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + tty = receive_chars(port, regs); + transmit_chars(port); + spin_unlock_irqrestore(&port->lock, flags); + + if (tty) + tty_flip_buffer_push(tty); + + return IRQ_HANDLED; +} + +/* port->lock is not held. */ +static unsigned int sunhv_tx_empty(struct uart_port *port) +{ + /* Transmitter is always empty for us. If the circ buffer + * is non-empty or there is an x_char pending, our caller + * will do the right thing and ignore what we return here. + */ + return TIOCSER_TEMT; +} + +/* port->lock held by caller. */ +static void sunhv_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + return; +} + +/* port->lock is held by caller and interrupts are disabled. */ +static unsigned int sunhv_get_mctrl(struct uart_port *port) +{ + return TIOCM_DSR | TIOCM_CAR | TIOCM_CTS; +} + +/* port->lock held by caller. */ +static void sunhv_stop_tx(struct uart_port *port) +{ + return; +} + +/* port->lock held by caller. */ +static void sunhv_start_tx(struct uart_port *port) +{ + struct circ_buf *xmit = &port->info->xmit; + + while (!uart_circ_empty(xmit)) { + long status = hypervisor_con_putchar(xmit->buf[xmit->tail]); + + if (status != HV_EOK) + break; + + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } +} + +/* port->lock is not held. */ +static void sunhv_send_xchar(struct uart_port *port, char ch) +{ + unsigned long flags; + int limit = 10000; + + spin_lock_irqsave(&port->lock, flags); + + while (limit-- > 0) { + long status = hypervisor_con_putchar(ch); + if (status == HV_EOK) + break; + } + + spin_unlock_irqrestore(&port->lock, flags); +} + +/* port->lock held by caller. 
*/ +static void sunhv_stop_rx(struct uart_port *port) +{ +} + +/* port->lock held by caller. */ +static void sunhv_enable_ms(struct uart_port *port) +{ +} + +/* port->lock is not held. */ +static void sunhv_break_ctl(struct uart_port *port, int break_state) +{ + if (break_state) { + unsigned long flags; + int limit = 1000000; + + spin_lock_irqsave(&port->lock, flags); + + while (limit-- > 0) { + long status = hypervisor_con_putchar(CON_BREAK); + if (status == HV_EOK) + break; + udelay(2); + } + + spin_unlock_irqrestore(&port->lock, flags); + } +} + +/* port->lock is not held. */ +static int sunhv_startup(struct uart_port *port) +{ + return 0; +} + +/* port->lock is not held. */ +static void sunhv_shutdown(struct uart_port *port) +{ +} + +/* port->lock is not held. */ +static void sunhv_set_termios(struct uart_port *port, struct termios *termios, + struct termios *old) +{ + unsigned int baud = uart_get_baud_rate(port, termios, old, 0, 4000000); + unsigned int quot = uart_get_divisor(port, baud); + unsigned int iflag, cflag; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + iflag = termios->c_iflag; + cflag = termios->c_cflag; + + port->ignore_status_mask = 0; + if (iflag & IGNBRK) + port->ignore_status_mask |= IGNORE_BREAK; + if ((cflag & CREAD) == 0) + port->ignore_status_mask |= IGNORE_ALL; + + /* XXX */ + uart_update_timeout(port, cflag, + (port->uartclk / (16 * quot))); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *sunhv_type(struct uart_port *port) +{ + return "SUN4V HCONS"; +} + +static void sunhv_release_port(struct uart_port *port) +{ +} + +static int sunhv_request_port(struct uart_port *port) +{ + return 0; +} + +static void sunhv_config_port(struct uart_port *port, int flags) +{ +} + +static int sunhv_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + return -EINVAL; +} + +static struct uart_ops sunhv_pops = { + .tx_empty = sunhv_tx_empty, + .set_mctrl = sunhv_set_mctrl, + .get_mctrl = 
sunhv_get_mctrl, + .stop_tx = sunhv_stop_tx, + .start_tx = sunhv_start_tx, + .send_xchar = sunhv_send_xchar, + .stop_rx = sunhv_stop_rx, + .enable_ms = sunhv_enable_ms, + .break_ctl = sunhv_break_ctl, + .startup = sunhv_startup, + .shutdown = sunhv_shutdown, + .set_termios = sunhv_set_termios, + .type = sunhv_type, + .release_port = sunhv_release_port, + .request_port = sunhv_request_port, + .config_port = sunhv_config_port, + .verify_port = sunhv_verify_port, +}; + +static struct uart_driver sunhv_reg = { + .owner = THIS_MODULE, + .driver_name = "serial", + .devfs_name = "tts/", + .dev_name = "ttyS", + .major = TTY_MAJOR, +}; + +static struct uart_port *sunhv_port; + +static inline void sunhv_console_putchar(struct uart_port *port, char c) +{ + unsigned long flags; + int limit = 1000000; + + spin_lock_irqsave(&port->lock, flags); + + while (limit-- > 0) { + long status = hypervisor_con_putchar(c); + if (status == HV_EOK) + break; + udelay(2); + } + + spin_unlock_irqrestore(&port->lock, flags); +} + +static void sunhv_console_write(struct console *con, const char *s, unsigned n) +{ + struct uart_port *port = sunhv_port; + int i; + + for (i = 0; i < n; i++) { + if (*s == '\n') + sunhv_console_putchar(port, '\r'); + sunhv_console_putchar(port, *s++); + } +} + +static struct console sunhv_console = { + .name = "ttyHV", + .write = sunhv_console_write, + .device = uart_console_device, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &sunhv_reg, +}; + +static inline struct console *SUNHV_CONSOLE(void) +{ + if (con_is_present()) + return NULL; + + sunhv_console.index = 0; + + return &sunhv_console; +} + +static int __init hv_console_compatible(char *buf, int len) +{ + while (len) { + int this_len; + + if (!strcmp(buf, "qcn")) + return 1; + + this_len = strlen(buf) + 1; + + buf += this_len; + len -= this_len; + } + + return 0; +} + +static unsigned int __init get_interrupt(void) +{ + const char *cons_str = "console"; + const char *compat_str = "compatible"; + int node 
= prom_getchild(sun4v_vdev_root); + char buf[64]; + int err, len; + + node = prom_searchsiblings(node, cons_str); + if (!node) + return 0; + + len = prom_getproplen(node, compat_str); + if (len == 0 || len == -1) + return 0; + + err = prom_getproperty(node, compat_str, buf, 64); + if (err == -1) + return 0; + + if (!hv_console_compatible(buf, len)) + return 0; + + /* Ok, the this is the OBP node for the sun4v hypervisor + * console device. Decode the interrupt. + */ + return sun4v_vdev_device_interrupt(node); +} + +static int __init sunhv_init(void) +{ + struct uart_port *port; + int ret; + + if (tlb_type != hypervisor) + return -ENODEV; + + port = kmalloc(sizeof(struct uart_port), GFP_KERNEL); + if (unlikely(!port)) + return -ENOMEM; + + memset(port, 0, sizeof(struct uart_port)); + + port->line = 0; + port->ops = &sunhv_pops; + port->type = PORT_SUNHV; + port->uartclk = ( 29491200 / 16 ); /* arbitrary */ + + /* Set this just to make uart_configure_port() happy. */ + port->membase = (unsigned char __iomem *) __pa(port); + + port->irq = get_interrupt(); + if (!port->irq) { + kfree(port); + return -ENODEV; + } + + sunhv_reg.minor = sunserial_current_minor; + sunhv_reg.nr = 1; + + ret = uart_register_driver(&sunhv_reg); + if (ret < 0) { + printk(KERN_ERR "SUNHV: uart_register_driver() failed %d\n", + ret); + kfree(port); + + return ret; + } + + sunhv_reg.tty_driver->name_base = sunhv_reg.minor - 64; + sunserial_current_minor += 1; + + sunhv_reg.cons = SUNHV_CONSOLE(); + + sunhv_port = port; + + ret = uart_add_one_port(&sunhv_reg, port); + if (ret < 0) { + printk(KERN_ERR "SUNHV: uart_add_one_port() failed %d\n", ret); + sunserial_current_minor -= 1; + uart_unregister_driver(&sunhv_reg); + kfree(port); + sunhv_port = NULL; + return -ENODEV; + } + + if (request_irq(port->irq, sunhv_interrupt, + SA_SHIRQ, "serial(sunhv)", port)) { + printk(KERN_ERR "sunhv: Cannot register IRQ\n"); + uart_remove_one_port(&sunhv_reg, port); + sunserial_current_minor -= 1; + 
uart_unregister_driver(&sunhv_reg); + kfree(port); + sunhv_port = NULL; + return -ENODEV; + } + + return 0; +} + +static void __exit sunhv_exit(void) +{ + struct uart_port *port = sunhv_port; + + BUG_ON(!port); + + free_irq(port->irq, port); + + uart_remove_one_port(&sunhv_reg, port); + sunserial_current_minor -= 1; + + uart_unregister_driver(&sunhv_reg); + + kfree(sunhv_port); + sunhv_port = NULL; +} + +module_init(sunhv_init); +module_exit(sunhv_exit); + +MODULE_AUTHOR("David S. Miller"); +MODULE_DESCRIPTION("SUN4V Hypervisor console driver") +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index 85664228a0b6..a2fb0c2fb121 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -955,14 +955,13 @@ static struct console sunsab_console = { .index = -1, .data = &sunsab_reg, }; -#define SUNSAB_CONSOLE (&sunsab_console) -static void __init sunsab_console_init(void) +static inline struct console *SUNSAB_CONSOLE(void) { int i; if (con_is_present()) - return; + return NULL; for (i = 0; i < num_channels; i++) { int this_minor = sunsab_reg.minor + i; @@ -971,13 +970,14 @@ static void __init sunsab_console_init(void) break; } if (i == num_channels) - return; + return NULL; sunsab_console.index = i; - register_console(&sunsab_console); + + return &sunsab_console; } #else -#define SUNSAB_CONSOLE (NULL) +#define SUNSAB_CONSOLE() (NULL) #define sunsab_console_init() do { } while (0) #endif @@ -1124,7 +1124,6 @@ static int __init sunsab_init(void) sunsab_reg.minor = sunserial_current_minor; sunsab_reg.nr = num_channels; - sunsab_reg.cons = SUNSAB_CONSOLE; ret = uart_register_driver(&sunsab_reg); if (ret < 0) { @@ -1143,10 +1142,12 @@ static int __init sunsab_init(void) return ret; } + sunsab_reg.tty_driver->name_base = sunsab_reg.minor - 64; + + sunsab_reg.cons = SUNSAB_CONSOLE(); + sunserial_current_minor += num_channels; - sunsab_console_init(); - for (i = 0; i < num_channels; i++) { struct uart_sunsab_port *up = 
&sunsab_ports[i]; diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index 4e453fa966ae..46c44b83f57c 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -1280,6 +1280,7 @@ static int __init sunsu_kbd_ms_init(struct uart_sunsu_port *up, int channel) struct serio *serio; #endif + spin_lock_init(&up->port.lock); up->port.line = channel; up->port.type = PORT_UNKNOWN; up->port.uartclk = (SU_BASE_BAUD * 16); @@ -1464,18 +1465,17 @@ static struct console sunsu_cons = { .index = -1, .data = &sunsu_reg, }; -#define SUNSU_CONSOLE (&sunsu_cons) /* * Register console. */ -static int __init sunsu_serial_console_init(void) +static inline struct console *SUNSU_CONSOLE(void) { int i; if (con_is_present()) - return 0; + return NULL; for (i = 0; i < UART_NR; i++) { int this_minor = sunsu_reg.minor + i; @@ -1484,16 +1484,16 @@ static int __init sunsu_serial_console_init(void) break; } if (i == UART_NR) - return 0; + return NULL; if (sunsu_ports[i].port_node == 0) - return 0; + return NULL; sunsu_cons.index = i; - register_console(&sunsu_cons); - return 0; + + return &sunsu_cons; } #else -#define SUNSU_CONSOLE (NULL) +#define SUNSU_CONSOLE() (NULL) #define sunsu_serial_console_init() do { } while (0) #endif @@ -1510,6 +1510,7 @@ static int __init sunsu_serial_init(void) up->su_type == SU_PORT_KBD) continue; + spin_lock_init(&up->port.lock); up->port.flags |= UPF_BOOT_AUTOCONF; up->port.type = PORT_UNKNOWN; up->port.uartclk = (SU_BASE_BAUD * 16); @@ -1523,16 +1524,19 @@ static int __init sunsu_serial_init(void) } sunsu_reg.minor = sunserial_current_minor; - sunserial_current_minor += instance; sunsu_reg.nr = instance; - sunsu_reg.cons = SUNSU_CONSOLE; ret = uart_register_driver(&sunsu_reg); if (ret < 0) return ret; - sunsu_serial_console_init(); + sunsu_reg.tty_driver->name_base = sunsu_reg.minor - 64; + + sunserial_current_minor += instance; + + sunsu_reg.cons = SUNSU_CONSOLE(); + for (i = 0; i < UART_NR; i++) { struct uart_sunsu_port *up = &sunsu_ports[i]; 
diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index 5cc4d4c2935c..10b35c6f287d 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -1390,7 +1390,6 @@ static struct console sunzilog_console = { .index = -1, .data = &sunzilog_reg, }; -#define SUNZILOG_CONSOLE (&sunzilog_console) static int __init sunzilog_console_init(void) { @@ -1413,8 +1412,31 @@ static int __init sunzilog_console_init(void) register_console(&sunzilog_console); return 0; } + +static inline struct console *SUNZILOG_CONSOLE(void) +{ + int i; + + if (con_is_present()) + return NULL; + + for (i = 0; i < NUM_CHANNELS; i++) { + int this_minor = sunzilog_reg.minor + i; + + if ((this_minor - 64) == (serial_console - 1)) + break; + } + if (i == NUM_CHANNELS) + return NULL; + + sunzilog_console.index = i; + sunzilog_port_table[i].flags |= SUNZILOG_FLAG_IS_CONS; + + return &sunzilog_console; +} + #else -#define SUNZILOG_CONSOLE (NULL) +#define SUNZILOG_CONSOLE() (NULL) #define sunzilog_console_init() do { } while (0) #endif @@ -1666,14 +1688,15 @@ static int __init sunzilog_ports_init(void) } sunzilog_reg.nr = uart_count; - sunzilog_reg.cons = SUNZILOG_CONSOLE; - sunzilog_reg.minor = sunserial_current_minor; - sunserial_current_minor += uart_count; ret = uart_register_driver(&sunzilog_reg); if (ret == 0) { - sunzilog_console_init(); + sunzilog_reg.tty_driver->name_base = sunzilog_reg.minor - 64; + sunzilog_reg.cons = SUNZILOG_CONSOLE(); + + sunserial_current_minor += uart_count; + for (i = 0; i < NUM_CHANNELS; i++) { struct uart_sunzilog_port *up = &sunzilog_port_table[i]; diff --git a/include/asm-sparc/idprom.h b/include/asm-sparc/idprom.h index d856e640acd3..59083ed85232 100644 --- a/include/asm-sparc/idprom.h +++ b/include/asm-sparc/idprom.h @@ -7,27 +7,19 @@ #ifndef _SPARC_IDPROM_H #define _SPARC_IDPROM_H -/* Offset into the EEPROM where the id PROM is located on the 4c */ -#define IDPROM_OFFSET 0x7d8 +#include <linux/types.h> -/* On sun4m; physical. 
*/ -/* MicroSPARC(-II) does not decode 31rd bit, but it works. */ -#define IDPROM_OFFSET_M 0xfd8 - -struct idprom -{ - unsigned char id_format; /* Format identifier (always 0x01) */ - unsigned char id_machtype; /* Machine type */ - unsigned char id_ethaddr[6]; /* Hardware ethernet address */ - long id_date; /* Date of manufacture */ - unsigned int id_sernum:24; /* Unique serial number */ - unsigned char id_cksum; /* Checksum - xor of the data bytes */ - unsigned char reserved[16]; +struct idprom { + u8 id_format; /* Format identifier (always 0x01) */ + u8 id_machtype; /* Machine type */ + u8 id_ethaddr[6]; /* Hardware ethernet address */ + s32 id_date; /* Date of manufacture */ + u32 id_sernum:24; /* Unique serial number */ + u8 id_cksum; /* Checksum - xor of the data bytes */ + u8 reserved[16]; }; extern struct idprom *idprom; extern void idprom_init(void); -#define IDPROM_SIZE (sizeof(struct idprom)) - #endif /* !(_SPARC_IDPROM_H) */ diff --git a/include/asm-sparc/oplib.h b/include/asm-sparc/oplib.h index d0d76b30eb4c..f283f8aaf6a9 100644 --- a/include/asm-sparc/oplib.h +++ b/include/asm-sparc/oplib.h @@ -165,6 +165,7 @@ enum prom_input_device { PROMDEV_ITTYA, /* input from ttya */ PROMDEV_ITTYB, /* input from ttyb */ PROMDEV_IRSC, /* input from rsc */ + PROMDEV_IVCONS, /* input from virtual-console */ PROMDEV_I_UNK, }; @@ -177,6 +178,7 @@ enum prom_output_device { PROMDEV_OTTYA, /* to ttya */ PROMDEV_OTTYB, /* to ttyb */ PROMDEV_ORSC, /* to rsc */ + PROMDEV_OVCONS, /* to virtual-console */ PROMDEV_O_UNK, }; diff --git a/include/asm-sparc/uaccess.h b/include/asm-sparc/uaccess.h index f8f1ec1f06e6..3cf132e1aa25 100644 --- a/include/asm-sparc/uaccess.h +++ b/include/asm-sparc/uaccess.h @@ -120,17 +120,6 @@ case 8: __put_user_asm(x,d,addr,__pu_ret); break; \ default: __pu_ret = __put_user_bad(); break; \ } } else { __pu_ret = -EFAULT; } __pu_ret; }) -#define __put_user_check_ret(x,addr,size,retval) ({ \ -register int __foo __asm__ ("l1"); \ -if 
(__access_ok(addr,size)) { \ -switch (size) { \ -case 1: __put_user_asm_ret(x,b,addr,retval,__foo); break; \ -case 2: __put_user_asm_ret(x,h,addr,retval,__foo); break; \ -case 4: __put_user_asm_ret(x,,addr,retval,__foo); break; \ -case 8: __put_user_asm_ret(x,d,addr,retval,__foo); break; \ -default: if (__put_user_bad()) return retval; break; \ -} } else return retval; }) - #define __put_user_nocheck(x,addr,size) ({ \ register int __pu_ret; \ switch (size) { \ @@ -141,16 +130,6 @@ case 8: __put_user_asm(x,d,addr,__pu_ret); break; \ default: __pu_ret = __put_user_bad(); break; \ } __pu_ret; }) -#define __put_user_nocheck_ret(x,addr,size,retval) ({ \ -register int __foo __asm__ ("l1"); \ -switch (size) { \ -case 1: __put_user_asm_ret(x,b,addr,retval,__foo); break; \ -case 2: __put_user_asm_ret(x,h,addr,retval,__foo); break; \ -case 4: __put_user_asm_ret(x,,addr,retval,__foo); break; \ -case 8: __put_user_asm_ret(x,d,addr,retval,__foo); break; \ -default: if (__put_user_bad()) return retval; break; \ -} }) - #define __put_user_asm(x,size,addr,ret) \ __asm__ __volatile__( \ "/* Put user asm, inline. */\n" \ @@ -170,32 +149,6 @@ __asm__ __volatile__( \ : "=&r" (ret) : "r" (x), "m" (*__m(addr)), \ "i" (-EFAULT)) -#define __put_user_asm_ret(x,size,addr,ret,foo) \ -if (__builtin_constant_p(ret) && ret == -EFAULT) \ -__asm__ __volatile__( \ - "/* Put user asm ret, inline. */\n" \ -"1:\t" "st"#size " %1, %2\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ - ".align 4\n\t" \ - ".word 1b, __ret_efault\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "m" (*__m(addr))); \ -else \ -__asm__ __volatile( \ - "/* Put user asm ret, inline. 
*/\n" \ -"1:\t" "st"#size " %1, %2\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ -"3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %3, %%o0\n\t" \ - ".previous\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "m" (*__m(addr)), "i" (ret)) - extern int __put_user_bad(void); #define __get_user_check(x,addr,size,type) ({ \ diff --git a/include/asm-sparc64/a.out.h b/include/asm-sparc64/a.out.h index 02af289e3f46..35cb5c9e0c92 100644 --- a/include/asm-sparc64/a.out.h +++ b/include/asm-sparc64/a.out.h @@ -95,7 +95,11 @@ struct relocation_info /* used when header.a_machtype == M_SPARC */ #ifdef __KERNEL__ -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? 0xf0000000 : 0x80000000000L) +#define STACK_TOP32 ((1UL << 32UL) - PAGE_SIZE) +#define STACK_TOP64 (0x0000080000000000UL - (1UL << 32UL)) + +#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ + STACK_TOP32 : STACK_TOP64) #endif diff --git a/include/asm-sparc64/asi.h b/include/asm-sparc64/asi.h index 534855660f2a..662a21107ae6 100644 --- a/include/asm-sparc64/asi.h +++ b/include/asm-sparc64/asi.h @@ -25,14 +25,27 @@ /* SpitFire and later extended ASIs. The "(III)" marker designates * UltraSparc-III and later specific ASIs. The "(CMT)" marker designates - * Chip Multi Threading specific ASIs. + * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific + * ASIs, "(4V)" designates SUN4V specific ASIs. 
*/ #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */ #define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */ +#define ASI_BLK_AIUP_4V 0x16 /* (4V) Prim, user, block ld/st */ +#define ASI_BLK_AIUS_4V 0x17 /* (4V) Sec, user, block ld/st */ #define ASI_PHYS_USE_EC_L 0x1c /* PADDR, E-cachable, little endian*/ #define ASI_PHYS_BYPASS_EC_E_L 0x1d /* PADDR, E-bit, little endian */ +#define ASI_BLK_AIUP_L_4V 0x1e /* (4V) Prim, user, block, l-endian*/ +#define ASI_BLK_AIUS_L_4V 0x1f /* (4V) Sec, user, block, l-endian */ +#define ASI_SCRATCHPAD 0x20 /* (4V) Scratch Pad Registers */ +#define ASI_MMU 0x21 /* (4V) MMU Context Registers */ +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 /* (NG) init-store, twin load, + * secondary, user + */ #define ASI_NUCLEUS_QUAD_LDD 0x24 /* Cachable, qword load */ +#define ASI_QUEUE 0x25 /* (4V) Interrupt Queue Registers */ +#define ASI_QUAD_LDD_PHYS_4V 0x26 /* (4V) Physical, qword load */ #define ASI_NUCLEUS_QUAD_LDD_L 0x2c /* Cachable, qword load, l-endian */ +#define ASI_QUAD_LDD_PHYS_L_4V 0x2e /* (4V) Phys, qword load, l-endian */ #define ASI_PCACHE_DATA_STATUS 0x30 /* (III) PCache data stat RAM diag */ #define ASI_PCACHE_DATA 0x31 /* (III) PCache data RAM diag */ #define ASI_PCACHE_TAG 0x32 /* (III) PCache tag RAM diag */ @@ -137,6 +150,9 @@ #define ASI_FL16_SL 0xdb /* Secondary, 1 16-bit, fpu ld/st,L*/ #define ASI_BLK_COMMIT_P 0xe0 /* Primary, blk store commit */ #define ASI_BLK_COMMIT_S 0xe1 /* Secondary, blk store commit */ +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* (NG) init-store, twin load, + * primary, implicit + */ #define ASI_BLK_P 0xf0 /* Primary, blk ld/st */ #define ASI_BLK_S 0xf1 /* Secondary, blk ld/st */ #define ASI_BLK_PL 0xf8 /* Primary, blk ld/st, little */ diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 74de79dca915..c66a81bbc84d 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -1,41 +1,224 @@ /* cpudata.h: Per-cpu parameters. 
* - * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com) + * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net) */ #ifndef _SPARC64_CPUDATA_H #define _SPARC64_CPUDATA_H +#include <asm/hypervisor.h> +#include <asm/asi.h> + +#ifndef __ASSEMBLY__ + #include <linux/percpu.h> +#include <linux/threads.h> typedef struct { /* Dcache line 1 */ unsigned int __softirq_pending; /* must be 1st, see rtrap.S */ unsigned int multiplier; unsigned int counter; - unsigned int idle_volume; + unsigned int __pad1; unsigned long clock_tick; /* %tick's per second */ unsigned long udelay_val; - /* Dcache line 2 */ - unsigned int pgcache_size; - unsigned int __pad1; - unsigned long *pte_cache[2]; - unsigned long *pgd_cache; - - /* Dcache line 3, rarely used */ + /* Dcache line 2, rarely used */ unsigned int dcache_size; unsigned int dcache_line_size; unsigned int icache_size; unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - unsigned int __pad2; unsigned int __pad3; + unsigned int __pad4; } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) #define local_cpu_data() __get_cpu_var(__cpu_data) +/* Trap handling code needs to get at a few critical values upon + * trap entry and to process TSB misses. These cannot be in the + * per_cpu() area as we really need to lock them into the TLB and + * thus make them part of the main kernel image. As a result we + * try to make this as small as possible. + * + * This is padded out and aligned to 64-bytes to avoid false sharing + * on SMP. + */ + +/* If you modify the size of this structure, please update + * TRAP_BLOCK_SZ_SHIFT below. 
+ */ +struct thread_info; +struct trap_per_cpu { +/* D-cache line 1: Basic thread information, cpu and device mondo queues */ + struct thread_info *thread; + unsigned long pgd_paddr; + unsigned long cpu_mondo_pa; + unsigned long dev_mondo_pa; + +/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */ + unsigned long resum_mondo_pa; + unsigned long resum_kernel_buf_pa; + unsigned long nonresum_mondo_pa; + unsigned long nonresum_kernel_buf_pa; + +/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */ + struct hv_fault_status fault_info; + +/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */ + unsigned long cpu_mondo_block_pa; + unsigned long cpu_list_pa; + unsigned long __pad1[2]; + +/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */ + unsigned long __pad2[4]; +} __attribute__((aligned(64))); +extern struct trap_per_cpu trap_block[NR_CPUS]; +extern void init_cur_cpu_trap(struct thread_info *); +extern void setup_tba(void); + +struct cpuid_patch_entry { + unsigned int addr; + unsigned int cheetah_safari[4]; + unsigned int cheetah_jbus[4]; + unsigned int starfire[4]; + unsigned int sun4v[4]; +}; +extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; + +struct sun4v_1insn_patch_entry { + unsigned int addr; + unsigned int insn; +}; +extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch, + __sun4v_1insn_patch_end; + +struct sun4v_2insn_patch_entry { + unsigned int addr; + unsigned int insns[2]; +}; +extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, + __sun4v_2insn_patch_end; + +#endif /* !(__ASSEMBLY__) */ + +#define TRAP_PER_CPU_THREAD 0x00 +#define TRAP_PER_CPU_PGD_PADDR 0x08 +#define TRAP_PER_CPU_CPU_MONDO_PA 0x10 +#define TRAP_PER_CPU_DEV_MONDO_PA 0x18 +#define TRAP_PER_CPU_RESUM_MONDO_PA 0x20 +#define TRAP_PER_CPU_RESUM_KBUF_PA 0x28 +#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30 +#define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38 +#define TRAP_PER_CPU_FAULT_INFO 0x40 +#define 
TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0 +#define TRAP_PER_CPU_CPU_LIST_PA 0xc8 + +#define TRAP_BLOCK_SZ_SHIFT 8 + +#include <asm/scratchpad.h> + +#define __GET_CPUID(REG) \ + /* Spitfire implementation (default). */ \ +661: ldxa [%g0] ASI_UPA_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x1f, REG; \ + nop; \ + .section .cpuid_patch, "ax"; \ + /* Instruction location. */ \ + .word 661b; \ + /* Cheetah Safari implementation. */ \ + ldxa [%g0] ASI_SAFARI_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x3ff, REG; \ + nop; \ + /* Cheetah JBUS implementation. */ \ + ldxa [%g0] ASI_JBUS_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x1f, REG; \ + nop; \ + /* Starfire implementation. */ \ + sethi %hi(0x1fff40000d0 >> 9), REG; \ + sllx REG, 9, REG; \ + or REG, 0xd0, REG; \ + lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\ + /* sun4v implementation. */ \ + mov SCRATCHPAD_CPUID, REG; \ + ldxa [REG] ASI_SCRATCHPAD, REG; \ + nop; \ + nop; \ + .previous; + +#ifdef CONFIG_SMP + +#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(trap_block), DEST; \ + sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ + or DEST, %lo(trap_block), DEST; \ + add DEST, TMP, DEST; \ + +/* Clobbers TMP, current address space PGD phys address into DEST. */ +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; + +/* Clobbers TMP, loads local processor's IRQ work area into DEST. */ +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(__irq_work), DEST; \ + sllx TMP, 6, TMP; \ + or DEST, %lo(__irq_work), DEST; \ + add DEST, TMP, DEST; + +/* Clobbers TMP, loads DEST with current thread info pointer. */ +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_THREAD], DEST; + +/* Given the current thread info pointer in THR, load the per-cpu + * area base of the current processor into DEST. REG1, REG2, and REG3 are + * clobbered. 
+ * + * You absolutely cannot use DEST as a temporary in this code. The + * reason is that traps can happen during execution, and return from + * trap will load the fully resolved DEST per-cpu base. This can corrupt + * the calculations done by the macro mid-stream. + */ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ + ldub [THR + TI_CPU], REG1; \ + sethi %hi(__per_cpu_shift), REG3; \ + sethi %hi(__per_cpu_base), REG2; \ + ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ + ldx [REG2 + %lo(__per_cpu_base)], REG2; \ + sllx REG1, REG3, REG3; \ + add REG3, REG2, DEST; + +#else + +#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + sethi %hi(trap_block), DEST; \ + or DEST, %lo(trap_block), DEST; \ + +/* Uniprocessor versions, we know the cpuid is zero. */ +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; + +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + sethi %hi(__irq_work), DEST; \ + or DEST, %lo(__irq_work), DEST; + +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_THREAD], DEST; + +/* No per-cpu areas on uniprocessor, so no need to load DEST. */ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) + +#endif /* !(CONFIG_SMP) */ + #endif /* _SPARC64_CPUDATA_H */ diff --git a/include/asm-sparc64/elf.h b/include/asm-sparc64/elf.h index 69539a8ab833..303d85e2f82e 100644 --- a/include/asm-sparc64/elf.h +++ b/include/asm-sparc64/elf.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/spitfire.h> #endif /* @@ -68,6 +69,7 @@ #define HWCAP_SPARC_MULDIV 8 #define HWCAP_SPARC_V9 16 #define HWCAP_SPARC_ULTRA3 32 +#define HWCAP_SPARC_BLKINIT 64 /* * These are used to set parameters in the core dumps. @@ -145,11 +147,21 @@ typedef struct { instruction set this cpu supports. */ /* On Ultra, we support all of the v8 capabilities. 
*/ -#define ELF_HWCAP ((HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | \ - HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | \ - HWCAP_SPARC_V9) | \ - ((tlb_type == cheetah || tlb_type == cheetah_plus) ? \ - HWCAP_SPARC_ULTRA3 : 0)) +static inline unsigned int sparc64_elf_hwcap(void) +{ + unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | + HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | + HWCAP_SPARC_V9); + + if (tlb_type == cheetah || tlb_type == cheetah_plus) + cap |= HWCAP_SPARC_ULTRA3; + else if (tlb_type == hypervisor) + cap |= HWCAP_SPARC_BLKINIT; + + return cap; +} + +#define ELF_HWCAP sparc64_elf_hwcap(); /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in diff --git a/include/asm-sparc64/head.h b/include/asm-sparc64/head.h index 0abd3a674e8f..67960a751f4d 100644 --- a/include/asm-sparc64/head.h +++ b/include/asm-sparc64/head.h @@ -4,12 +4,21 @@ #include <asm/pstate.h> + /* wrpr %g0, val, %gl */ +#define SET_GL(val) \ + .word 0xa1902000 | val + + /* rdpr %gl, %gN */ +#define GET_GL_GLOBAL(N) \ + .word 0x81540000 | (N << 25) + #define KERNBASE 0x400000 #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ) #define __CHEETAH_ID 0x003e0014 #define __JALAPENO_ID 0x003e0016 +#define __SERRANO_ID 0x003e0022 #define CHEETAH_MANUF 0x003e #define CHEETAH_IMPL 0x0014 /* Ultra-III */ @@ -19,6 +28,12 @@ #define PANTHER_IMPL 0x0019 /* Ultra-IV+ */ #define SERRANO_IMPL 0x0022 /* Ultra-IIIi+ */ +#define BRANCH_IF_SUN4V(tmp1,label) \ + sethi %hi(is_sun4v), %tmp1; \ + lduw [%tmp1 + %lo(is_sun4v)], %tmp1; \ + brnz,pn %tmp1, label; \ + nop + #define BRANCH_IF_CHEETAH_BASE(tmp1,tmp2,label) \ rdpr %ver, %tmp1; \ sethi %hi(__CHEETAH_ID), %tmp2; \ diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h new file mode 100644 index 000000000000..612bf319753f --- /dev/null +++ b/include/asm-sparc64/hypervisor.h @@ -0,0 +1,2128 @@ +#ifndef _SPARC64_HYPERVISOR_H +#define _SPARC64_HYPERVISOR_H + +/* Sun4v 
hypervisor interfaces and defines. + * + * Hypervisor calls are made via traps to software traps number 0x80 + * and above. Registers %o0 to %o5 serve as argument, status, and + * return value registers. + * + * There are two kinds of these traps. First there are the normal + * "fast traps" which use software trap 0x80 and encode the function + * to invoke by number in register %o5. Argument and return value + * handling is as follows: + * + * ----------------------------------------------- + * | %o5 | function number | undefined | + * | %o0 | argument 0 | return status | + * | %o1 | argument 1 | return value 1 | + * | %o2 | argument 2 | return value 2 | + * | %o3 | argument 3 | return value 3 | + * | %o4 | argument 4 | return value 4 | + * ----------------------------------------------- + * + * The second type are "hyper-fast traps" which encode the function + * number in the software trap number itself. So these use trap + * numbers > 0x80. The register usage for hyper-fast traps is as + * follows: + * + * ----------------------------------------------- + * | %o0 | argument 0 | return status | + * | %o1 | argument 1 | return value 1 | + * | %o2 | argument 2 | return value 2 | + * | %o3 | argument 3 | return value 3 | + * | %o4 | argument 4 | return value 4 | + * ----------------------------------------------- + * + * Registers providing explicit arguments to the hypervisor calls + * are volatile across the call. Upon return their values are + * undefined unless explicitly specified as containing a particular + * return value by the specific call. The return status is always + * returned in register %o0, zero indicates a successful execution of + * the hypervisor call and other values indicate an error status as + * defined below. So, for example, if a hyper-fast trap takes + * arguments 0, 1, and 2, then %o0, %o1, and %o2 are volatile across + * the call and %o3, %o4, and %o5 would be preserved. 
+ * + * If the hypervisor trap is invalid, or the fast trap function number + * is invalid, HV_EBADTRAP will be returned in %o0. Also, all 64-bits + * of the argument and return values are significant. + */ + +/* Trap numbers. */ +#define HV_FAST_TRAP 0x80 +#define HV_MMU_MAP_ADDR_TRAP 0x83 +#define HV_MMU_UNMAP_ADDR_TRAP 0x84 +#define HV_TTRACE_ADDENTRY_TRAP 0x85 +#define HV_CORE_TRAP 0xff + +/* Error codes. */ +#define HV_EOK 0 /* Successful return */ +#define HV_ENOCPU 1 /* Invalid CPU id */ +#define HV_ENORADDR 2 /* Invalid real address */ +#define HV_ENOINTR 3 /* Invalid interrupt id */ +#define HV_EBADPGSZ 4 /* Invalid pagesize encoding */ +#define HV_EBADTSB 5 /* Invalid TSB description */ +#define HV_EINVAL 6 /* Invalid argument */ +#define HV_EBADTRAP 7 /* Invalid function number */ +#define HV_EBADALIGN 8 /* Invalid address alignment */ +#define HV_EWOULDBLOCK 9 /* Cannot complete w/o blocking */ +#define HV_ENOACCESS 10 /* No access to resource */ +#define HV_EIO 11 /* I/O error */ +#define HV_ECPUERROR 12 /* CPU in error state */ +#define HV_ENOTSUPPORTED 13 /* Function not supported */ +#define HV_ENOMAP 14 /* No mapping found */ +#define HV_ETOOMANY 15 /* Too many items specified */ + +/* mach_exit() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_EXIT + * ARG0: exit code + * ERRORS: This service does not return. + * + * Stop all CPUs in the virtual domain and place them into the stopped + * state. The 64-bit exit code may be passed to a service entity as + * the domain's exit status. On systems without a service entity, the + * domain will undergo a reset, and the boot firmware will be + * reloaded. + * + * This function will never return to the guest that invokes it. + * + * Note: By convention an exit code of zero denotes a successful exit by + * the guest code. A non-zero exit code denotes a guest specific + * error indication. + * + */ +#define HV_FAST_MACH_EXIT 0x00 + +/* Domain services. 
*/ + +/* mach_desc() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_DESC + * ARG0: buffer + * ARG1: length + * RET0: status + * RET1: length + * ERRORS: HV_EBADALIGN Buffer is badly aligned + * HV_ENORADDR Buffer is to an illegal real address. + * HV_EINVAL Buffer length is too small for complete + * machine description. + * + * Copy the most current machine description into the buffer indicated + * by the real address in ARG0. The buffer provided must be 16 byte + * aligned. Upon success or HV_EINVAL, this service returns the + * actual size of the machine description in the RET1 return value. + * + * Note: A method of determining the appropriate buffer size for the + * machine description is to first call this service with a buffer + * length of 0 bytes. + */ +#define HV_FAST_MACH_DESC 0x01 + +/* mach_sir() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_SIR + * ERRORS: This service does not return. + * + * Perform a software initiated reset of the virtual machine domain. + * All CPUs are captured as soon as possible, all hardware devices are + * returned to the entry default state, and the domain is restarted at + * the SIR (trap type 0x04) real trap table (RTBA) entry point on one + * of the CPUs. The single CPU restarted is selected as determined by + * platform specific policy. Memory is preserved across this + * operation. + */ +#define HV_FAST_MACH_SIR 0x02 + +/* mach_set_soft_state() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_SET_SOFT_STATE + * ARG0: software state + * ARG1: software state description pointer + * RET0: status + * ERRORS: EINVAL software state not valid or software state + * description is not NULL terminated + * ENORADDR software state description pointer is not a + * valid real address + * EBADALIGNED software state description is not correctly + * aligned + * + * This allows the guest to report its soft state to the hypervisor. There + * are two primary components to this state.
The first part states whether + * the guest software is running or not. The second contains optional + * details specific to the software. + * + * The software state argument is defined below in HV_SOFT_STATE_*, and + * indicates whether the guest is operating normally or in a transitional + * state. + * + * The software state description argument is a real address of a data buffer + * of size 32-bytes aligned on a 32-byte boundary. It is treated as a NULL + * terminated 7-bit ASCII string of up to 31 characters not including the + * NULL termination. + */ +#define HV_FAST_MACH_SET_SOFT_STATE 0x03 +#define HV_SOFT_STATE_NORMAL 0x01 +#define HV_SOFT_STATE_TRANSITION 0x02 + +/* mach_get_soft_state() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_GET_SOFT_STATE + * ARG0: software state description pointer + * RET0: status + * RET1: software state + * ERRORS: ENORADDR software state description pointer is not a + * valid real address + * EBADALIGNED software state description is not correctly + * aligned + * + * Retrieve the current value of the guest's software state. The rules + * for the software state pointer are the same as for mach_set_soft_state() + * above. + */ +#define HV_FAST_MACH_GET_SOFT_STATE 0x04 + +/* CPU services. + * + * CPUs represent devices that can execute software threads. A single + * chip that contains multiple cores or strands is represented as + * multiple CPUs with unique CPU identifiers. CPUs are exported to + * OBP via the machine description (and to the OS via the OBP device + * tree). CPUs are always in one of three states: stopped, running, + * or error. + * + * A CPU ID is a pre-assigned 16-bit value that uniquely identifies a + * CPU within a logical domain. Operations that are to be performed + * on multiple CPUs specify them via a CPU list. A CPU list is an + * array in real memory, of which each 16-bit word is a CPU ID. CPU + * lists are passed through the API as two arguments.
The first is + * the number of entries (16-bit words) in the CPU list, and the + * second is the (real address) pointer to the CPU ID list. + */ + +/* cpu_start() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_START + * ARG0: CPU ID + * ARG1: PC + * ARG2: RTBA + * ARG3: target ARG0 + * RET0: status + * ERRORS: ENOCPU Invalid CPU ID + * EINVAL Target CPU ID is not in the stopped state + * ENORADDR Invalid PC or RTBA real address + * EBADALIGN Unaligned PC or unaligned RTBA + * EWOULDBLOCK Starting resources are not available + * + * Start CPU with given CPU ID with PC in %pc and with a real trap + * base address value of RTBA. The indicated CPU must be in the + * stopped state. The supplied RTBA must be aligned on a 256 byte + * boundary. On successful completion, the specified CPU will be in + * the running state and will be supplied with "target ARG0" in %o0 + * and RTBA in %tba. + */ +#define HV_FAST_CPU_START 0x10 + +/* cpu_stop() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_STOP + * ARG0: CPU ID + * RET0: status + * ERRORS: ENOCPU Invalid CPU ID + * EINVAL Target CPU ID is the current cpu + * EINVAL Target CPU ID is not in the running state + * EWOULDBLOCK Stopping resources are not available + * ENOTSUPPORTED Not supported on this platform + * + * The specified CPU is stopped. The indicated CPU must be in the + * running state. On completion, it will be in the stopped state. It + * is not legal to stop the current CPU. + * + * Note: As this service cannot be used to stop the current cpu, this service + * may not be used to stop the last running CPU in a domain. To stop + * and exit a running domain, a guest must use the mach_exit() service. + */ +#define HV_FAST_CPU_STOP 0x11 + +/* cpu_yield() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_YIELD + * RET0: status + * ERRORS: No possible error. + * + * Suspend execution on the current CPU. Execution will resume when + * an interrupt (device, %stick_compare, or cross-call) is targeted to + * the CPU.
On some CPUs, this API may be used by the hypervisor to + * save power by disabling hardware strands. + */ +#define HV_FAST_CPU_YIELD 0x12 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_cpu_yield(void); +#endif + +/* cpu_qconf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_QCONF + * ARG0: queue + * ARG1: base real address + * ARG2: number of entries + * RET0: status + * ERRORS: ENORADDR Invalid base real address + * EINVAL Invalid queue or number of entries is less + * than 2 or too large. + * EBADALIGN Base real address is not correctly aligned + * for size. + * + * Configure the given queue to be placed at the given base real + * address, with the given number of entries. The number of entries + * must be a power of 2. The base real address must be aligned + * exactly to match the queue size. Each queue entry is 64 bytes + * long, so for example a 32 entry queue must be aligned on a 2048 + * byte real address boundary. + * + * The specified queue is unconfigured if the number of entries is given + * as zero. + * + * For the current version of this API service, the argument queue is defined + * as follows: + * + * queue description + * ----- ------------------------- + * 0x3c cpu mondo queue + * 0x3d device mondo queue + * 0x3e resumable error queue + * 0x3f non-resumable error queue + * + * Note: The maximum number of entries for each queue for a specific cpu may + * be determined from the machine description. 
+ */ +#define HV_FAST_CPU_QCONF 0x14 +#define HV_CPU_QUEUE_CPU_MONDO 0x3c +#define HV_CPU_QUEUE_DEVICE_MONDO 0x3d +#define HV_CPU_QUEUE_RES_ERROR 0x3e +#define HV_CPU_QUEUE_NONRES_ERROR 0x3f + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_cpu_qconf(unsigned long type, + unsigned long queue_paddr, + unsigned long num_queue_entries); +#endif + +/* cpu_qinfo() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_QINFO + * ARG0: queue + * RET0: status + * RET1: base real address + * RET2: number of entries + * ERRORS: EINVAL Invalid queue + * + * Return the configuration info for the given queue. The base real + * address and number of entries of the defined queue are returned. + * The queue argument values are the same as for cpu_qconf() above. + * + * If the specified queue is a valid queue number, but no queue has + * been defined, the number of entries will be set to zero and the + * base real address returned is undefined. + */ +#define HV_FAST_CPU_QINFO 0x15 + +/* cpu_mondo_send() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_MONDO_SEND + * ARG0-1: CPU list + * ARG2: data real address + * RET0: status + * ERRORS: EBADALIGN Mondo data is not 64-byte aligned or CPU list + * is not 2-byte aligned. + * ENORADDR Invalid data mondo address, or invalid cpu list + * address. + * ENOCPU Invalid cpu in CPU list + * EWOULDBLOCK Some or all of the listed CPUs did not receive + * the mondo + * ECPUERROR One or more of the listed CPUs are in error + * state, use HV_FAST_CPU_STATE to see which ones + * EINVAL CPU list includes caller's CPU ID + * + * Send a mondo interrupt to the CPUs in the given CPU list with the + * 64-bytes at the given data real address. The data must be 64-byte + * aligned. The mondo data will be delivered to the cpu_mondo queues + * of the recipient CPUs.
+ * + * In all cases, error or not, the CPUs in the CPU list to which the + * mondo has been successfully delivered will be indicated by having + * their entry in CPU list updated with the value 0xffff. + */ +#define HV_FAST_CPU_MONDO_SEND 0x42 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, unsigned long cpu_list_pa, unsigned long mondo_block_pa); +#endif + +/* cpu_myid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_MYID + * RET0: status + * RET1: CPU ID + * ERRORS: No errors defined. + * + * Return the hypervisor ID handle for the current CPU. Used by a + * virtual CPU to discover its own identity. + */ +#define HV_FAST_CPU_MYID 0x16 + +/* cpu_state() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_STATE + * ARG0: CPU ID + * RET0: status + * RET1: state + * ERRORS: ENOCPU Invalid CPU ID + * + * Retrieve the current state of the CPU with the given CPU ID. + */ +#define HV_FAST_CPU_STATE 0x17 +#define HV_CPU_STATE_STOPPED 0x01 +#define HV_CPU_STATE_RUNNING 0x02 +#define HV_CPU_STATE_ERROR 0x03 + +#ifndef __ASSEMBLY__ +extern long sun4v_cpu_state(unsigned long cpuid); +#endif + +/* cpu_set_rtba() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_SET_RTBA + * ARG0: RTBA + * RET0: status + * RET1: previous RTBA + * ERRORS: ENORADDR Invalid RTBA real address + * EBADALIGN RTBA is incorrectly aligned for a trap table + * + * Set the real trap base address of the local cpu to the given RTBA. + * The supplied RTBA must be aligned on a 256 byte boundary. Upon + * success the previous value of the RTBA is returned in RET1. + * + * Note: This service does not affect %tba + */ +#define HV_FAST_CPU_SET_RTBA 0x18 + +/* cpu_get_rtba() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_GET_RTBA + * RET0: status + * RET1: current RTBA + * ERRORS: No possible error. + * + * Returns the current value of RTBA in RET1. + */ +#define HV_FAST_CPU_GET_RTBA 0x19 + +/* MMU services.
+ * + * Layout of a TSB description for mmu_tsb_ctx{,non}0() calls. + */ +#ifndef __ASSEMBLY__ +struct hv_tsb_descr { + unsigned short pgsz_idx; + unsigned short assoc; + unsigned int num_ttes; /* in TTEs */ + unsigned int ctx_idx; + unsigned int pgsz_mask; + unsigned long tsb_base; + unsigned long resv; +}; +#endif +#define HV_TSB_DESCR_PGSZ_IDX_OFFSET 0x00 +#define HV_TSB_DESCR_ASSOC_OFFSET 0x02 +#define HV_TSB_DESCR_NUM_TTES_OFFSET 0x04 +#define HV_TSB_DESCR_CTX_IDX_OFFSET 0x08 +#define HV_TSB_DESCR_PGSZ_MASK_OFFSET 0x0c +#define HV_TSB_DESCR_TSB_BASE_OFFSET 0x10 +#define HV_TSB_DESCR_RESV_OFFSET 0x18 + +/* Page size bitmask. */ +#define HV_PGSZ_MASK_8K (1 << 0) +#define HV_PGSZ_MASK_64K (1 << 1) +#define HV_PGSZ_MASK_512K (1 << 2) +#define HV_PGSZ_MASK_4MB (1 << 3) +#define HV_PGSZ_MASK_32MB (1 << 4) +#define HV_PGSZ_MASK_256MB (1 << 5) +#define HV_PGSZ_MASK_2GB (1 << 6) +#define HV_PGSZ_MASK_16GB (1 << 7) + +/* Page size index. The value given in the TSB descriptor must correspond + * to the smallest page size specified in the pgsz_mask page size bitmask. + */ +#define HV_PGSZ_IDX_8K 0 +#define HV_PGSZ_IDX_64K 1 +#define HV_PGSZ_IDX_512K 2 +#define HV_PGSZ_IDX_4MB 3 +#define HV_PGSZ_IDX_32MB 4 +#define HV_PGSZ_IDX_256MB 5 +#define HV_PGSZ_IDX_2GB 6 +#define HV_PGSZ_IDX_16GB 7 + +/* MMU fault status area. + * + * MMU related faults have their status and fault address information + * placed into a memory region made available by privileged code. Each + * virtual processor must make a mmu_fault_area_conf() call to tell the + * hypervisor where that processor's fault status should be stored. + * + * The fault status block is a multiple of 64-bytes and must be aligned + * on a 64-byte boundary. 
+ */ +#ifndef __ASSEMBLY__ +struct hv_fault_status { + unsigned long i_fault_type; + unsigned long i_fault_addr; + unsigned long i_fault_ctx; + unsigned long i_reserved[5]; + unsigned long d_fault_type; + unsigned long d_fault_addr; + unsigned long d_fault_ctx; + unsigned long d_reserved[5]; +}; +#endif +#define HV_FAULT_I_TYPE_OFFSET 0x00 +#define HV_FAULT_I_ADDR_OFFSET 0x08 +#define HV_FAULT_I_CTX_OFFSET 0x10 +#define HV_FAULT_D_TYPE_OFFSET 0x40 +#define HV_FAULT_D_ADDR_OFFSET 0x48 +#define HV_FAULT_D_CTX_OFFSET 0x50 + +#define HV_FAULT_TYPE_FAST_MISS 1 +#define HV_FAULT_TYPE_FAST_PROT 2 +#define HV_FAULT_TYPE_MMU_MISS 3 +#define HV_FAULT_TYPE_INV_RA 4 +#define HV_FAULT_TYPE_PRIV_VIOL 5 +#define HV_FAULT_TYPE_PROT_VIOL 6 +#define HV_FAULT_TYPE_NFO 7 +#define HV_FAULT_TYPE_NFO_SEFF 8 +#define HV_FAULT_TYPE_INV_VA 9 +#define HV_FAULT_TYPE_INV_ASI 10 +#define HV_FAULT_TYPE_NC_ATOMIC 11 +#define HV_FAULT_TYPE_PRIV_ACT 12 +#define HV_FAULT_TYPE_RESV1 13 +#define HV_FAULT_TYPE_UNALIGNED 14 +#define HV_FAULT_TYPE_INV_PGSZ 15 +/* Values 16 --> -2 are reserved. */ +#define HV_FAULT_TYPE_MULTIPLE -1 + +/* Flags argument for mmu_{map,unmap}_addr(), mmu_demap_{page,context,all}(), + * and mmu_{map,unmap}_perm_addr(). + */ +#define HV_MMU_DMMU 0x01 +#define HV_MMU_IMMU 0x02 +#define HV_MMU_ALL (HV_MMU_DMMU | HV_MMU_IMMU) + +/* mmu_map_addr() + * TRAP: HV_MMU_MAP_ADDR_TRAP + * ARG0: virtual address + * ARG1: mmu context + * ARG2: TTE + * ARG3: flags (HV_MMU_{IMMU,DMMU}) + * ERRORS: EINVAL Invalid virtual address, mmu context, or flags + * EBADPGSZ Invalid page size value + * ENORADDR Invalid real address in TTE + * + * Create a non-permanent mapping using the given TTE, virtual + * address, and mmu context. The flags argument determines which + * (data, or instruction, or both) TLB the mapping gets loaded into. + * + * The behavior is undefined if the valid bit is clear in the TTE. 
+ * + * Note: This API call is for privileged code to specify temporary translation + * mappings without the need to create and manage a TSB. + */ + +/* mmu_unmap_addr() + * TRAP: HV_MMU_UNMAP_ADDR_TRAP + * ARG0: virtual address + * ARG1: mmu context + * ARG2: flags (HV_MMU_{IMMU,DMMU}) + * ERRORS: EINVAL Invalid virtual address, mmu context, or flags + * + * Demaps the given virtual address in the given mmu context on this + * CPU. This function is intended to be used to demap pages mapped + * with mmu_map_addr. This service is equivalent to invoking + * mmu_demap_page() with only the current CPU in the CPU list. The + * flags argument determines which (data, or instruction, or both) TLB + * the mapping gets unmapped from. + * + * Attempting to perform an unmap operation for a previously defined + * permanent mapping will have undefined results. + */ + +/* mmu_tsb_ctx0() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTX0 + * ARG0: number of TSB descriptions + * ARG1: TSB descriptions pointer + * RET0: status + * ERRORS: ENORADDR Invalid TSB descriptions pointer or + * TSB base within a descriptor + * EBADALIGN TSB descriptions pointer is not aligned + * to an 8-byte boundary, or TSB base + * within a descriptor is not aligned for + * the given TSB size + * EBADPGSZ Invalid page size in a TSB descriptor + * EBADTSB Invalid associativity or size in a TSB + * descriptor + * EINVAL Invalid number of TSB descriptions, or + * invalid context index in a TSB + * descriptor, or index page size not + * equal to smallest page size in page + * size bitmask field. + * + * Configures the TSBs for the current CPU for virtual addresses with + * context zero. The TSB descriptions pointer is a pointer to an + * array of the given number of TSB descriptions. + * + * Note: The maximum number of TSBs available to a virtual CPU is given by the + * mmu-max-#tsbs property of the cpu's corresponding "cpu" node in the + * machine description. 
+ */ +#define HV_FAST_MMU_TSB_CTX0 0x20 + +/* mmu_tsb_ctxnon0() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTXNON0 + * ARG0: number of TSB descriptions + * ARG1: TSB descriptions pointer + * RET0: status + * ERRORS: Same as for mmu_tsb_ctx0() above. + * + * Configures the TSBs for the current CPU for virtual addresses with + * non-zero contexts. The TSB descriptions pointer is a pointer to an + * array of the given number of TSB descriptions. + * + * Note: A maximum of 16 TSBs may be specified in the TSB description list. + */ +#define HV_FAST_MMU_TSB_CTXNON0 0x21 + +/* mmu_demap_page() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_DEMAP_PAGE + * ARG0: reserved, must be zero + * ARG1: reserved, must be zero + * ARG2: virtual address + * ARG3: mmu context + * ARG4: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid virtual address, context, or + * flags value + * ENOTSUPPORTED ARG0 or ARG1 is non-zero + * + * Demaps any page mapping of the given virtual address in the given + * mmu context for the current virtual CPU. Any virtually tagged + * caches are guaranteed to be kept consistent. The flags argument + * determines which TLB (instruction, or data, or both) participate in + * the operation. + * + * ARG0 and ARG1 are both reserved and must be set to zero. + */ +#define HV_FAST_MMU_DEMAP_PAGE 0x22 + +/* mmu_demap_ctx() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_DEMAP_CTX + * ARG0: reserved, must be zero + * ARG1: reserved, must be zero + * ARG2: mmu context + * ARG3: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid context or flags value + * ENOTSUPPORTED ARG0 or ARG1 is non-zero + * + * Demaps all non-permanent virtual page mappings previously specified + * for the given context for the current virtual CPU. Any virtual + * tagged caches are guaranteed to be kept consistent. The flags + * argument determines which TLB (instruction, or data, or both) + * participate in the operation.
+ * + * ARG0 and ARG1 are both reserved and must be set to zero. + */ +#define HV_FAST_MMU_DEMAP_CTX 0x23 + +/* mmu_demap_all() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_DEMAP_ALL + * ARG0: reserved, must be zero + * ARG1: reserved, must be zero + * ARG2: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid flags value + * ENOTSUPPORTED ARG0 or ARG1 is non-zero + * + * Demaps all non-permanent virtual page mappings previously specified + * for the current virtual CPU. Any virtual tagged caches are + * guaranteed to be kept consistent. The flags argument determines + * which TLB (instruction, or data, or both) participate in the + * operation. + * + * ARG0 and ARG1 are both reserved and must be set to zero. + */ +#define HV_FAST_MMU_DEMAP_ALL 0x24 + +/* mmu_map_perm_addr() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_MAP_PERM_ADDR + * ARG0: virtual address + * ARG1: reserved, must be zero + * ARG2: TTE + * ARG3: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid virtual address or flags value + * EBADPGSZ Invalid page size value + * ENORADDR Invalid real address in TTE + * ETOOMANY Too many mappings (max of 8 reached) + * + * Create a permanent mapping using the given TTE and virtual address + * for context 0 on the calling virtual CPU. A maximum of 8 such + * permanent mappings may be specified by privileged code. Mappings + * may be removed with mmu_unmap_perm_addr(). + * + * The behavior is undefined if a TTE with the valid bit clear is given. + * + * Note: This call is used to specify address space mappings for which + * privileged code does not expect to receive misses. For example, + * this mechanism can be used to map kernel nucleus code and data.
+ */ +#define HV_FAST_MMU_MAP_PERM_ADDR 0x25 + +/* mmu_fault_area_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_FAULT_AREA_CONF + * ARG0: real address + * RET0: status + * RET1: previous mmu fault area real address + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Invalid alignment for fault area + * + * Configure the MMU fault status area for the calling CPU. A 64-byte + * aligned real address specifies where MMU fault status information + * is placed. The return value is the previously specified area, or 0 + * for the first invocation. Specifying a fault area at real address + * 0 is not allowed. + */ +#define HV_FAST_MMU_FAULT_AREA_CONF 0x26 + +/* mmu_enable() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_ENABLE + * ARG0: enable flag + * ARG1: return target address + * RET0: status + * ERRORS: ENORADDR Invalid real address when disabling + * translation. + * EBADALIGN The return target address is not + * aligned to an instruction. + * EINVAL The enable flag requests the current + * operating mode (e.g. disable if already + * disabled) + * + * Enable or disable virtual address translation for the calling CPU + * within the virtual machine domain. If the enable flag is zero, + * translation is disabled, any non-zero value will enable + * translation. + * + * When this function returns, the newly selected translation mode + * will be active. If the mmu is being enabled, then the return + * target address is a virtual address else it is a real address. + * + * Upon successful completion, control will be returned to the given + * return target address (ie. the cpu will jump to that address). On + * failure, the previous mmu mode remains and the trap simply returns + * as normal with the appropriate error code in RET0.
+ */ +#define HV_FAST_MMU_ENABLE 0x27 + +/* mmu_unmap_perm_addr() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_UNMAP_PERM_ADDR + * ARG0: virtual address + * ARG1: reserved, must be zero + * ARG2: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid virtual address or flags value + * ENOMAP Specified mapping was not found + * + * Demaps any permanent page mapping (established via + * mmu_map_perm_addr()) at the given virtual address for context 0 on + * the current virtual CPU. Any virtual tagged caches are guaranteed + * to be kept consistent. + */ +#define HV_FAST_MMU_UNMAP_PERM_ADDR 0x28 + +/* mmu_tsb_ctx0_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTX0_INFO + * ARG0: max TSBs + * ARG1: buffer pointer + * RET0: status + * RET1: number of TSBs + * ERRORS: EINVAL Supplied buffer is too small + * EBADALIGN The buffer pointer is badly aligned + * ENORADDR Invalid real address for buffer pointer + * + * Return the TSB configuration as previously defined by mmu_tsb_ctx0() + * into the provided buffer. The size of the buffer is given in ARG1 + * in terms of the number of TSB description entries. + * + * Upon return, RET1 always contains the number of TSB descriptions + * previously configured. If zero TSBs were configured, EOK is + * returned with RET1 containing 0. + */ +#define HV_FAST_MMU_TSB_CTX0_INFO 0x29 + +/* mmu_tsb_ctxnon0_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTXNON0_INFO + * ARG0: max TSBs + * ARG1: buffer pointer + * RET0: status + * RET1: number of TSBs + * ERRORS: EINVAL Supplied buffer is too small + * EBADALIGN The buffer pointer is badly aligned + * ENORADDR Invalid real address for buffer pointer + * + * Return the TSB configuration as previously defined by + * mmu_tsb_ctxnon0() into the provided buffer. The size of the buffer + * is given in ARG1 in terms of the number of TSB description entries. + * + * Upon return, RET1 always contains the number of TSB descriptions + * previously configured.
If zero TSBs were configured, EOK is + * returned with RET1 containing 0. + */ +#define HV_FAST_MMU_TSB_CTXNON0_INFO 0x2a + +/* mmu_fault_area_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_FAULT_AREA_INFO + * RET0: status + * RET1: fault area real address + * ERRORS: No errors defined. + * + * Return the currently defined MMU fault status area for the current + * CPU. The real address of the fault status area is returned in + * RET1, or 0 is returned in RET1 if no fault status area is defined. + * + * Note: mmu_fault_area_conf() may be called with the return value (RET1) + * from this service if there is a need to save and restore the fault + * area for a cpu. + */ +#define HV_FAST_MMU_FAULT_AREA_INFO 0x2b + +/* Cache and Memory services. */ + +/* mem_scrub() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MEM_SCRUB + * ARG0: real address + * ARG1: length + * RET0: status + * RET1: length scrubbed + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Start address or length are not correctly + * aligned + * EINVAL Length is zero + * + * Zero the memory contents in the range real address to real address + * plus length minus 1. Also, valid ECC will be generated for that + * memory address range. Scrubbing is started at the given real + * address, but may not scrub the entire given length. The actual + * length scrubbed will be returned in RET1. + * + * The real address and length must be aligned on an 8K boundary, or + * contain the start address and length from a sun4v error report. + * + * Note: There are two uses for this function. The first use is to block clear + * and initialize memory and the second is to scrub an uncorrectable + * error reported via a resumable or non-resumable trap. The second + * use requires the arguments to be equal to the real address and length + * provided in a sun4v memory error report.
+ */ +#define HV_FAST_MEM_SCRUB 0x31 + +/* mem_sync() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MEM_SYNC + * ARG0: real address + * ARG1: length + * RET0: status + * RET1: length synced + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Start address or length are not correctly + * aligned + * EINVAL Length is zero + * + * Force the next access within the real address to real address plus + * length minus 1 to be fetched from main system memory. Less than + * the given length may be synced, the actual amount synced is + * returned in RET1. The real address and length must be aligned on + * an 8K boundary. + */ +#define HV_FAST_MEM_SYNC 0x32 + +/* Time of day services. + * + * The hypervisor maintains the time of day on a per-domain basis. + * Changing the time of day in one domain does not affect the time of + * day on any other domain. + * + * Time is described by a single unsigned 64-bit word which is the + * number of seconds since the UNIX Epoch (00:00:00 UTC, January 1, + * 1970). + */ + +/* tod_get() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TOD_GET + * RET0: status + * RET1: TOD + * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable + * ENOTSUPPORTED If TOD not supported on this platform + * + * Return the current time of day. May block if TOD access is + * temporarily not possible. + */ +#define HV_FAST_TOD_GET 0x50 + +/* tod_set() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TOD_SET + * ARG0: TOD + * RET0: status + * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable + * ENOTSUPPORTED If TOD not supported on this platform + * + * The current time of day is set to the value specified in ARG0. May + * block if TOD access is temporarily not possible. + */ +#define HV_FAST_TOD_SET 0x51 + +/* Console services */ + +/* con_getchar() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CONS_GETCHAR + * RET0: status + * RET1: character + * ERRORS: EWOULDBLOCK No character available. + * + * Returns a character from the console device.
If no character is + * available then an EWOULDBLOCK error is returned. If a character is + * available, then the returned status is EOK and the character value + * is in RET1. + * + * A virtual BREAK is represented by the 64-bit value -1. + * + * A virtual HUP signal is represented by the 64-bit value -2. + */ +#define HV_FAST_CONS_GETCHAR 0x60 + +/* con_putchar() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CONS_PUTCHAR + * ARG0: character + * RET0: status + * ERRORS: EINVAL Illegal character + * EWOULDBLOCK Output buffer currently full, would block + * + * Send a character to the console device. Only character values + * between 0 and 255 may be used. Values outside this range are + * invalid except for the 64-bit value -1 which is used to send a + * virtual BREAK. + */ +#define HV_FAST_CONS_PUTCHAR 0x61 + +/* Trap trace services. + * + * The hypervisor provides a trap tracing capability for privileged + * code running on each virtual CPU. Privileged code provides a + * round-robin trap trace queue within which the hypervisor writes + * 64-byte entries detailing hyperprivileged traps taken on behalf of + * privileged code. This is provided as a debugging capability for + * privileged code. + * + * The trap trace control structure is 64-bytes long and placed at the + * start (offset 0) of the trap trace buffer, and is described as + * follows: + */ +#ifndef __ASSEMBLY__ +struct hv_trap_trace_control { + unsigned long head_offset; + unsigned long tail_offset; + unsigned long __reserved[0x30 / sizeof(unsigned long)]; +}; +#endif +#define HV_TRAP_TRACE_CTRL_HEAD_OFFSET 0x00 +#define HV_TRAP_TRACE_CTRL_TAIL_OFFSET 0x08 + +/* The head offset is the offset of the most recently completed entry + * in the trap-trace buffer. The tail offset is the offset of the + * next entry to be written. The control structure is owned and + * modified by the hypervisor. A guest may not modify the control + * structure contents.
Attempts to do so will result in undefined + * behavior for the guest. + * + * Each trap trace buffer entry is laid out as follows: + */ +#ifndef __ASSEMBLY__ +struct hv_trap_trace_entry { + unsigned char type; /* Hypervisor or guest entry? */ + unsigned char hpstate; /* Hyper-privileged state */ + unsigned char tl; /* Trap level */ + unsigned char gl; /* Global register level */ + unsigned short tt; /* Trap type */ + unsigned short tag; /* Extended trap identifier */ + unsigned long tstate; /* Trap state */ + unsigned long tick; /* Tick */ + unsigned long tpc; /* Trap PC */ + unsigned long f1; /* Entry specific */ + unsigned long f2; /* Entry specific */ + unsigned long f3; /* Entry specific */ + unsigned long f4; /* Entry specific */ +}; +#endif +#define HV_TRAP_TRACE_ENTRY_TYPE 0x00 +#define HV_TRAP_TRACE_ENTRY_HPSTATE 0x01 +#define HV_TRAP_TRACE_ENTRY_TL 0x02 +#define HV_TRAP_TRACE_ENTRY_GL 0x03 +#define HV_TRAP_TRACE_ENTRY_TT 0x04 +#define HV_TRAP_TRACE_ENTRY_TAG 0x06 +#define HV_TRAP_TRACE_ENTRY_TSTATE 0x08 +#define HV_TRAP_TRACE_ENTRY_TICK 0x10 +#define HV_TRAP_TRACE_ENTRY_TPC 0x18 +#define HV_TRAP_TRACE_ENTRY_F1 0x20 +#define HV_TRAP_TRACE_ENTRY_F2 0x28 +#define HV_TRAP_TRACE_ENTRY_F3 0x30 +#define HV_TRAP_TRACE_ENTRY_F4 0x38 + +/* The type field is encoded as follows. */ +#define HV_TRAP_TYPE_UNDEF 0x00 /* Entry content undefined */ +#define HV_TRAP_TYPE_HV 0x01 /* Hypervisor trap entry */ +#define HV_TRAP_TYPE_GUEST 0xff /* Added via ttrace_addentry() */ + +/* ttrace_buf_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_BUF_CONF + * ARG0: real address + * ARG1: number of entries + * RET0: status + * RET1: number of entries + * ERRORS: ENORADDR Invalid real address + * EINVAL Size is too small + * EBADALIGN Real address not aligned on 64-byte boundary + * + * Requests hypervisor trap tracing and declares a virtual CPU's trap + * trace buffer to the hypervisor.
The real address supplies the real + * base address of the trap trace queue and must be 64-byte aligned. + * Specifying a value of 0 for the number of entries disables trap + * tracing for the calling virtual CPU. The buffer allocated must be + * sized for a power of two number of 64-byte trap trace entries plus + * an initial 64-byte control structure. + * + * This may be invoked any number of times so that a virtual CPU may + * relocate a trap trace buffer or create "snapshots" of information. + * + * If the real address is illegal or badly aligned, then trap tracing + * is disabled and an error is returned. + * + * Upon failure with EINVAL, this service call returns in RET1 the + * minimum number of buffer entries required. Upon other failures + * RET1 is undefined. + */ +#define HV_FAST_TTRACE_BUF_CONF 0x90 + +/* ttrace_buf_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_BUF_INFO + * RET0: status + * RET1: real address + * RET2: size + * ERRORS: None defined. + * + * Returns the size and location of the previously declared trap-trace + * buffer. In the event that no buffer was previously defined, or the + * buffer is disabled, this call will return a size of zero bytes. + */ +#define HV_FAST_TTRACE_BUF_INFO 0x91 + +/* ttrace_enable() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_ENABLE + * ARG0: enable + * RET0: status + * RET1: previous enable state + * ERRORS: EINVAL No trap trace buffer currently defined + * + * Enable or disable trap tracing, and return the previous enabled + * state in RET1. Future systems may define various flags for the + * enable argument (ARG0), for the moment a guest should pass + * "(uint64_t) -1" to enable, and "(uint64_t) 0" to disable all + * tracing - which will ensure future compatibility.
+ */ +#define HV_FAST_TTRACE_ENABLE 0x92 + +/* ttrace_freeze() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_FREEZE + * ARG0: freeze + * RET0: status + * RET1: previous freeze state + * ERRORS: EINVAL No trap trace buffer currently defined + * + * Freeze or unfreeze trap tracing, returning the previous freeze + * state in RET1. A guest should pass a non-zero value to freeze and + * a zero value to unfreeze all tracing. The returned previous state + * is 0 for not frozen and 1 for frozen. + */ +#define HV_FAST_TTRACE_FREEZE 0x93 + +/* ttrace_addentry() + * TRAP: HV_TTRACE_ADDENTRY_TRAP + * ARG0: tag (16-bits) + * ARG1: data word 0 + * ARG2: data word 1 + * ARG3: data word 2 + * ARG4: data word 3 + * RET0: status + * ERRORS: EINVAL No trap trace buffer currently defined + * + * Add an entry to the trap trace buffer. Upon return only ARG0/RET0 + * is modified - none of the other registers holding arguments are + * volatile across this hypervisor service. + */ + +/* Core dump services. + * + * Since the hypervisor virtualizes and thus obscures a lot of the + * physical machine layout and state, traditional OS crash dumps can + * be difficult to diagnose especially when the problem is a + * configuration error of some sort. + * + * The dump services provide an opaque buffer into which the + * hypervisor can place its internal state in order to assist in + * debugging such situations. The contents are opaque and extremely + * platform and hypervisor implementation specific. The guest, during + * a core dump, requests that the hypervisor update any information in + * the dump buffer in preparation to being dumped as part of the + * domain's memory image.
+ */ + +/* dump_buf_update() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_DUMP_BUF_UPDATE + * ARG0: real address + * ARG1: size + * RET0: status + * RET1: required size of dump buffer + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Real address is not aligned on a 64-byte + * boundary + * EINVAL Size is non-zero but less than minimum size + * required + * ENOTSUPPORTED Operation not supported on current logical + * domain + * + * Declare a domain dump buffer to the hypervisor. The real address + * provided for the domain dump buffer must be 64-byte aligned. The + * size specifies the size of the dump buffer and may be larger than + * the minimum size specified in the machine description. The + * hypervisor will fill the dump buffer with opaque data. + * + * Note: A guest may elect to include dump buffer contents as part of a crash + * dump to assist with debugging. This function may be called any number + * of times so that a guest may relocate a dump buffer, or create + * "snapshots" of any dump-buffer information. Each call to + * dump_buf_update() atomically declares the new dump buffer to the + * hypervisor. + * + * A specified size of 0 unconfigures the dump buffer. If the real + * address is illegal or badly aligned, then any currently active dump + * buffer is disabled and an error is returned. + * + * In the event that the call fails with EINVAL, RET1 contains the + * minimum size required by the hypervisor for a valid dump buffer. + */ +#define HV_FAST_DUMP_BUF_UPDATE 0x94 + +/* dump_buf_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_DUMP_BUF_INFO + * RET0: status + * RET1: real address of current dump buffer + * RET2: size of current dump buffer + * ERRORS: No errors defined. + * + * Return the currently configured dump buffer description. A + * returned size of 0 bytes indicates an undefined dump buffer. In + * this case the return address in RET1 is undefined. + */ +#define HV_FAST_DUMP_BUF_INFO 0x95 + +/* Device interrupt services.
+ * + * Device interrupts are allocated to system bus bridges by the hypervisor, + * and described to OBP in the machine description. OBP then describes + * these interrupts to the OS via properties in the device tree. + * + * Terminology: + * + * cpuid Unique opaque value which represents a target cpu. + * + * devhandle Device handle. It uniquely identifies a device, and + * consists of the lower 28-bits of the hi-cell of the + * first entry of the device's "reg" property in the + * OBP device tree. + * + * devino Device interrupt number. Specifies the relative + * interrupt number within the device. The unique + * combination of devhandle and devino are used to + * identify a specific device interrupt. + * + * Note: The devino value is the same as the values in the + * "interrupts" property or "interrupt-map" property + * in the OBP device tree for that device. + * + * sysino System interrupt number. A 64-bit unsigned integer + * representing a unique interrupt within a virtual + * machine. + * + * intr_state A flag representing the interrupt state for a given + * sysino. The state values are defined below. + * + * intr_enabled A flag representing the 'enabled' state for a given + * sysino. The enable values are defined below. + */ + +#define HV_INTR_STATE_IDLE 0 /* Nothing pending */ +#define HV_INTR_STATE_RECEIVED 1 /* Interrupt received by hardware */ +#define HV_INTR_STATE_DELIVERED 2 /* Interrupt delivered to queue */ + +#define HV_INTR_DISABLED 0 /* sysino not enabled */ +#define HV_INTR_ENABLED 1 /* sysino enabled */ + +/* intr_devino_to_sysino() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_DEVINO2SYSINO + * ARG0: devhandle + * ARG1: devino + * RET0: status + * RET1: sysino + * ERRORS: EINVAL Invalid devhandle/devino + * + * Converts a device specific interrupt number of the given + * devhandle/devino into a system specific ino (sysino).
+ */ +#define HV_FAST_INTR_DEVINO2SYSINO 0xa0 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_devino_to_sysino(unsigned long devhandle, + unsigned long devino); +#endif + +/* intr_getenabled() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_GETENABLED + * ARG0: sysino + * RET0: status + * RET1: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + * ERRORS: EINVAL Invalid sysino + * + * Returns interrupt enabled state in RET1 for the interrupt defined + * by the given sysino. + */ +#define HV_FAST_INTR_GETENABLED 0xa1 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_getenabled(unsigned long sysino); +#endif + +/* intr_setenabled() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_SETENABLED + * ARG0: sysino + * ARG1: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + * RET0: status + * ERRORS: EINVAL Invalid sysino or intr_enabled value + * + * Set the 'enabled' state of the interrupt sysino. + */ +#define HV_FAST_INTR_SETENABLED 0xa2 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_setenabled(unsigned long sysino, unsigned long intr_enabled); +#endif + +/* intr_getstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_GETSTATE + * ARG0: sysino + * RET0: status + * RET1: intr_state (HV_INTR_STATE_*) + * ERRORS: EINVAL Invalid sysino + * + * Returns current state of the interrupt defined by the given sysino. + */ +#define HV_FAST_INTR_GETSTATE 0xa3 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_getstate(unsigned long sysino); +#endif + +/* intr_setstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_SETSTATE + * ARG0: sysino + * ARG1: intr_state (HV_INTR_STATE_*) + * RET0: status + * ERRORS: EINVAL Invalid sysino or intr_state value + * + * Sets the current state of the interrupt described by the given sysino + * value. + * + * Note: Setting the state to HV_INTR_STATE_IDLE clears any pending + * interrupt for sysino. 
+ */ +#define HV_FAST_INTR_SETSTATE 0xa4 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state); +#endif + +/* intr_gettarget() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_GETTARGET + * ARG0: sysino + * RET0: status + * RET1: cpuid + * ERRORS: EINVAL Invalid sysino + * + * Returns CPU that is the current target of the interrupt defined by + * the given sysino. The CPU value returned is undefined if the target + * has not been set via intr_settarget(). + */ +#define HV_FAST_INTR_GETTARGET 0xa5 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_gettarget(unsigned long sysino); +#endif + +/* intr_settarget() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_SETTARGET + * ARG0: sysino + * ARG1: cpuid + * RET0: status + * ERRORS: EINVAL Invalid sysino + * ENOCPU Invalid cpuid + * + * Set the target CPU for the interrupt defined by the given sysino. + */ +#define HV_FAST_INTR_SETTARGET 0xa6 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid); +#endif + +/* PCI IO services. + * + * See the terminology descriptions in the device interrupt services + * section above as those apply here too. Here are terminology + * definitions specific to these PCI IO services: + * + * tsbnum TSB number. Identifies which io-tsb is used. + * For this version of the specification, tsbnum + * must be zero. + * + * tsbindex TSB index. Identifies which entry in the TSB + * is used. The first entry is zero. + * + * tsbid A 64-bit aligned data structure which contains + * a tsbnum and a tsbindex. Bits 63:32 contain the + * tsbnum and bits 31:00 contain the tsbindex. + * + * Use the HV_PCI_TSBID() macro to construct such + * values. + * + * io_attributes IO attributes for IOMMU mappings. One or more + * of the attribute bits are stored in a 64-bit + * value. The values are defined below. + * + * r_addr 64-bit real address + * + * pci_device PCI device address.
A PCI device address identifies + * a specific device on a specific PCI bus segment. + * A PCI device address is a 32-bit unsigned integer + * with the following format: + * + * 00000000.bbbbbbbb.dddddfff.00000000 + * + * Use the HV_PCI_DEVICE_BUILD() macro to construct + * such values. + * + * pci_config_offset + * PCI configuration space offset. For conventional + * PCI a value between 0 and 255. For extended + * configuration space, a value between 0 and 4095. + * + * Note: For PCI configuration space accesses, the offset + * must be aligned to the access size. + * + * error_flag A return value which specifies if the action succeeded + * or failed. 0 means no error, non-0 means some error + * occurred while performing the service. + * + * io_sync_direction + * Direction definition for pci_dma_sync(), defined + * below in HV_PCI_SYNC_*. + * + * io_page_list A list of io_page_addresses, an io_page_address is + * a real address. + * + * io_page_list_p A pointer to an io_page_list. + * + * "size based byte swap" - Some functions do size based byte swapping + * which allows sw to access pointers and + * counters in native form when the processor + * operates in a different endianness than the + * IO bus. Size-based byte swapping converts a + * multi-byte field between big-endian and + * little-endian format.
+ */ + +#define HV_PCI_MAP_ATTR_READ 0x01 +#define HV_PCI_MAP_ATTR_WRITE 0x02 + +#define HV_PCI_DEVICE_BUILD(b,d,f) \ + ((((b) & 0xff) << 16) | \ + (((d) & 0x1f) << 11) | \ + (((f) & 0x07) << 8)) + +#define HV_PCI_TSBID(__tsb_num, __tsb_index) \ + ((((u64)(__tsb_num)) << 32UL) | ((u64)(__tsb_index))) + +#define HV_PCI_SYNC_FOR_DEVICE 0x01 +#define HV_PCI_SYNC_FOR_CPU 0x02 + +/* pci_iommu_map() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_MAP + * ARG0: devhandle + * ARG1: tsbid + * ARG2: #ttes + * ARG3: io_attributes + * ARG4: io_page_list_p + * RET0: status + * RET1: #ttes mapped + * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex/io_attributes + * EBADALIGN Improperly aligned real address + * ENORADDR Invalid real address + * + * Create IOMMU mappings in the sun4v device defined by the given + * devhandle. The mappings are created in the TSB defined by the + * tsbnum component of the given tsbid. The first mapping is created + * in the TSB index defined by the tsbindex component of the given tsbid. + * The call creates up to #ttes mappings, the first one at tsbnum, tsbindex, + * the second at tsbnum, tsbindex + 1, etc. + * + * All mappings are created with the attributes defined by the io_attributes + * argument. The page mapping addresses are described in the io_page_list + * defined by the given io_page_list_p, which is a pointer to the io_page_list. + * The first entry in the io_page_list is the address for the first iotte, the + * 2nd for the 2nd iotte, and so on. + * + * Each io_page_address in the io_page_list must be appropriately aligned. + * #ttes must be greater than zero. For this version of the spec, the tsbnum + * component of the given tsbid must be zero. + * + * Returns the actual number of mappings created, which may be less than + * or equal to the argument #ttes.
If the function returns a value which + * is less than the #ttes, the caller may continue to call the function with + * an updated tsbid, #ttes, io_page_list_p arguments until all pages are + * mapped. + * + * Note: This function does not imply an iotte cache flush. The guest must + * demap an entry before re-mapping it. + */ +#define HV_FAST_PCI_IOMMU_MAP 0xb0 + +/* pci_iommu_demap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_DEMAP + * ARG0: devhandle + * ARG1: tsbid + * ARG2: #ttes + * RET0: status + * RET1: #ttes demapped + * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex + * + * Demap and flush IOMMU mappings in the device defined by the given + * devhandle. Demaps up to #ttes entries in the TSB defined by the tsbnum + * component of the given tsbid, starting at the TSB index defined by the + * tsbindex component of the given tsbid. + * + * For this version of the spec, the tsbnum of the given tsbid must be zero. + * #ttes must be greater than zero. + * + * Returns the actual number of ttes demapped, which may be less than or equal + * to the argument #ttes. If #ttes demapped is less than #ttes, the caller + * may continue to call this function with updated tsbid and #ttes arguments + * until all pages are demapped. + * + * Note: Entries do not have to be mapped to be demapped. A demap of an + * unmapped page will flush the entry from the tte cache. + */ +#define HV_FAST_PCI_IOMMU_DEMAP 0xb1 + +/* pci_iommu_getmap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_GETMAP + * ARG0: devhandle + * ARG1: tsbid + * RET0: status + * RET1: io_attributes + * RET2: real address + * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex + * ENOMAP Mapping is not valid, no translation exists + * + * Read and return the mapping in the device described by the given devhandle + * and tsbid. If successful, the io_attributes shall be returned in RET1 + * and the page address of the mapping shall be returned in RET2.
+ * + * For this version of the spec, the tsbnum component of the given tsbid + * must be zero. + */ +#define HV_FAST_PCI_IOMMU_GETMAP 0xb2 + +/* pci_iommu_getbypass() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_GETBYPASS + * ARG0: devhandle + * ARG1: real address + * ARG2: io_attributes + * RET0: status + * RET1: io_addr + * ERRORS: EINVAL Invalid devhandle/io_attributes + * ENORADDR Invalid real address + * ENOTSUPPORTED Function not supported in this implementation. + * + * Create a "special" mapping in the device described by the given devhandle, + * for the given real address and attributes. Return the IO address in RET1 + * if successful. + */ +#define HV_FAST_PCI_IOMMU_GETBYPASS 0xb3 + +/* pci_config_get() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_CONFIG_GET + * ARG0: devhandle + * ARG1: pci_device + * ARG2: pci_config_offset + * ARG3: size + * RET0: status + * RET1: error_flag + * RET2: data + * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size + * EBADALIGN pci_config_offset not size aligned + * ENOACCESS Access to this offset is not permitted + * + * Read PCI configuration space for the adapter described by the given + * devhandle. Read size (1, 2, or 4) bytes of data from the given + * pci_device, at pci_config_offset from the beginning of the device's + * configuration space. If there was no error, RET1 is set to zero and + * RET2 is set to the data read. Insignificant bits in RET2 are not + * guaranteed to have any specific value and therefore must be ignored. + * + * The data returned in RET2 is size based byte swapped. + * + * If an error occurs during the read, set RET1 to a non-zero value. The + * given pci_config_offset must be 'size' aligned.
+ */ +#define HV_FAST_PCI_CONFIG_GET 0xb4 + +/* pci_config_put() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_CONFIG_PUT + * ARG0: devhandle + * ARG1: pci_device + * ARG2: pci_config_offset + * ARG3: size + * ARG4: data + * RET0: status + * RET1: error_flag + * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size + * EBADALIGN pci_config_offset not size aligned + * ENOACCESS Access to this offset is not permitted + * + * Write PCI configuration space for the adapter described by the given + * devhandle. Write size (1, 2, or 4) bytes of data in a single operation, + * at pci_config_offset from the beginning of the device's configuration + * space. The data argument contains the data to be written to configuration + * space. Prior to writing, the data is size based byte swapped. + * + * If an error occurs during the write access, do not generate an error + * report, do set RET1 to a non-zero value. Otherwise RET1 is zero. + * The given pci_config_offset must be 'size' aligned. + * + * This function is permitted to read from offset zero in the configuration + * space described by the given pci_device if necessary to ensure that the + * write access to config space completes. + */ +#define HV_FAST_PCI_CONFIG_PUT 0xb5 + +/* pci_peek() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_PEEK + * ARG0: devhandle + * ARG1: real address + * ARG2: size + * RET0: status + * RET1: error_flag + * RET2: data + * ERRORS: EINVAL Invalid devhandle or size + * EBADALIGN Improperly aligned real address + * ENORADDR Bad real address + * ENOACCESS Guest access prohibited + * + * Attempt to read the IO address given by the given devhandle, real address, + * and size. Size must be 1, 2, 4, or 8. The read is performed as a single + * access operation using the given size. If an error occurs when reading + * from the given location, do not generate an error report, but return a + * non-zero value in RET1. 
If the read was successful, return zero in RET1 + * and return the actual data read in RET2. The data returned is size based + * byte swapped. + * + * Non-significant bits in RET2 are not guaranteed to have any specific value + * and therefore must be ignored. If RET1 is returned as non-zero, the data + * value is not guaranteed to have any specific value and should be ignored. + * + * The caller must have permission to read from the given devhandle, real + * address, which must be an IO address. The argument real address must be a + * size aligned address. + * + * The hypervisor implementation of this function must block access to any + * IO address that the guest does not have explicit permission to access. + */ +#define HV_FAST_PCI_PEEK 0xb6 + +/* pci_poke() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_POKE + * ARG0: devhandle + * ARG1: real address + * ARG2: size + * ARG3: data + * ARG4: pci_device + * RET0: status + * RET1: error_flag + * ERRORS: EINVAL Invalid devhandle, size, or pci_device + * EBADALIGN Improperly aligned real address + * ENORADDR Bad real address + * ENOACCESS Guest access prohibited + * ENOTSUPPORTED Function is not supported by implementation + * + * Attempt to write data to the IO address given by the given devhandle, + * real address, and size. Size must be 1, 2, 4, or 8. The write is + * performed as a single access operation using the given size. Prior to + * writing the data is size based swapped. + * + * If an error occurs when writing to the given location, do not generate an + * error report, but return a non-zero value in RET1. If the write was + * successful, return zero in RET1. + * + * pci_device describes the configuration address of the device being + * written to.
The implementation may safely read from offset 0 with + * the configuration space of the device described by devhandle and + * pci_device in order to guarantee that the write portion of the operation + * completes. + * + * Any error that occurs due to the read shall be reported using the normal + * error reporting mechanisms .. the read error is not suppressed. + * + * The caller must have permission to write to the given devhandle, real + * address, which must be an IO address. The argument real address must be a + * size aligned address. The caller must have permission to read from + * the given devhandle, pci_device configuration space offset 0. + * + * The hypervisor implementation of this function must block access to any + * IO address that the guest does not have explicit permission to access. + */ +#define HV_FAST_PCI_POKE 0xb7 + +/* pci_dma_sync() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_DMA_SYNC + * ARG0: devhandle + * ARG1: real address + * ARG2: size + * ARG3: io_sync_direction + * RET0: status + * RET1: #synced + * ERRORS: EINVAL Invalid devhandle or io_sync_direction + * ENORADDR Bad real address + * + * Synchronize a memory region described by the given real address and size, + * for the device defined by the given devhandle using the direction(s) + * defined by the given io_sync_direction. The argument size is the size of + * the memory region in bytes. + * + * Return the actual number of bytes synchronized in the return value #synced, + * which may be less than or equal to the argument size. If the return + * value #synced is less than size, the caller must continue to call this + * function with updated real address and size arguments until the entire + * memory region is synchronized. + */ +#define HV_FAST_PCI_DMA_SYNC 0xb8 + +/* PCI MSI services.
*/ + +#define HV_MSITYPE_MSI32 0x00 +#define HV_MSITYPE_MSI64 0x01 + +#define HV_MSIQSTATE_IDLE 0x00 +#define HV_MSIQSTATE_ERROR 0x01 + +#define HV_MSIQ_INVALID 0x00 +#define HV_MSIQ_VALID 0x01 + +#define HV_MSISTATE_IDLE 0x00 +#define HV_MSISTATE_DELIVERED 0x01 + +#define HV_MSIVALID_INVALID 0x00 +#define HV_MSIVALID_VALID 0x01 + +#define HV_PCIE_MSGTYPE_PME_MSG 0x18 +#define HV_PCIE_MSGTYPE_PME_ACK_MSG 0x1b +#define HV_PCIE_MSGTYPE_CORR_MSG 0x30 +#define HV_PCIE_MSGTYPE_NONFATAL_MSG 0x31 +#define HV_PCIE_MSGTYPE_FATAL_MSG 0x33 + +#define HV_MSG_INVALID 0x00 +#define HV_MSG_VALID 0x01 + +/* pci_msiq_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_CONF + * ARG0: devhandle + * ARG1: msiqid + * ARG2: real address + * ARG3: number of entries + * RET0: status + * ERRORS: EINVAL Invalid devhandle, msiqid or nentries + * EBADALIGN Improperly aligned real address + * ENORADDR Bad real address + * + * Configure the MSI queue given by the devhandle and msiqid arguments, + * and to be placed at the given real address and be of the given + * number of entries. The real address must be aligned exactly to match + * the queue size. Each queue entry is 64-bytes long, so f.e. a 32 entry + * queue must be aligned on a 2048 byte real address boundary. The MSI-EQ + * Head and Tail are initialized so that the MSI-EQ is 'empty'. + * + * Implementation Note: Certain implementations have fixed sized queues. In + * that case, number of entries must contain the correct + * value. + */ +#define HV_FAST_PCI_MSIQ_CONF 0xc0 + +/* pci_msiq_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_INFO + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: real address + * RET2: number of entries + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Return the configuration information for the MSI queue described + * by the given devhandle and msiqid. The base address of the queue + * is returned in RET1 and the number of entries is returned in RET2.
+ * If the queue is unconfigured, the real address is undefined and the + * number of entries will be returned as zero. + */ +#define HV_FAST_PCI_MSIQ_INFO 0xc1 + +/* pci_msiq_getvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETVALID + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID) + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the valid state of the MSI-EQ described by the given devhandle and + * msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETVALID 0xc2 + +/* pci_msiq_setvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_SETVALID + * ARG0: devhandle + * ARG1: msiqid + * ARG2: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID) + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msiqid or msiqvalid + * value or MSI EQ is uninitialized + * + * Set the valid state of the MSI-EQ described by the given devhandle and + * msiqid to the given msiqvalid. + */ +#define HV_FAST_PCI_MSIQ_SETVALID 0xc3 + +/* pci_msiq_getstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETSTATE + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR) + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the state of the MSI-EQ described by the given devhandle and + * msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETSTATE 0xc4 + +/* pci_msiq_setstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_SETSTATE + * ARG0: devhandle + * ARG1: msiqid + * ARG2: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR) + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msiqid or msiqstate + * value or MSI EQ is uninitialized + * + * Set the state of the MSI-EQ described by the given devhandle and + * msiqid to the given msiqstate.
+ */ +#define HV_FAST_PCI_MSIQ_SETSTATE 0xc5 + +/* pci_msiq_gethead() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETHEAD + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqhead + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the current MSI EQ queue head for the MSI-EQ described by the + * given devhandle and msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETHEAD 0xc6 + +/* pci_msiq_sethead() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_SETHEAD + * ARG0: devhandle + * ARG1: msiqid + * ARG2: msiqhead + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msiqid or msiqhead, + * or MSI EQ is uninitialized + * + * Set the current MSI EQ queue head for the MSI-EQ described by the + * given devhandle and msiqid. + */ +#define HV_FAST_PCI_MSIQ_SETHEAD 0xc7 + +/* pci_msiq_gettail() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETTAIL + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqtail + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the current MSI EQ queue tail for the MSI-EQ described by the + * given devhandle and msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETTAIL 0xc8 + +/* pci_msi_getvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_GETVALID + * ARG0: devhandle + * ARG1: msinum + * RET0: status + * RET1: msivalidstate + * ERRORS: EINVAL Invalid devhandle or msinum + * + * Get the current valid/enabled state for the MSI defined by the + * given devhandle and msinum. + */ +#define HV_FAST_PCI_MSI_GETVALID 0xc9 + +/* pci_msi_setvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_SETVALID + * ARG0: devhandle + * ARG1: msinum + * ARG2: msivalidstate + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msinum or msivalidstate + * + * Set the current valid/enabled state for the MSI defined by the + * given devhandle and msinum. 
+ */ +#define HV_FAST_PCI_MSI_SETVALID 0xca + +/* pci_msi_getmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_GETMSIQ + * ARG0: devhandle + * ARG1: msinum + * RET0: status + * RET1: msiqid + * ERRORS: EINVAL Invalid devhandle or msinum or MSI is unbound + * + * Get the MSI EQ that the MSI defined by the given devhandle and + * msinum is bound to. + */ +#define HV_FAST_PCI_MSI_GETMSIQ 0xcb + +/* pci_msi_setmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_SETMSIQ + * ARG0: devhandle + * ARG1: msinum + * ARG2: msitype + * ARG3: msiqid + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msinum or msiqid + * + * Set the MSI EQ that the MSI defined by the given devhandle and + * msinum is bound to. + */ +#define HV_FAST_PCI_MSI_SETMSIQ 0xcc + +/* pci_msi_getstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_GETSTATE + * ARG0: devhandle + * ARG1: msinum + * RET0: status + * RET1: msistate + * ERRORS: EINVAL Invalid devhandle or msinum + * + * Get the state of the MSI defined by the given devhandle and msinum. + * If not initialized, return HV_MSISTATE_IDLE. + */ +#define HV_FAST_PCI_MSI_GETSTATE 0xcd + +/* pci_msi_setstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_SETSTATE + * ARG0: devhandle + * ARG1: msinum + * ARG2: msistate + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msinum or msistate + * + * Set the state of the MSI defined by the given devhandle and msinum. + */ +#define HV_FAST_PCI_MSI_SETSTATE 0xce + +/* pci_msg_getmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_GETMSIQ + * ARG0: devhandle + * ARG1: msgtype + * RET0: status + * RET1: msiqid + * ERRORS: EINVAL Invalid devhandle or msgtype + * + * Get the MSI EQ of the MSG defined by the given devhandle and msgtype. 
+ */ +#define HV_FAST_PCI_MSG_GETMSIQ 0xd0 + +/* pci_msg_setmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_SETMSIQ + * ARG0: devhandle + * ARG1: msgtype + * ARG2: msiqid + * RET0: status + * ERRORS: EINVAL Invalid devhandle, msgtype, or msiqid + * + * Set the MSI EQ of the MSG defined by the given devhandle and msgtype. + */ +#define HV_FAST_PCI_MSG_SETMSIQ 0xd1 + +/* pci_msg_getvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_GETVALID + * ARG0: devhandle + * ARG1: msgtype + * RET0: status + * RET1: msgvalidstate + * ERRORS: EINVAL Invalid devhandle or msgtype + * + * Get the valid/enabled state of the MSG defined by the given + * devhandle and msgtype. + */ +#define HV_FAST_PCI_MSG_GETVALID 0xd2 + +/* pci_msg_setvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_SETVALID + * ARG0: devhandle + * ARG1: msgtype + * ARG2: msgvalidstate + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msgtype or msgvalidstate + * + * Set the valid/enabled state of the MSG defined by the given + * devhandle and msgtype. + */ +#define HV_FAST_PCI_MSG_SETVALID 0xd3 + +/* Performance counter services. */ + +#define HV_PERF_JBUS_PERF_CTRL_REG 0x00 +#define HV_PERF_JBUS_PERF_CNT_REG 0x01 +#define HV_PERF_DRAM_PERF_CTRL_REG_0 0x02 +#define HV_PERF_DRAM_PERF_CNT_REG_0 0x03 +#define HV_PERF_DRAM_PERF_CTRL_REG_1 0x04 +#define HV_PERF_DRAM_PERF_CNT_REG_1 0x05 +#define HV_PERF_DRAM_PERF_CTRL_REG_2 0x06 +#define HV_PERF_DRAM_PERF_CNT_REG_2 0x07 +#define HV_PERF_DRAM_PERF_CTRL_REG_3 0x08 +#define HV_PERF_DRAM_PERF_CNT_REG_3 0x09 + +/* get_perfreg() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_GET_PERFREG + * ARG0: performance reg number + * RET0: status + * RET1: performance reg value + * ERRORS: EINVAL Invalid performance register number + * ENOACCESS No access allowed to performance counters + * + * Read the value of the given DRAM/JBUS performance counter/control register. 
+ */ +#define HV_FAST_GET_PERFREG 0x100 + +/* set_perfreg() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_SET_PERFREG + * ARG0: performance reg number + * ARG1: performance reg value + * RET0: status + * ERRORS: EINVAL Invalid performance register number + * ENOACCESS No access allowed to performance counters + * + * Write the given performance reg value to the given DRAM/JBUS + * performance counter/control register. + */ +#define HV_FAST_SET_PERFREG 0x101 + +/* MMU statistics services. + * + * The hypervisor maintains MMU statistics and privileged code provides + * a buffer where these statistics can be collected. It is continually + * updated once configured. The layout is as follows: + */ +#ifndef __ASSEMBLY__ +struct hv_mmu_statistics { + unsigned long immu_tsb_hits_ctx0_8k_tte; + unsigned long immu_tsb_ticks_ctx0_8k_tte; + unsigned long immu_tsb_hits_ctx0_64k_tte; + unsigned long immu_tsb_ticks_ctx0_64k_tte; + unsigned long __reserved1[2]; + unsigned long immu_tsb_hits_ctx0_4mb_tte; + unsigned long immu_tsb_ticks_ctx0_4mb_tte; + unsigned long __reserved2[2]; + unsigned long immu_tsb_hits_ctx0_256mb_tte; + unsigned long immu_tsb_ticks_ctx0_256mb_tte; + unsigned long __reserved3[4]; + unsigned long immu_tsb_hits_ctxnon0_8k_tte; + unsigned long immu_tsb_ticks_ctxnon0_8k_tte; + unsigned long immu_tsb_hits_ctxnon0_64k_tte; + unsigned long immu_tsb_ticks_ctxnon0_64k_tte; + unsigned long __reserved4[2]; + unsigned long immu_tsb_hits_ctxnon0_4mb_tte; + unsigned long immu_tsb_ticks_ctxnon0_4mb_tte; + unsigned long __reserved5[2]; + unsigned long immu_tsb_hits_ctxnon0_256mb_tte; + unsigned long immu_tsb_ticks_ctxnon0_256mb_tte; + unsigned long __reserved6[4]; + unsigned long dmmu_tsb_hits_ctx0_8k_tte; + unsigned long dmmu_tsb_ticks_ctx0_8k_tte; + unsigned long dmmu_tsb_hits_ctx0_64k_tte; + unsigned long dmmu_tsb_ticks_ctx0_64k_tte; + unsigned long __reserved7[2]; + unsigned long dmmu_tsb_hits_ctx0_4mb_tte; + unsigned long dmmu_tsb_ticks_ctx0_4mb_tte; + unsigned long 
__reserved8[2]; + unsigned long dmmu_tsb_hits_ctx0_256mb_tte; + unsigned long dmmu_tsb_ticks_ctx0_256mb_tte; + unsigned long __reserved9[4]; + unsigned long dmmu_tsb_hits_ctxnon0_8k_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_8k_tte; + unsigned long dmmu_tsb_hits_ctxnon0_64k_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_64k_tte; + unsigned long __reserved10[2]; + unsigned long dmmu_tsb_hits_ctxnon0_4mb_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_4mb_tte; + unsigned long __reserved11[2]; + unsigned long dmmu_tsb_hits_ctxnon0_256mb_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_256mb_tte; + unsigned long __reserved12[4]; +}; +#endif + +/* mmustat_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMUSTAT_CONF + * ARG0: real address + * RET0: status + * RET1: real address + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Real address not aligned on 64-byte boundary + * EBADTRAP API not supported on this processor + * + * Enable MMU statistic gathering using the buffer at the given real + * address on the current virtual CPU. The new buffer real address + * is given in ARG0, and the previously specified buffer real address + * is returned in RET1, or is returned as zero for the first invocation. + * + * If the passed in real address argument is zero, this will disable + * MMU statistic collection on the current virtual CPU. If an error is + * returned then no statistics are collected. + * + * The buffer contents should be initialized to all zeros before being + * given to the hypervisor or else the statistics will be meaningless. + */ +#define HV_FAST_MMUSTAT_CONF 0x102 + +/* mmustat_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMUSTAT_INFO + * RET0: status + * RET1: real address + * ERRORS: EBADTRAP API not supported on this processor + * + * Return the current state and real address of the currently configured + * MMU statistics buffer on the current virtual CPU. + */ +#define HV_FAST_MMUSTAT_INFO 0x103 + +/* Function numbers for HV_CORE_TRAP.
*/ +#define HV_CORE_VER 0x00 +#define HV_CORE_PUTCHAR 0x01 +#define HV_CORE_EXIT 0x02 + +#endif /* !(_SPARC64_HYPERVISOR_H) */ diff --git a/include/asm-sparc64/idprom.h b/include/asm-sparc64/idprom.h index 701483c5465d..77fbf987385f 100644 --- a/include/asm-sparc64/idprom.h +++ b/include/asm-sparc64/idprom.h @@ -9,15 +9,7 @@ #include <linux/types.h> -/* Offset into the EEPROM where the id PROM is located on the 4c */ -#define IDPROM_OFFSET 0x7d8 - -/* On sun4m; physical. */ -/* MicroSPARC(-II) does not decode 31rd bit, but it works. */ -#define IDPROM_OFFSET_M 0xfd8 - -struct idprom -{ +struct idprom { u8 id_format; /* Format identifier (always 0x01) */ u8 id_machtype; /* Machine type */ u8 id_ethaddr[6]; /* Hardware ethernet address */ @@ -30,6 +22,4 @@ struct idprom extern struct idprom *idprom; extern void idprom_init(void); -#define IDPROM_SIZE (sizeof(struct idprom)) - #endif /* !(_SPARC_IDPROM_H) */ diff --git a/include/asm-sparc64/intr_queue.h b/include/asm-sparc64/intr_queue.h new file mode 100644 index 000000000000..206077dedc2a --- /dev/null +++ b/include/asm-sparc64/intr_queue.h @@ -0,0 +1,15 @@ +#ifndef _SPARC64_INTR_QUEUE_H +#define _SPARC64_INTR_QUEUE_H + +/* Sun4v interrupt queue registers, accessed via ASI_QUEUE. 
*/ + +#define INTRQ_CPU_MONDO_HEAD 0x3c0 /* CPU mondo head */ +#define INTRQ_CPU_MONDO_TAIL 0x3c8 /* CPU mondo tail */ +#define INTRQ_DEVICE_MONDO_HEAD 0x3d0 /* Device mondo head */ +#define INTRQ_DEVICE_MONDO_TAIL 0x3d8 /* Device mondo tail */ +#define INTRQ_RESUM_MONDO_HEAD 0x3e0 /* Resumable error mondo head */ +#define INTRQ_RESUM_MONDO_TAIL 0x3e8 /* Resumable error mondo tail */ +#define INTRQ_NONRESUM_MONDO_HEAD 0x3f0 /* Non-resumable error mondo head */ +#define INTRQ_NONRESUM_MONDO_TAIL 0x3f8 /* Non-resumable error mondo tail */ + +#endif /* !(_SPARC64_INTR_QUEUE_H) */ diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h index 8b70edcb80dc..de33d6e1afb5 100644 --- a/include/asm-sparc64/irq.h +++ b/include/asm-sparc64/irq.h @@ -72,8 +72,11 @@ struct ino_bucket { #define IMAP_VALID 0x80000000 /* IRQ Enabled */ #define IMAP_TID_UPA 0x7c000000 /* UPA TargetID */ #define IMAP_TID_JBUS 0x7c000000 /* JBUS TargetID */ +#define IMAP_TID_SHIFT 26 #define IMAP_AID_SAFARI 0x7c000000 /* Safari AgentID */ +#define IMAP_AID_SHIFT 26 #define IMAP_NID_SAFARI 0x03e00000 /* Safari NodeID */ +#define IMAP_NID_SHIFT 21 #define IMAP_IGN 0x000007c0 /* IRQ Group Number */ #define IMAP_INO 0x0000003f /* IRQ Number */ #define IMAP_INR 0x000007ff /* Full interrupt number*/ @@ -111,6 +114,7 @@ extern void disable_irq(unsigned int); #define disable_irq_nosync disable_irq extern void enable_irq(unsigned int); extern unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long imap); +extern unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino, int pil, unsigned char flags); extern unsigned int sbus_build_irq(void *sbus, unsigned int ino); static __inline__ void set_softint(unsigned long bits) diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h index 8627eed6e83d..230ba678d3b0 100644 --- a/include/asm-sparc64/mmu.h +++ b/include/asm-sparc64/mmu.h @@ -4,20 +4,9 @@ #include <linux/config.h> #include <asm/page.h> #include
<asm/const.h> +#include <asm/hypervisor.h> -/* - * For the 8k pagesize kernel, use only 10 hw context bits to optimize some - * shifts in the fast tlbmiss handlers, instead of all 13 bits (specifically - * for vpte offset calculation). For other pagesizes, this optimization in - * the tlbhandlers can not be done; but still, all 13 bits can not be used - * because the tlb handlers use "andcc" instruction which sign extends 13 - * bit arguments. - */ -#if PAGE_SHIFT == 13 -#define CTX_NR_BITS 10 -#else -#define CTX_NR_BITS 12 -#endif +#define CTX_NR_BITS 13 #define TAG_CONTEXT_BITS ((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL)) @@ -90,8 +79,27 @@ #ifndef __ASSEMBLY__ +#define TSB_ENTRY_ALIGNMENT 16 + +struct tsb { + unsigned long tag; + unsigned long pte; +} __attribute__((aligned(TSB_ENTRY_ALIGNMENT))); + +extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte); +extern void tsb_flush(unsigned long ent, unsigned long tag); +extern void tsb_init(struct tsb *tsb, unsigned long size); + typedef struct { - unsigned long sparc64_ctx_val; + spinlock_t lock; + unsigned long sparc64_ctx_val; + struct tsb *tsb; + unsigned long tsb_rss_limit; + unsigned long tsb_nentries; + unsigned long tsb_reg_val; + unsigned long tsb_map_vaddr; + unsigned long tsb_map_pte; + struct hv_tsb_descr tsb_descr; } mm_context_t; #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 57ee7b306189..e7974321d052 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -19,96 +19,98 @@ extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; extern void get_new_mmu_context(struct mm_struct *mm); +#ifdef CONFIG_SMP +extern void smp_new_mmu_context_version(void); +#else +#define smp_new_mmu_context_version() do { } while (0) +#endif + +extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +extern void destroy_context(struct mm_struct *mm); + 
+extern void __tsb_context_switch(unsigned long pgd_pa, + unsigned long tsb_reg, + unsigned long tsb_vaddr, + unsigned long tsb_pte, + unsigned long tsb_descr_pa); + +static inline void tsb_context_switch(struct mm_struct *mm) +{ + __tsb_context_switch(__pa(mm->pgd), mm->context.tsb_reg_val, + mm->context.tsb_map_vaddr, + mm->context.tsb_map_pte, + __pa(&mm->context.tsb_descr)); +} -/* Initialize a new mmu context. This is invoked when a new - * address space instance (unique or shared) is instantiated. - * This just needs to set mm->context to an invalid context. - */ -#define init_new_context(__tsk, __mm) \ - (((__mm)->context.sparc64_ctx_val = 0UL), 0) - -/* Destroy a dead context. This occurs when mmput drops the - * mm_users count to zero, the mmaps have been released, and - * all the page tables have been flushed. Our job is to destroy - * any remaining processor-specific state, and in the sparc64 - * case this just means freeing up the mmu context ID held by - * this task if valid. - */ -#define destroy_context(__mm) \ -do { spin_lock(&ctx_alloc_lock); \ - if (CTX_VALID((__mm)->context)) { \ - unsigned long nr = CTX_NRBITS((__mm)->context); \ - mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \ - } \ - spin_unlock(&ctx_alloc_lock); \ -} while(0) - -/* Reload the two core values used by TLB miss handler - * processing on sparc64. They are: - * 1) The physical address of mm->pgd, when full page - * table walks are necessary, this is where the - * search begins. - * 2) A "PGD cache". For 32-bit tasks only pgd[0] is - * ever used since that maps the entire low 4GB - * completely. To speed up TLB miss processing we - * make this value available to the handlers. This - * decreases the amount of memory traffic incurred. 
- */ -#define reload_tlbmiss_state(__tsk, __mm) \ -do { \ - register unsigned long paddr asm("o5"); \ - register unsigned long pgd_cache asm("o4"); \ - paddr = __pa((__mm)->pgd); \ - pgd_cache = 0UL; \ - if (task_thread_info(__tsk)->flags & _TIF_32BIT) \ - pgd_cache = get_pgd_cache((__mm)->pgd); \ - __asm__ __volatile__("wrpr %%g0, 0x494, %%pstate\n\t" \ - "mov %3, %%g4\n\t" \ - "mov %0, %%g7\n\t" \ - "stxa %1, [%%g4] %2\n\t" \ - "membar #Sync\n\t" \ - "wrpr %%g0, 0x096, %%pstate" \ - : /* no outputs */ \ - : "r" (paddr), "r" (pgd_cache),\ - "i" (ASI_DMMU), "i" (TSB_REG)); \ -} while(0) +extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss); +#ifdef CONFIG_SMP +extern void smp_tsb_sync(struct mm_struct *mm); +#else +#define smp_tsb_sync(__mm) do { } while (0) +#endif /* Set MMU context in the actual hardware. */ #define load_secondary_context(__mm) \ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" \ - "flush %%g6" \ - : /* No outputs */ \ - : "r" (CTX_HWBITS((__mm)->context)), \ - "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU)) + __asm__ __volatile__( \ + "\n661: stxa %0, [%1] %2\n" \ + " .section .sun4v_1insn_patch, \"ax\"\n" \ + " .word 661b\n" \ + " stxa %0, [%1] %3\n" \ + " .previous\n" \ + " flush %%g6\n" \ + : /* No outputs */ \ + : "r" (CTX_HWBITS((__mm)->context)), \ + "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU), "i" (ASI_MMU)) extern void __flush_tlb_mm(unsigned long, unsigned long); -/* Switch the current MM context. */ +/* Switch the current MM context. Interrupts are disabled. */ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { - unsigned long ctx_valid; + unsigned long ctx_valid, flags; int cpu; - /* Note: page_table_lock is used here to serialize switch_mm - * and activate_mm, and their calls to get_new_mmu_context. - * This use of page_table_lock is unrelated to its other uses. 
- */ - spin_lock(&mm->page_table_lock); + spin_lock_irqsave(&mm->context.lock, flags); ctx_valid = CTX_VALID(mm->context); if (!ctx_valid) get_new_mmu_context(mm); - spin_unlock(&mm->page_table_lock); - if (!ctx_valid || (old_mm != mm)) { - load_secondary_context(mm); - reload_tlbmiss_state(tsk, mm); - } + /* We have to be extremely careful here or else we will miss + * a TSB grow if we switch back and forth between a kernel + * thread and an address space which has its TSB size increased + * on another processor. + * + * It is possible to play some games in order to optimize the + * switch, but the safest thing to do is to unconditionally + * perform the secondary context load and the TSB context switch. + * + * For reference the bad case is, for address space "A": + * + * CPU 0 CPU 1 + * run address space A + * set cpu0's bits in cpu_vm_mask + * switch to kernel thread, borrow + * address space A via entry_lazy_tlb + * run address space A + * set cpu1's bit in cpu_vm_mask + * flush_tlb_pending() + * reset cpu_vm_mask to just cpu1 + * TSB grow + * run address space A + * context was valid, so skip + * TSB context switch + * + * At that point cpu0 continues to use a stale TSB, the one from + * before the TSB grow performed on cpu1. cpu1 did not cross-call + * cpu0 to update its TSB because at that point the cpu_vm_mask + * only had cpu1 set in it. + */ + load_secondary_context(mm); + tsb_context_switch(mm); - /* Even if (mm == old_mm) we _must_ check - * the cpu_vm_mask. If we do not we could - * corrupt the TLB state because of how - * smp_flush_tlb_{page,range,mm} on sparc64 - * and lazy tlb switches work. -DaveM + /* Any time a processor runs a context on an address space + * for the first time, we must flush that context out of the + * local TLB.
*/ cpu = smp_processor_id(); if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) { @@ -116,6 +118,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); } + spin_unlock_irqrestore(&mm->context.lock, flags); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -123,23 +126,20 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str /* Activate a new MM instance for the current task. */ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) { + unsigned long flags; int cpu; - /* Note: page_table_lock is used here to serialize switch_mm - * and activate_mm, and their calls to get_new_mmu_context. - * This use of page_table_lock is unrelated to its other uses. - */ - spin_lock(&mm->page_table_lock); + spin_lock_irqsave(&mm->context.lock, flags); if (!CTX_VALID(mm->context)) get_new_mmu_context(mm); cpu = smp_processor_id(); if (!cpu_isset(cpu, mm->cpu_vm_mask)) cpu_set(cpu, mm->cpu_vm_mask); - spin_unlock(&mm->page_table_lock); load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - reload_tlbmiss_state(current, mm); + tsb_context_switch(mm); + spin_unlock_irqrestore(&mm->context.lock, flags); } #endif /* !(__ASSEMBLY__) */ diff --git a/include/asm-sparc64/numnodes.h b/include/asm-sparc64/numnodes.h new file mode 100644 index 000000000000..017e7e74f5e7 --- /dev/null +++ b/include/asm-sparc64/numnodes.h @@ -0,0 +1,6 @@ +#ifndef _SPARC64_NUMNODES_H +#define _SPARC64_NUMNODES_H + +#define NODES_SHIFT 0 + +#endif /* !(_SPARC64_NUMNODES_H) */ diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h index 3c59b2693fb9..c754676e13ef 100644 --- a/include/asm-sparc64/oplib.h +++ b/include/asm-sparc64/oplib.h @@ -12,18 +12,8 @@ #include <linux/config.h> #include <asm/openprom.h> -/* Enumeration to describe the prom major version we have detected. 
*/ -enum prom_major_version { - PROM_V0, /* Original sun4c V0 prom */ - PROM_V2, /* sun4c and early sun4m V2 prom */ - PROM_V3, /* sun4m and later, up to sun4d/sun4e machines V3 */ - PROM_P1275, /* IEEE compliant ISA based Sun PROM, only sun4u */ - PROM_AP1000, /* actually no prom at all */ -}; - -extern enum prom_major_version prom_vers; -/* Revision, and firmware revision. */ -extern unsigned int prom_rev, prom_prev; +/* OBP version string. */ +extern char prom_version[]; /* Root node of the prom device tree, this stays constant after * initialization is complete. @@ -39,6 +29,9 @@ extern int prom_stdin, prom_stdout; extern int prom_chosen_node; /* Helper values and strings in arch/sparc64/kernel/head.S */ +extern const char prom_peer_name[]; +extern const char prom_compatible_name[]; +extern const char prom_root_compatible[]; extern const char prom_finddev_name[]; extern const char prom_chosen_path[]; extern const char prom_getprop_name[]; @@ -130,15 +123,6 @@ extern void prom_setcallback(callback_func_t func_ptr); */ extern unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size); -/* Get the prom major version. */ -extern int prom_version(void); - -/* Get the prom plugin revision. */ -extern int prom_getrev(void); - -/* Get the prom firmware revision. */ -extern int prom_getprev(void); - /* Character operations to/from the console.... */ /* Non-blocking get character from console. */ @@ -164,6 +148,7 @@ enum prom_input_device { PROMDEV_ITTYA, /* input from ttya */ PROMDEV_ITTYB, /* input from ttyb */ PROMDEV_IRSC, /* input from rsc */ + PROMDEV_IVCONS, /* input from virtual-console */ PROMDEV_I_UNK, }; @@ -176,6 +161,7 @@ enum prom_output_device { PROMDEV_OTTYA, /* to ttya */ PROMDEV_OTTYB, /* to ttyb */ PROMDEV_ORSC, /* to rsc */ + PROMDEV_OVCONS, /* to virtual-console */ PROMDEV_O_UNK, }; @@ -183,10 +169,18 @@ extern enum prom_output_device prom_query_output_device(void); /* Multiprocessor operations... 
*/ #ifdef CONFIG_SMP -/* Start the CPU with the given device tree node, context table, and context - * at the passed program counter. +/* Start the CPU with the given device tree node at the passed program + * counter with the given arg passed in via register %o0. + */ +extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg); + +/* Start the CPU with the given cpu ID at the passed program + * counter with the given arg passed in via register %o0. */ -extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long o0); +extern void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg); + +/* Stop the CPU with the given cpu ID. */ +extern void prom_stopcpu_cpuid(int cpuid); /* Stop the current CPU. */ extern void prom_stopself(void); @@ -335,6 +329,7 @@ int cpu_find_by_mid(int mid, int *prom_node); /* Client interface level routines. */ extern void prom_set_trap_table(unsigned long tba); +extern void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa); extern long p1275_cmd(const char *, long, ...); diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index 5426bb28a993..fcb2812265f4 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -104,10 +104,12 @@ typedef unsigned long pgprot_t; #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define ARCH_HAS_SETCLEAR_HUGE_PTE #define ARCH_HAS_HUGETLB_PREFAULT_HOOK +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ - (_AC(0x0000000070000000,UL)) : (PAGE_OFFSET)) + (_AC(0x0000000070000000,UL)) : \ + (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) #endif /* !(__ASSEMBLY__) */ @@ -124,17 +126,10 @@ typedef unsigned long pgprot_t; #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) #define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) -/* PFNs are real physical page numbers. However, mem_map only begins to record - * per-page information starting at pfn_base. 
This is to handle systems where - * the first physical page in the machine is at some huge physical address, - * such as 4GB. This is common on a partitioned E10000, for example. - */ -extern struct page *pfn_to_page(unsigned long pfn); -extern unsigned long page_to_pfn(struct page *); +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr)>>PAGE_SHIFT) -#define pfn_valid(pfn) (((pfn)-(pfn_base)) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #define virt_to_phys __pa diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index dd35a2c7798a..1396f110939a 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -139,6 +139,9 @@ struct pci_pbm_info { /* Opaque 32-bit system bus Port ID. */ u32 portid; + /* Opaque 32-bit handle used for hypervisor calls. */ + u32 devhandle; + /* Chipset version information. */ int chip_type; #define PBM_CHIP_TYPE_SABRE 1 diff --git a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h index 89bd71b1c0d8..7c5a589ea437 100644 --- a/include/asm-sparc64/pci.h +++ b/include/asm-sparc64/pci.h @@ -41,10 +41,26 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) struct pci_dev; +struct pci_iommu_ops { + void *(*alloc_consistent)(struct pci_dev *, size_t, dma_addr_t *); + void (*free_consistent)(struct pci_dev *, size_t, void *, dma_addr_t); + dma_addr_t (*map_single)(struct pci_dev *, void *, size_t, int); + void (*unmap_single)(struct pci_dev *, dma_addr_t, size_t, int); + int (*map_sg)(struct pci_dev *, struct scatterlist *, int, int); + void (*unmap_sg)(struct pci_dev *, struct scatterlist *, int, int); + void (*dma_sync_single_for_cpu)(struct pci_dev *, dma_addr_t, size_t, int); + void (*dma_sync_sg_for_cpu)(struct pci_dev *, struct scatterlist *, int, int); +}; + +extern struct pci_iommu_ops *pci_iommu_ops; + /* Allocate and map kernel buffer using consistent mode DMA for a device. 
* hwdev should be valid struct pci_dev pointer for PCI devices. */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle); +static inline void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) +{ + return pci_iommu_ops->alloc_consistent(hwdev, size, dma_handle); +} /* Free and unmap a consistent DMA buffer. * cpu_addr is what was returned from pci_alloc_consistent, @@ -54,7 +70,10 @@ extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t * References to the memory and mappings associated with cpu_addr/dma_addr * past this call are illegal. */ -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); +static inline void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + return pci_iommu_ops->free_consistent(hwdev, size, vaddr, dma_handle); +} /* Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. @@ -62,7 +81,10 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, * Once the device is given the dma address, the device owns this memory * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed. */ -extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction); +static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + return pci_iommu_ops->map_single(hwdev, ptr, size, direction); +} /* Unmap a single streaming mode DMA translation. The dma_addr and size * must match what was provided for in a previous pci_map_single call. All @@ -71,7 +93,10 @@ extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. 
*/ -extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); +static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) +{ + pci_iommu_ops->unmap_single(hwdev, dma_addr, size, direction); +} /* No highmem on sparc64, plus we have an IOMMU, so mapping pages is easy. */ #define pci_map_page(dev, page, off, size, dir) \ @@ -107,15 +132,19 @@ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction); +static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + return pci_iommu_ops->map_sg(hwdev, sg, nents, direction); +} /* Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nhwents, int direction); +static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction) +{ + pci_iommu_ops->unmap_sg(hwdev, sg, nhwents, direction); +} /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. @@ -127,8 +156,10 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, * must first perform a pci_dma_sync_for_device, and then the * device again owns the buffer. 
*/ -extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction); +static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) +{ + pci_iommu_ops->dma_sync_single_for_cpu(hwdev, dma_handle, size, direction); +} static inline void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, @@ -144,7 +175,10 @@ pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, * The same as pci_dma_sync_single_* but for a scatter-gather list, * same rules and usage. */ -extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); +static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + pci_iommu_ops->dma_sync_sg_for_cpu(hwdev, sg, nelems, direction); +} static inline void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index a96067cca963..12e4a273bd43 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> #include <asm/spitfire.h> #include <asm/cpudata.h> @@ -13,172 +14,59 @@ #include <asm/page.h> /* Page table allocation/freeing. 
*/ -#ifdef CONFIG_SMP -/* Sliiiicck */ -#define pgt_quicklists local_cpu_data() -#else -extern struct pgtable_cache_struct { - unsigned long *pgd_cache; - unsigned long *pte_cache[2]; - unsigned int pgcache_size; -} pgt_quicklists; -#endif -#define pgd_quicklist (pgt_quicklists.pgd_cache) -#define pmd_quicklist ((unsigned long *)0) -#define pte_quicklist (pgt_quicklists.pte_cache) -#define pgtable_cache_size (pgt_quicklists.pgcache_size) +extern kmem_cache_t *pgtable_cache; -static __inline__ void free_pgd_fast(pgd_t *pgd) +static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - preempt_disable(); - *(unsigned long *)pgd = (unsigned long) pgd_quicklist; - pgd_quicklist = (unsigned long *) pgd; - pgtable_cache_size++; - preempt_enable(); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } -static __inline__ pgd_t *get_pgd_fast(void) +static inline void pgd_free(pgd_t *pgd) { - unsigned long *ret; - - preempt_disable(); - if((ret = pgd_quicklist) != NULL) { - pgd_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - preempt_enable(); - } else { - preempt_enable(); - ret = (unsigned long *) __get_free_page(GFP_KERNEL|__GFP_REPEAT); - if(ret) - memset(ret, 0, PAGE_SIZE); - } - return (pgd_t *)ret; + kmem_cache_free(pgtable_cache, pgd); } -static __inline__ void free_pgd_slow(pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - -#ifdef DCACHE_ALIASING_POSSIBLE -#define VPTE_COLOR(address) (((address) >> (PAGE_SHIFT + 10)) & 1UL) -#define DCACHE_COLOR(address) (((address) >> PAGE_SHIFT) & 1UL) -#else -#define VPTE_COLOR(address) 0 -#define DCACHE_COLOR(address) 0 -#endif - #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) -static __inline__ pmd_t *pmd_alloc_one_fast(struct mm_struct *mm, unsigned long address) +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - unsigned long *ret; - int color = 0; - - preempt_disable(); - if (pte_quicklist[color] == NULL) - color = 1; - - if((ret = (unsigned long 
*)pte_quicklist[color]) != NULL) { - pte_quicklist[color] = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } - preempt_enable(); - - return (pmd_t *)ret; + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); } -static __inline__ pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +static inline void pmd_free(pmd_t *pmd) { - pmd_t *pmd; - - pmd = pmd_alloc_one_fast(mm, address); - if (!pmd) { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); - if (pmd) - memset(pmd, 0, PAGE_SIZE); - } - return pmd; + kmem_cache_free(pgtable_cache, pmd); } -static __inline__ void free_pmd_fast(pmd_t *pmd) +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) { - unsigned long color = DCACHE_COLOR((unsigned long)pmd); - - preempt_disable(); - *(unsigned long *)pmd = (unsigned long) pte_quicklist[color]; - pte_quicklist[color] = (unsigned long *) pmd; - pgtable_cache_size++; - preempt_enable(); + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); } -static __inline__ void free_pmd_slow(pmd_t *pmd) +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) { - free_page((unsigned long)pmd); + return virt_to_page(pte_alloc_one_kernel(mm, address)); } - -#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) -#define pmd_populate(MM,PMD,PTE_PAGE) \ - pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) - -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address); - -static inline struct page * -pte_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - pte_t *pte = pte_alloc_one_kernel(mm, addr); - - if (pte) - return virt_to_page(pte); - - return NULL; -} - -static __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) -{ - unsigned long color = VPTE_COLOR(address); - unsigned long *ret; - - preempt_disable(); - if((ret = (unsigned long *)pte_quicklist[color]) != NULL) { - pte_quicklist[color] = 
(unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } - preempt_enable(); - return (pte_t *)ret; -} - -static __inline__ void free_pte_fast(pte_t *pte) -{ - unsigned long color = DCACHE_COLOR((unsigned long)pte); - - preempt_disable(); - *(unsigned long *)pte = (unsigned long) pte_quicklist[color]; - pte_quicklist[color] = (unsigned long *) pte; - pgtable_cache_size++; - preempt_enable(); -} - -static __inline__ void free_pte_slow(pte_t *pte) -{ - free_page((unsigned long)pte); -} - + static inline void pte_free_kernel(pte_t *pte) { - free_pte_fast(pte); + kmem_cache_free(pgtable_cache, pte); } static inline void pte_free(struct page *ptepage) { - free_pte_fast(page_address(ptepage)); + pte_free_kernel(page_address(ptepage)); } -#define pmd_free(pmd) free_pmd_fast(pmd) -#define pgd_free(pgd) free_pgd_fast(pgd) -#define pgd_alloc(mm) get_pgd_fast() + +#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) +#define pmd_populate(MM,PMD,PTE_PAGE) \ + pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) + +#define check_pgt_cache() do { } while (0) #endif /* _SPARC64_PGALLOC_H */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index f0a9b44d3eb5..ed4124edf837 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -25,7 +25,8 @@ #include <asm/const.h> /* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB). - * The page copy blockops can use 0x2000000 to 0x10000000. + * The page copy blockops can use 0x2000000 to 0x4000000. + * The TSB is mapped in the 0x4000000 to 0x6000000 range. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. * The vmalloc area spans 0x100000000 to 0x200000000. * Since modules need to be in the lowest 32-bits of the address space, @@ -34,6 +35,7 @@ * 0x400000000. 
*/ #define TLBTEMP_BASE _AC(0x0000000002000000,UL) +#define TSBMAP_BASE _AC(0x0000000004000000,UL) #define MODULES_VADDR _AC(0x0000000010000000,UL) #define MODULES_LEN _AC(0x00000000e0000000,UL) #define MODULES_END _AC(0x00000000f0000000,UL) @@ -88,162 +90,538 @@ #endif /* !(__ASSEMBLY__) */ -/* Spitfire/Cheetah TTE bits. */ -#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ -#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit up to date*/ -#define _PAGE_SZ4MB _AC(0x6000000000000000,UL) /* 4MB Page */ -#define _PAGE_SZ512K _AC(0x4000000000000000,UL) /* 512K Page */ -#define _PAGE_SZ64K _AC(0x2000000000000000,UL) /* 64K Page */ -#define _PAGE_SZ8K _AC(0x0000000000000000,UL) /* 8K Page */ -#define _PAGE_NFO _AC(0x1000000000000000,UL) /* No Fault Only */ -#define _PAGE_IE _AC(0x0800000000000000,UL) /* Invert Endianness */ -#define _PAGE_SOFT2 _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ -#define _PAGE_RES1 _AC(0x0002000000000000,UL) /* Reserved */ -#define _PAGE_SZ32MB _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ -#define _PAGE_SZ256MB _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ -#define _PAGE_SN _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */ -#define _PAGE_RES2 _AC(0x0000780000000000,UL) /* Reserved */ -#define _PAGE_PADDR_SF _AC(0x000001FFFFFFE000,UL) /* (Spitfire) paddr[40:13]*/ -#define _PAGE_PADDR _AC(0x000007FFFFFFE000,UL) /* (Cheetah) paddr[42:13] */ -#define _PAGE_SOFT _AC(0x0000000000001F80,UL) /* Software bits */ -#define _PAGE_L _AC(0x0000000000000040,UL) /* Locked TTE */ -#define _PAGE_CP _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */ -#define _PAGE_CV _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */ -#define _PAGE_E _AC(0x0000000000000008,UL) /* side-Effect */ -#define _PAGE_P _AC(0x0000000000000004,UL) /* Privileged Page */ -#define _PAGE_W _AC(0x0000000000000002,UL) /* Writable */ -#define _PAGE_G _AC(0x0000000000000001,UL) /* Global */ - -/* Here are the SpitFire software bits we use in the 
TTE's. - * - * WARNING: If you are going to try and start using some - * of the soft2 bits, you will need to make - * modifications to the swap entry implementation. - * For example, one thing that could happen is that - * swp_entry_to_pte() would BUG_ON() if you tried - * to use one of the soft2 bits for _PAGE_FILE. - * - * Like other architectures, I have aliased _PAGE_FILE with - * _PAGE_MODIFIED. This works because _PAGE_FILE is never - * interpreted that way unless _PAGE_PRESENT is clear. - */ -#define _PAGE_EXEC _AC(0x0000000000001000,UL) /* Executable SW bit */ -#define _PAGE_MODIFIED _AC(0x0000000000000800,UL) /* Modified (dirty) */ -#define _PAGE_FILE _AC(0x0000000000000800,UL) /* Pagecache page */ -#define _PAGE_ACCESSED _AC(0x0000000000000400,UL) /* Accessed (ref'd) */ -#define _PAGE_READ _AC(0x0000000000000200,UL) /* Readable SW Bit */ -#define _PAGE_WRITE _AC(0x0000000000000100,UL) /* Writable SW Bit */ -#define _PAGE_PRESENT _AC(0x0000000000000080,UL) /* Present */ +/* PTE bits which are the same in SUN4U and SUN4V format. */ +#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ +#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ + +/* SUN4U pte bits... 
*/ +#define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */ +#define _PAGE_SZ512K_4U _AC(0x4000000000000000,UL) /* 512K Page */ +#define _PAGE_SZ64K_4U _AC(0x2000000000000000,UL) /* 64K Page */ +#define _PAGE_SZ8K_4U _AC(0x0000000000000000,UL) /* 8K Page */ +#define _PAGE_NFO_4U _AC(0x1000000000000000,UL) /* No Fault Only */ +#define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */ +#define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ +#define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */ +#define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ +#define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ +#define _PAGE_SN_4U _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */ +#define _PAGE_RES2_4U _AC(0x0000780000000000,UL) /* Reserved */ +#define _PAGE_PADDR_4U _AC(0x000007FFFFFFE000,UL) /* (Cheetah) pa[42:13] */ +#define _PAGE_SOFT_4U _AC(0x0000000000001F80,UL) /* Software bits: */ +#define _PAGE_EXEC_4U _AC(0x0000000000001000,UL) /* Executable SW bit */ +#define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty) */ +#define _PAGE_FILE_4U _AC(0x0000000000000800,UL) /* Pagecache page */ +#define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd) */ +#define _PAGE_READ_4U _AC(0x0000000000000200,UL) /* Readable SW Bit */ +#define _PAGE_WRITE_4U _AC(0x0000000000000100,UL) /* Writable SW Bit */ +#define _PAGE_PRESENT_4U _AC(0x0000000000000080,UL) /* Present */ +#define _PAGE_L_4U _AC(0x0000000000000040,UL) /* Locked TTE */ +#define _PAGE_CP_4U _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */ +#define _PAGE_CV_4U _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */ +#define _PAGE_E_4U _AC(0x0000000000000008,UL) /* side-Effect */ +#define _PAGE_P_4U _AC(0x0000000000000004,UL) /* Privileged Page */ +#define _PAGE_W_4U _AC(0x0000000000000002,UL) /* Writable */ + +/* SUN4V pte bits... 
*/ +#define _PAGE_NFO_4V _AC(0x4000000000000000,UL) /* No Fault Only */ +#define _PAGE_SOFT2_4V _AC(0x3F00000000000000,UL) /* Software bits, set 2 */ +#define _PAGE_MODIFIED_4V _AC(0x2000000000000000,UL) /* Modified (dirty) */ +#define _PAGE_ACCESSED_4V _AC(0x1000000000000000,UL) /* Accessed (ref'd) */ +#define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */ +#define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */ +#define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */ +#define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */ +#define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */ +#define _PAGE_CP_4V _AC(0x0000000000000400,UL) /* Cacheable in P-Cache */ +#define _PAGE_CV_4V _AC(0x0000000000000200,UL) /* Cacheable in V-Cache */ +#define _PAGE_P_4V _AC(0x0000000000000100,UL) /* Privileged Page */ +#define _PAGE_EXEC_4V _AC(0x0000000000000080,UL) /* Executable Page */ +#define _PAGE_W_4V _AC(0x0000000000000040,UL) /* Writable */ +#define _PAGE_SOFT_4V _AC(0x0000000000000030,UL) /* Software bits */ +#define _PAGE_FILE_4V _AC(0x0000000000000020,UL) /* Pagecache page */ +#define _PAGE_PRESENT_4V _AC(0x0000000000000010,UL) /* Present */ +#define _PAGE_RESV_4V _AC(0x0000000000000008,UL) /* Reserved */ +#define _PAGE_SZ16GB_4V _AC(0x0000000000000007,UL) /* 16GB Page */ +#define _PAGE_SZ2GB_4V _AC(0x0000000000000006,UL) /* 2GB Page */ +#define _PAGE_SZ256MB_4V _AC(0x0000000000000005,UL) /* 256MB Page */ +#define _PAGE_SZ32MB_4V _AC(0x0000000000000004,UL) /* 32MB Page */ +#define _PAGE_SZ4MB_4V _AC(0x0000000000000003,UL) /* 4MB Page */ +#define _PAGE_SZ512K_4V _AC(0x0000000000000002,UL) /* 512K Page */ +#define _PAGE_SZ64K_4V _AC(0x0000000000000001,UL) /* 64K Page */ +#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */ #if PAGE_SHIFT == 13 -#define _PAGE_SZBITS _PAGE_SZ8K +#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V #elif PAGE_SHIFT == 16 -#define _PAGE_SZBITS 
_PAGE_SZ64K +#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V #elif PAGE_SHIFT == 19 -#define _PAGE_SZBITS _PAGE_SZ512K +#define _PAGE_SZBITS_4U _PAGE_SZ512K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ512K_4V #elif PAGE_SHIFT == 22 -#define _PAGE_SZBITS _PAGE_SZ4MB +#define _PAGE_SZBITS_4U _PAGE_SZ4MB_4U +#define _PAGE_SZBITS_4V _PAGE_SZ4MB_4V #else #error Wrong PAGE_SHIFT specified #endif #if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) -#define _PAGE_SZHUGE _PAGE_SZ4MB +#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) -#define _PAGE_SZHUGE _PAGE_SZ512K +#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V #elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) -#define _PAGE_SZHUGE _PAGE_SZ64K +#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V #endif -#define _PAGE_CACHE (_PAGE_CP | _PAGE_CV) +/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */ +#define __P000 __pgprot(0) +#define __P001 __pgprot(0) +#define __P010 __pgprot(0) +#define __P011 __pgprot(0) +#define __P100 __pgprot(0) +#define __P101 __pgprot(0) +#define __P110 __pgprot(0) +#define __P111 __pgprot(0) + +#define __S000 __pgprot(0) +#define __S001 __pgprot(0) +#define __S010 __pgprot(0) +#define __S011 __pgprot(0) +#define __S100 __pgprot(0) +#define __S101 __pgprot(0) +#define __S110 __pgprot(0) +#define __S111 __pgprot(0) -#define __DIRTY_BITS (_PAGE_MODIFIED | _PAGE_WRITE | _PAGE_W) -#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_READ | _PAGE_R) -#define __PRIV_BITS _PAGE_P +#ifndef __ASSEMBLY__ -#define PAGE_NONE __pgprot (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_CACHE) +extern pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long); -/* Don't set the TTE _PAGE_W bit here, else the dirty bit never gets set. 
*/ -#define PAGE_SHARED __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_WRITE | _PAGE_EXEC) +extern unsigned long pte_sz_bits(unsigned long size); -#define PAGE_COPY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_EXEC) +extern pgprot_t PAGE_KERNEL; +extern pgprot_t PAGE_KERNEL_LOCKED; +extern pgprot_t PAGE_COPY; +extern pgprot_t PAGE_SHARED; -#define PAGE_READONLY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_EXEC) +/* XXX This uglyness is for the atyfb driver's sparc mmap() support. XXX */ +extern unsigned long _PAGE_IE; +extern unsigned long _PAGE_E; +extern unsigned long _PAGE_CACHE; -#define PAGE_KERNEL __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __PRIV_BITS | \ - __ACCESS_BITS | __DIRTY_BITS | _PAGE_EXEC) +extern unsigned long pg_iobits; +extern unsigned long _PAGE_ALL_SZ_BITS; +extern unsigned long _PAGE_SZBITS; -#define PAGE_SHARED_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_WRITE) +extern struct page *mem_map_zero; +#define ZERO_PAGE(vaddr) (mem_map_zero) -#define PAGE_COPY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | __ACCESS_BITS) +/* PFNs are real physical page numbers. However, mem_map only begins to record + * per-page information starting at pfn_base. This is to handle systems where + * the first physical page in the machine is at some huge physical address, + * such as 4GB. This is common on a partitioned E10000, for example. 
+ */ +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) +{ + unsigned long paddr = pfn << PAGE_SHIFT; + unsigned long sz_bits; + + sz_bits = 0UL; + if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) { + __asm__ __volatile__( + "\n661: sethi %uhi(%1), %0\n" + " sllx %0, 32, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " mov %2, %0\n" + " nop\n" + " .previous\n" + : "=r" (sz_bits) + : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V)); + } + return __pte(paddr | sz_bits | pgprot_val(prot)); +} +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -#define PAGE_READONLY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | __ACCESS_BITS) +/* This one can be done with two shifts. */ +static inline unsigned long pte_pfn(pte_t pte) +{ + unsigned long ret; + + __asm__ __volatile__( + "\n661: sllx %1, %2, %0\n" + " srlx %0, %3, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sllx %1, %4, %0\n" + " srlx %0, %5, %0\n" + " .previous\n" + : "=r" (ret) + : "r" (pte_val(pte)), + "i" (21), "i" (21 + PAGE_SHIFT), + "i" (8), "i" (8 + PAGE_SHIFT)); + + return ret; +} +#define pte_page(x) pfn_to_page(pte_pfn(x)) -#define _PFN_MASK _PAGE_PADDR +static inline pte_t pte_modify(pte_t pte, pgprot_t prot) +{ + unsigned long mask, tmp; + + /* SUN4U: 0x600307ffffffecb8 (negated == 0x9ffcf80000001347) + * SUN4V: 0x30ffffffffffee17 (negated == 0xcf000000000011e8) + * + * Even if we use negation tricks the result is still a 6 + * instruction sequence, so don't try to play fancy and just + * do the most straightforward implementation. + * + * Note: We encode this into 3 sun4v 2-insn patch sequences. 
+ */ -#define pg_iobits (_PAGE_VALID | _PAGE_PRESENT | __DIRTY_BITS | \ - __ACCESS_BITS | _PAGE_E) + __asm__ __volatile__( + "\n661: sethi %%uhi(%2), %1\n" + " sethi %%hi(%2), %0\n" + "\n662: or %1, %%ulo(%2), %1\n" + " or %0, %%lo(%2), %0\n" + "\n663: sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%3), %1\n" + " sethi %%hi(%3), %0\n" + " .word 662b\n" + " or %1, %%ulo(%3), %1\n" + " or %0, %%lo(%3), %0\n" + " .word 663b\n" + " sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .previous\n" + : "=r" (mask), "=r" (tmp) + : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | + _PAGE_SZBITS_4U), + "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | + _PAGE_SZBITS_4V)); + + return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); +} -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY_NOEXEC -#define __P010 PAGE_COPY_NOEXEC -#define __P011 PAGE_COPY_NOEXEC -#define __P100 PAGE_READONLY -#define __P101 PAGE_READONLY -#define __P110 PAGE_COPY -#define __P111 PAGE_COPY +static inline pte_t pgoff_to_pte(unsigned long off) +{ + off <<= PAGE_SHIFT; + + __asm__ __volatile__( + "\n661: or %0, %2, %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " or %0, %3, %0\n" + " .previous\n" + : "=r" (off) + : "0" (off), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V)); + + return __pte(off); +} -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY_NOEXEC -#define __S010 PAGE_SHARED_NOEXEC -#define __S011 PAGE_SHARED_NOEXEC -#define __S100 PAGE_READONLY -#define __S101 PAGE_READONLY -#define __S110 PAGE_SHARED -#define __S111 PAGE_SHARED +static inline pgprot_t pgprot_noncached(pgprot_t prot) +{ + unsigned long val = pgprot_val(prot); + + __asm__ __volatile__( + "\n661: andn %0, %2, %0\n" + " or %0, %3, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " 
.word 661b\n" + " andn %0, %4, %0\n" + " or %0, %3, %0\n" + " .previous\n" + : "=r" (val) + : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), + "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); + + return __pgprot(val); +} +/* Various pieces of code check for platform support by ifdef testing + * on "pgprot_noncached". That's broken and should be fixed, but for + * now... + */ +#define pgprot_noncached pgprot_noncached -#ifndef __ASSEMBLY__ +#ifdef CONFIG_HUGETLB_PAGE +static inline pte_t pte_mkhuge(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: sethi %%uhi(%1), %0\n" + " sllx %0, 32, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " mov %2, %0\n" + " nop\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V)); + + return __pte(pte_val(pte) | mask); +} +#endif -extern unsigned long phys_base; -extern unsigned long pfn_base; +static inline pte_t pte_mkdirty(pte_t pte) +{ + unsigned long val = pte_val(pte), tmp; + + __asm__ __volatile__( + "\n661: or %0, %3, %0\n" + " nop\n" + "\n662: nop\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sllx %1, 32, %1\n" + " .word 662b\n" + " or %1, %%lo(%4), %1\n" + " or %0, %1, %0\n" + " .previous\n" + : "=r" (val), "=r" (tmp) + : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U), + "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V)); + + return __pte(val); +} -extern struct page *mem_map_zero; -#define ZERO_PAGE(vaddr) (mem_map_zero) +static inline pte_t pte_mkclean(pte_t pte) +{ + unsigned long val = pte_val(pte), tmp; + + __asm__ __volatile__( + "\n661: andn %0, %3, %0\n" + " nop\n" + "\n662: nop\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sllx %1, 32, %1\n" + " .word 662b\n" + " or %1, %%lo(%4), %1\n" + " andn %0, %1, %0\n" + " .previous\n" + : "=r" (val), "=r" (tmp) + : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U), + "i" 
(_PAGE_MODIFIED_4V | _PAGE_W_4V)); + + return __pte(val); +} -/* PFNs are real physical page numbers. However, mem_map only begins to record - * per-page information starting at pfn_base. This is to handle systems where - * the first physical page in the machine is at some huge physical address, - * such as 4GB. This is common on a partitioned E10000, for example. - */ +static inline pte_t pte_mkwrite(pte_t pte) +{ + unsigned long val = pte_val(pte), mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V)); + + return __pte(val | mask); +} -#define pfn_pte(pfn, prot) \ - __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot) | _PAGE_SZBITS) -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +static inline pte_t pte_wrprotect(pte_t pte) +{ + unsigned long val = pte_val(pte), tmp; + + __asm__ __volatile__( + "\n661: andn %0, %3, %0\n" + " nop\n" + "\n662: nop\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sllx %1, 32, %1\n" + " .word 662b\n" + " or %1, %%lo(%4), %1\n" + " andn %0, %1, %0\n" + " .previous\n" + : "=r" (val), "=r" (tmp) + : "0" (val), "i" (_PAGE_WRITE_4U | _PAGE_W_4U), + "i" (_PAGE_WRITE_4V | _PAGE_W_4V)); + + return __pte(val); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V)); + + mask |= _PAGE_R; + + return __pte(pte_val(pte) & ~mask); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" 
+ " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V)); + + mask |= _PAGE_R; + + return __pte(pte_val(pte) | mask); +} -#define pte_pfn(x) ((pte_val(x) & _PAGE_PADDR)>>PAGE_SHIFT) -#define pte_page(x) pfn_to_page(pte_pfn(x)) +static inline unsigned long pte_young(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V)); + + return (pte_val(pte) & mask); +} + +static inline unsigned long pte_dirty(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_MODIFIED_4U), "i" (_PAGE_MODIFIED_4V)); + + return (pte_val(pte) & mask); +} -static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) +static inline unsigned long pte_write(pte_t pte) { - pte_t __pte; - const unsigned long preserve_mask = (_PFN_MASK | - _PAGE_MODIFIED | _PAGE_ACCESSED | - _PAGE_CACHE | _PAGE_E | - _PAGE_PRESENT | _PAGE_SZBITS); + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V)); + + return (pte_val(pte) & mask); +} - pte_val(__pte) = (pte_val(orig_pte) & preserve_mask) | - (pgprot_val(new_prot) & ~preserve_mask); +static inline unsigned long pte_exec(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: sethi %%hi(%1), %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " mov %2, %0\n" + " .previous\n" + : 
"=r" (mask) + : "i" (_PAGE_EXEC_4U), "i" (_PAGE_EXEC_4V)); + + return (pte_val(pte) & mask); +} - return __pte; +static inline unsigned long pte_read(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_READ_4U), "i" (_PAGE_READ_4V)); + + return (pte_val(pte) & mask); } + +static inline unsigned long pte_file(pte_t pte) +{ + unsigned long val = pte_val(pte); + + __asm__ __volatile__( + "\n661: and %0, %2, %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " and %0, %3, %0\n" + " .previous\n" + : "=r" (val) + : "0" (val), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V)); + + return val; +} + +static inline unsigned long pte_present(pte_t pte) +{ + unsigned long val = pte_val(pte); + + __asm__ __volatile__( + "\n661: and %0, %2, %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " and %0, %3, %0\n" + " .previous\n" + : "=r" (val) + : "0" (val), "i" (_PAGE_PRESENT_4U), "i" (_PAGE_PRESENT_4V)); + + return val; +} + #define pmd_set(pmdp, ptep) \ (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) #define pud_set(pudp, pmdp) \ @@ -253,8 +631,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) #define pud_page(pud) \ ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (0) #define pmd_present(pmd) (pmd_val(pmd) != 0U) @@ -264,30 +640,8 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0U) -/* The following only work if pte_present() is true. - * Undefined behaviour if not.. 
- */ -#define pte_read(pte) (pte_val(pte) & _PAGE_READ) -#define pte_exec(pte) (pte_val(pte) & _PAGE_EXEC) -#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED) -#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~(_PAGE_WRITE|_PAGE_W))) -#define pte_rdprotect(pte) \ - (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_READ)) -#define pte_mkclean(pte) \ - (__pte(pte_val(pte) & ~(_PAGE_MODIFIED|_PAGE_W))) -#define pte_mkold(pte) \ - (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_ACCESSED)) - -/* Permanent address of a page. */ -#define __page_address(page) page_address(page) - -/* Be very careful when you change these three, they are delicate. */ -#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R)) -#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE)) -#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W)) -#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_SZHUGE)) +/* Same in both SUN4V and SUN4U. */ +#define pte_none(pte) (!pte_val(pte)) /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -296,11 +650,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) -/* extract the pgd cache used for optimizing the tlb miss - * slow path when executing 32-bit compat processes - */ -#define get_pgd_cache(pgd) ((unsigned long) pgd_val(*pgd) << 11) - /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ ((pmd_t *) pud_page(*(pudp)) + \ @@ -327,6 +676,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p /* It is more efficient to let flush_tlb_kernel_range() * handle init_mm tlb flushes. 
+ * + * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U + * and SUN4V pte layout, so this inline test is fine. */ if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID)) tlb_batch_add(mm, addr, ptep, orig); @@ -361,42 +713,23 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) /* File offset in PTE support. */ -#define pte_file(pte) (pte_val(pte) & _PAGE_FILE) +extern unsigned long pte_file(pte_t); #define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pgoff_to_pte(off) (__pte(((off) << PAGE_SHIFT) | _PAGE_FILE)) +extern pte_t pgoff_to_pte(unsigned long); #define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL) extern unsigned long prom_virt_to_phys(unsigned long, int *); -static __inline__ unsigned long -sun4u_get_pte (unsigned long addr) -{ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - if (addr >= PAGE_OFFSET) - return addr & _PAGE_PADDR; - if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS)) - return prom_virt_to_phys(addr, NULL); - pgdp = pgd_offset_k(addr); - pudp = pud_offset(pgdp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - return pte_val(*ptep) & _PAGE_PADDR; -} +extern unsigned long sun4u_get_pte(unsigned long); -static __inline__ unsigned long -__get_phys (unsigned long addr) +static inline unsigned long __get_phys(unsigned long addr) { - return sun4u_get_pte (addr); + return sun4u_get_pte(addr); } -static __inline__ int -__get_iospace (unsigned long addr) +static inline int __get_iospace(unsigned long addr) { - return ((sun4u_get_pte (addr) & 0xf0000000) >> 28); + return ((sun4u_get_pte(addr) & 0xf0000000) >> 28); } extern unsigned long *sparc64_valid_addr_bitmap; @@ -409,11 +742,6 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); -/* Clear virtual and physical cachability, set side-effect bit. 
*/ -#define pgprot_noncached(prot) \ - (__pgprot((pgprot_val(prot) & ~(_PAGE_CP | _PAGE_CV)) | \ - _PAGE_E)) - /* * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in * its high 4 bits. These macros/functions put it there or get it from there. @@ -424,8 +752,11 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, #include <asm-generic/pgtable.h> -/* We provide our own get_unmapped_area to cope with VA holes for userland */ +/* We provide our own get_unmapped_area to cope with VA holes and + * SHM area cache aliasing for userland. + */ #define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN /* We provide a special get_unmapped_area for framebuffer mmaps to try and use * the largest alignment possible such that larget PTEs can be used. @@ -435,12 +766,9 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, unsigned long); #define HAVE_ARCH_FB_UNMAPPED_AREA -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - -extern void check_pgt_cache(void); +extern void pgtable_cache_init(void); +extern void sun4v_register_fault_status(void); +extern void sun4v_ktsb_register(void); #endif /* !(__ASSEMBLY__) */ diff --git a/include/asm-sparc64/pil.h b/include/asm-sparc64/pil.h index 8f87750c3517..79f827eb3f5d 100644 --- a/include/asm-sparc64/pil.h +++ b/include/asm-sparc64/pil.h @@ -16,11 +16,13 @@ #define PIL_SMP_CALL_FUNC 1 #define PIL_SMP_RECEIVE_SIGNAL 2 #define PIL_SMP_CAPTURE 3 +#define PIL_SMP_CTX_NEW_VERSION 4 #ifndef __ASSEMBLY__ #define PIL_RESERVED(PIL) ((PIL) == PIL_SMP_CALL_FUNC || \ (PIL) == PIL_SMP_RECEIVE_SIGNAL || \ - (PIL) == PIL_SMP_CAPTURE) + (PIL) == PIL_SMP_CAPTURE || \ + (PIL) == PIL_SMP_CTX_NEW_VERSION) #endif #endif /* !(_SPARC64_PIL_H) */ diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h index cd8d9b4c8658..c6896b88283e 100644 --- a/include/asm-sparc64/processor.h +++ 
b/include/asm-sparc64/processor.h @@ -28,6 +28,8 @@ * User lives in his very own context, and cannot reference us. Note * that TASK_SIZE is a misnomer, it really gives maximum user virtual * address that the kernel will allocate out. + * + * XXX No longer using virtual page tables, kill this upper limit... */ #define VA_BITS 44 #ifndef __ASSEMBLY__ @@ -37,18 +39,6 @@ #endif #define TASK_SIZE ((unsigned long)-VPTE_SIZE) -/* - * The vpte base must be able to hold the entire vpte, half - * of which lives above, and half below, the base. And it - * is placed as close to the highest address range as possible. - */ -#define VPTE_BASE_SPITFIRE (-(VPTE_SIZE/2)) -#if 1 -#define VPTE_BASE_CHEETAH VPTE_BASE_SPITFIRE -#else -#define VPTE_BASE_CHEETAH 0xffe0000000000000 -#endif - #ifndef __ASSEMBLY__ typedef struct { @@ -101,7 +91,8 @@ extern unsigned long thread_saved_pc(struct task_struct *); /* Do necessary setup to start up a newly executed thread. */ #define start_thread(regs, pc, sp) \ do { \ - regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (ASI_PNF << 24); \ + unsigned long __asi = ASI_PNF; \ + regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (__asi << 24UL); \ regs->tpc = ((pc & (~3)) - 4); \ regs->tnpc = regs->tpc + 4; \ regs->y = 0; \ @@ -138,10 +129,10 @@ do { \ #define start_thread32(regs, pc, sp) \ do { \ + unsigned long __asi = ASI_PNF; \ pc &= 0x00000000ffffffffUL; \ sp &= 0x00000000ffffffffUL; \ -\ - regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM); \ + regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM) | (__asi << 24UL); \ regs->tpc = ((pc & (~3)) - 4); \ regs->tnpc = regs->tpc + 4; \ regs->y = 0; \ @@ -226,6 +217,8 @@ static inline void prefetchw(const void *x) #define spin_lock_prefetch(x) prefetchw(x) +#define HAVE_ARCH_PICK_MMAP_LAYOUT + #endif /* !(__ASSEMBLY__) */ #endif /* !(__ASM_SPARC64_PROCESSOR_H) */ diff --git 
a/include/asm-sparc64/pstate.h b/include/asm-sparc64/pstate.h index 29fb74aa805d..49a7924a89ab 100644 --- a/include/asm-sparc64/pstate.h +++ b/include/asm-sparc64/pstate.h @@ -28,11 +28,12 @@ /* The V9 TSTATE Register (with SpitFire and Linux extensions). * - * --------------------------------------------------------------- - * | Resv | CCR | ASI | %pil | PSTATE | Resv | CWP | - * --------------------------------------------------------------- - * 63 40 39 32 31 24 23 20 19 8 7 5 4 0 + * --------------------------------------------------------------------- + * | Resv | GL | CCR | ASI | %pil | PSTATE | Resv | CWP | + * --------------------------------------------------------------------- + * 63 43 42 40 39 32 31 24 23 20 19 8 7 5 4 0 */ +#define TSTATE_GL _AC(0x0000070000000000,UL) /* Global reg level */ #define TSTATE_CCR _AC(0x000000ff00000000,UL) /* Condition Codes. */ #define TSTATE_XCC _AC(0x000000f000000000,UL) /* Condition Codes. */ #define TSTATE_XNEG _AC(0x0000008000000000,UL) /* %xcc Negative. */ diff --git a/include/asm-sparc64/scratchpad.h b/include/asm-sparc64/scratchpad.h new file mode 100644 index 000000000000..5e8b01fb3343 --- /dev/null +++ b/include/asm-sparc64/scratchpad.h @@ -0,0 +1,14 @@ +#ifndef _SPARC64_SCRATCHPAD_H +#define _SPARC64_SCRATCHPAD_H + +/* Sun4v scratchpad registers, accessed via ASI_SCRATCHPAD. */ + +#define SCRATCHPAD_MMU_MISS 0x00 /* Shared with OBP - set by OBP */ +#define SCRATCHPAD_CPUID 0x08 /* Shared with OBP - set by hypervisor */ +#define SCRATCHPAD_UTSBREG1 0x10 +#define SCRATCHPAD_UTSBREG2 0x18 + /* 0x20 and 0x28, hypervisor only... 
*/ +#define SCRATCHPAD_UNUSED1 0x30 +#define SCRATCHPAD_UNUSED2 0x38 /* Reserved for OBP */ + +#endif /* !(_SPARC64_SCRATCHPAD_H) */ diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index 473edb2603ec..89d86ecaab24 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -33,37 +33,13 @@ extern cpumask_t phys_cpu_present_map; #define cpu_possible_map phys_cpu_present_map +extern cpumask_t cpu_sibling_map[NR_CPUS]; + /* * General functions that each host system must provide. */ -static __inline__ int hard_smp_processor_id(void) -{ - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - unsigned long cfg, ver; - __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (cfg) - : "i" (ASI_JBUS_CONFIG)); - return ((cfg >> 17) & 0x1f); - } else { - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (cfg) - : "i" (ASI_SAFARI_CONFIG)); - return ((cfg >> 17) & 0x3ff); - } - } else if (this_is_starfire != 0) { - return starfire_hard_smp_processor_id(); - } else { - unsigned long upaconfig; - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (upaconfig) - : "i" (ASI_UPA_CONFIG)); - return ((upaconfig >> 17) & 0x1f); - } -} - +extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) extern void smp_setup_cpu_possible_map(void); diff --git a/include/asm-sparc64/sparsemem.h b/include/asm-sparc64/sparsemem.h new file mode 100644 index 000000000000..ed5c9d8541e2 --- /dev/null +++ b/include/asm-sparc64/sparsemem.h @@ -0,0 +1,12 @@ +#ifndef _SPARC64_SPARSEMEM_H +#define _SPARC64_SPARSEMEM_H + +#ifdef __KERNEL__ + +#define SECTION_SIZE_BITS 26 +#define MAX_PHYSADDR_BITS 42 +#define MAX_PHYSMEM_BITS 42 + +#endif /* !(__KERNEL__) */ + +#endif /* !(_SPARC64_SPARSEMEM_H) */ diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h index 962638c9d122..23ad8a7987ad 100644 --- a/include/asm-sparc64/spitfire.h 
+++ b/include/asm-sparc64/spitfire.h @@ -44,6 +44,7 @@ enum ultra_tlb_layout { spitfire = 0, cheetah = 1, cheetah_plus = 2, + hypervisor = 3, }; extern enum ultra_tlb_layout tlb_type; diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index af254e581834..a18ec87a52c1 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -209,9 +209,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ /* so that ASI is only written if it changes, think again. */ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ : : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\ + trap_block[current_thread_info()->cpu].thread = \ + task_thread_info(next); \ __asm__ __volatile__( \ "mov %%g4, %%g7\n\t" \ - "wrpr %%g0, 0x95, %%pstate\n\t" \ "stx %%i6, [%%sp + 2047 + 0x70]\n\t" \ "stx %%i7, [%%sp + 2047 + 0x78]\n\t" \ "rdpr %%wstate, %%o5\n\t" \ @@ -225,14 +226,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ "ldx [%%g6 + %3], %%o6\n\t" \ "ldub [%%g6 + %2], %%o5\n\t" \ "ldub [%%g6 + %4], %%o7\n\t" \ - "mov %%g6, %%l2\n\t" \ "wrpr %%o5, 0x0, %%wstate\n\t" \ "ldx [%%sp + 2047 + 0x70], %%i6\n\t" \ "ldx [%%sp + 2047 + 0x78], %%i7\n\t" \ - "wrpr %%g0, 0x94, %%pstate\n\t" \ - "mov %%l2, %%g6\n\t" \ "ldx [%%g6 + %6], %%g4\n\t" \ - "wrpr %%g0, 0x96, %%pstate\n\t" \ "brz,pt %%o7, 1f\n\t" \ " mov %%g7, %0\n\t" \ "b,a ret_from_syscall\n\t" \ diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h index ac9d068aab4f..2ebf7f27bf91 100644 --- a/include/asm-sparc64/thread_info.h +++ b/include/asm-sparc64/thread_info.h @@ -64,8 +64,6 @@ struct thread_info { __u64 kernel_cntd0, kernel_cntd1; __u64 pcr_reg; - __u64 cee_stuff; - struct restart_block restart_block; struct pt_regs *kern_una_regs; @@ -104,10 +102,9 @@ struct thread_info { #define TI_KERN_CNTD0 0x00000480 #define TI_KERN_CNTD1 0x00000488 #define TI_PCR 0x00000490 -#define TI_CEE_STUFF 0x00000498 -#define TI_RESTART_BLOCK 0x000004a0 -#define TI_KUNA_REGS 
0x000004c8 -#define TI_KUNA_INSN 0x000004d0 +#define TI_RESTART_BLOCK 0x00000498 +#define TI_KUNA_REGS 0x000004c0 +#define TI_KUNA_INSN 0x000004c8 #define TI_FPREGS 0x00000500 /* We embed this in the uppermost byte of thread_info->flags */ diff --git a/include/asm-sparc64/timex.h b/include/asm-sparc64/timex.h index 9e8d4175bcb2..2a5e4ebaad80 100644 --- a/include/asm-sparc64/timex.h +++ b/include/asm-sparc64/timex.h @@ -14,4 +14,10 @@ typedef unsigned long cycles_t; #define get_cycles() tick_ops->get_tick() +#define ARCH_HAS_READ_CURRENT_TIMER 1 +#define read_current_timer(timer_val_p) \ +({ *timer_val_p = tick_ops->get_tick(); \ + 0; \ +}) + #endif diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h index 3ef9909ac3ac..9ad5d9c51d42 100644 --- a/include/asm-sparc64/tlbflush.h +++ b/include/asm-sparc64/tlbflush.h @@ -5,6 +5,11 @@ #include <linux/mm.h> #include <asm/mmu_context.h> +/* TSB flush operations. */ +struct mmu_gather; +extern void flush_tsb_kernel_range(unsigned long start, unsigned long end); +extern void flush_tsb_user(struct mmu_gather *mp); + /* TLB flush operations. */ extern void flush_tlb_pending(void); @@ -14,28 +19,36 @@ extern void flush_tlb_pending(void); #define flush_tlb_page(vma,addr) flush_tlb_pending() #define flush_tlb_mm(mm) flush_tlb_pending() +/* Local cpu only. 
*/ extern void __flush_tlb_all(void); + extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r); extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP -#define flush_tlb_all() __flush_tlb_all() #define flush_tlb_kernel_range(start,end) \ - __flush_tlb_kernel_range(start,end) +do { flush_tsb_kernel_range(start,end); \ + __flush_tlb_kernel_range(start,end); \ +} while (0) #else /* CONFIG_SMP */ -extern void smp_flush_tlb_all(void); extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); -#define flush_tlb_all() smp_flush_tlb_all() #define flush_tlb_kernel_range(start, end) \ - smp_flush_tlb_kernel_range(start, end) +do { flush_tsb_kernel_range(start,end); \ + smp_flush_tlb_kernel_range(start, end); \ +} while (0) #endif /* ! CONFIG_SMP */ -extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long); +static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + /* We don't use virtual page tables for TLB miss processing + * any more. Nowadays we use the TSB. + */ +} #endif /* _SPARC64_TLBFLUSH_H */ diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h new file mode 100644 index 000000000000..e82612cd9f33 --- /dev/null +++ b/include/asm-sparc64/tsb.h @@ -0,0 +1,281 @@ +#ifndef _SPARC64_TSB_H +#define _SPARC64_TSB_H + +/* The sparc64 TSB is similar to the powerpc hashtables. It's a + * power-of-2 sized table of TAG/PTE pairs. The cpu precomputes + * pointers into this table for 8K and 64K page sizes, and also a + * comparison TAG based upon the virtual address and context which + * faults. 
+ * + * TLB miss trap handler software does the actual lookup via something + * of the form: + * + * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1 + * ldxa [%g0] ASI_{D,I}MMU, %g6 + * sllx %g6, 22, %g6 + * srlx %g6, 22, %g6 + * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 + * cmp %g4, %g6 + * bne,pn %xcc, tsb_miss_{d,i}tlb + * mov FAULT_CODE_{D,I}TLB, %g3 + * stxa %g5, [%g0] ASI_{D,I}TLB_DATA_IN + * retry + * + * + * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte + * PTE. The TAG is of the same layout as the TLB TAG TARGET mmu + * register which is: + * + * ------------------------------------------------- + * | - | CONTEXT | - | VADDR bits 63:22 | + * ------------------------------------------------- + * 63 61 60 48 47 42 41 0 + * + * But actually, since we use per-mm TSB's, we zero out the CONTEXT + * field. + * + * Like the powerpc hashtables we need to use locking in order to + * synchronize while we update the entries. PTE updates need locking + * as well. + * + * We need to carefully choose a lock bits for the TSB entry. We + * choose to use bit 47 in the tag. Also, since we never map anything + * at page zero in context zero, we use zero as an invalid tag entry. + * When the lock bit is set, this forces a tag comparison failure. + */ + +#define TSB_TAG_LOCK_BIT 47 +#define TSB_TAG_LOCK_HIGH (1 << (TSB_TAG_LOCK_BIT - 32)) + +#define TSB_TAG_INVALID_BIT 46 +#define TSB_TAG_INVALID_HIGH (1 << (TSB_TAG_INVALID_BIT - 32)) + +#define TSB_MEMBAR membar #StoreStore + +/* Some cpus support physical address quad loads. We want to use + * those if possible so we don't need to hard-lock the TSB mapping + * into the TLB. We encode some instruction patching in order to + * support this. + * + * The kernel TSB is locked into the TLB by virtue of being in the + * kernel image, so we don't play these games for swapper_tsb access. 
+ */ +#ifndef __ASSEMBLY__ +struct tsb_ldquad_phys_patch_entry { + unsigned int addr; + unsigned int sun4u_insn; + unsigned int sun4v_insn; +}; +extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch, + __tsb_ldquad_phys_patch_end; + +struct tsb_phys_patch_entry { + unsigned int addr; + unsigned int insn; +}; +extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; +#endif +#define TSB_LOAD_QUAD(TSB, REG) \ +661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \ + .section .tsb_ldquad_phys_patch, "ax"; \ + .word 661b; \ + ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \ + ldda [TSB] ASI_QUAD_LDD_PHYS_4V, REG; \ + .previous + +#define TSB_LOAD_TAG_HIGH(TSB, REG) \ +661: lduwa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + lduwa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_LOAD_TAG(TSB, REG) \ +661: ldxa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + ldxa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \ +661: casa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_CAS_TAG(TSB, REG1, REG2) \ +661: casxa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casxa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_STORE(ADDR, VAL) \ +661: stxa VAL, [ADDR] ASI_N; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + stxa VAL, [ADDR] ASI_PHYS_USE_EC; \ + .previous + +#define TSB_LOCK_TAG(TSB, REG1, REG2) \ +99: TSB_LOAD_TAG_HIGH(TSB, REG1); \ + sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ + andcc REG1, REG2, %g0; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_CAS_TAG_HIGH(TSB, REG1, REG2); \ + cmp REG1, REG2; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_MEMBAR + +#define TSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + TSB_STORE(TSB, TTE); \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + TSB_STORE(TSB, TAG); + +#define KTSB_LOAD_QUAD(TSB, REG) \ 
+ ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; + +#define KTSB_STORE(ADDR, VAL) \ + stxa VAL, [ADDR] ASI_N; + +#define KTSB_LOCK_TAG(TSB, REG1, REG2) \ +99: lduwa [TSB] ASI_N, REG1; \ + sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ + andcc REG1, REG2, %g0; \ + bne,pn %icc, 99b; \ + nop; \ + casa [TSB] ASI_N, REG1, REG2;\ + cmp REG1, REG2; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_MEMBAR + +#define KTSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + stxa TTE, [TSB] ASI_N; \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + stxa TAG, [TSB] ASI_N; + + /* Do a kernel page table walk. Leaves physical PTE pointer in + * REG1. Jumps to FAIL_LABEL on early page table walk termination. + * VADDR will not be clobbered, but REG2 will. + */ +#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ + sethi %hi(swapper_pg_dir), REG1; \ + or REG1, %lo(swapper_pg_dir), REG1; \ + sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x3, REG2; \ + lduw [REG1 + REG2], REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x3, REG2; \ + lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x7, REG2; \ + add REG1, REG2, REG1; + + /* Do a user page table walk in MMU globals. Leaves physical PTE + * pointer in REG1. Jumps to FAIL_LABEL on early page table walk + * termination. Physical base of page tables is in PHYS_PGD which + * will not be modified. + * + * VADDR will not be clobbered, but REG1 and REG2 will. 
+ */ +#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \ + sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x3, REG2; \ + lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x3, REG2; \ + lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x7, REG2; \ + add REG1, REG2, REG1; + +/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0. + * If no entry is found, FAIL_LABEL will be branched to. On success + * the resulting PTE value will be left in REG1. VADDR is preserved + * by this routine. + */ +#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \ + sethi %hi(prom_trans), REG1; \ + or REG1, %lo(prom_trans), REG1; \ +97: ldx [REG1 + 0x00], REG2; \ + brz,pn REG2, FAIL_LABEL; \ + nop; \ + ldx [REG1 + 0x08], REG3; \ + add REG2, REG3, REG3; \ + cmp REG2, VADDR; \ + bgu,pt %xcc, 98f; \ + cmp VADDR, REG3; \ + bgeu,pt %xcc, 98f; \ + ldx [REG1 + 0x10], REG3; \ + sub VADDR, REG2, REG2; \ + ba,pt %xcc, 99f; \ + add REG3, REG2, REG1; \ +98: ba,pt %xcc, 97b; \ + add REG1, (3 * 8), REG1; \ +99: + + /* We use a 32K TSB for the whole kernel, this allows to + * handle about 16MB of modules and vmalloc mappings without + * incurring many hash conflicts. + */ +#define KERNEL_TSB_SIZE_BYTES (32 * 1024) +#define KERNEL_TSB_NENTRIES \ + (KERNEL_TSB_SIZE_BYTES / 16) +#define KERNEL_TSB4M_NENTRIES 4096 + + /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL + * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries + * and the found TTE will be left in REG1. REG3 and REG4 must + * be an even/odd pair of registers. 
+ * + * VADDR and TAG will be preserved and not clobbered by this macro. + */ +#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ + sethi %hi(swapper_tsb), REG1; \ + or REG1, %lo(swapper_tsb), REG1; \ + srlx VADDR, PAGE_SHIFT, REG2; \ + and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ + KTSB_LOAD_QUAD(REG2, REG3); \ + cmp REG3, TAG; \ + be,a,pt %xcc, OK_LABEL; \ + mov REG4, REG1; + + /* This version uses a trick, the TAG is already (VADDR >> 22) so + * we can make use of that for the index computation. + */ +#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ + sethi %hi(swapper_4m_tsb), REG1; \ + or REG1, %lo(swapper_4m_tsb), REG1; \ + and TAG, (KERNEL_TSB_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ + KTSB_LOAD_QUAD(REG2, REG3); \ + cmp REG3, TAG; \ + be,a,pt %xcc, OK_LABEL; \ + mov REG4, REG1; + +#endif /* !(_SPARC64_TSB_H) */ diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h index 2784f80094c3..2d5e3c464df5 100644 --- a/include/asm-sparc64/ttable.h +++ b/include/asm-sparc64/ttable.h @@ -93,7 +93,7 @@ #define SYSCALL_TRAP(routine, systbl) \ sethi %hi(109f), %g7; \ - ba,pt %xcc, scetrap; \ + ba,pt %xcc, etrap; \ 109: or %g7, %lo(109b), %g7; \ sethi %hi(systbl), %l7; \ ba,pt %xcc, routine; \ @@ -109,14 +109,14 @@ nop;nop;nop; #define TRAP_UTRAP(handler,lvl) \ - ldx [%g6 + TI_UTRAPS], %g1; \ - sethi %hi(109f), %g7; \ - brz,pn %g1, utrap; \ - or %g7, %lo(109f), %g7; \ - ba,pt %xcc, utrap; \ -109: ldx [%g1 + handler*8], %g1; \ - ba,pt %xcc, utrap_ill; \ - mov lvl, %o1; + mov handler, %g3; \ + ba,pt %xcc, utrap_trap; \ + mov lvl, %g4; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; #ifdef CONFIG_SUNOS_EMUL #define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table) @@ -136,8 +136,6 @@ #else #define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall) #endif -/* FIXME: Write these actually */ -#define NETBSD_SYSCALL_TRAP 
TRAP(netbsd_syscall) #define BREAKPOINT_TRAP TRAP(breakpoint_trap) #define TRAP_IRQ(routine, level) \ @@ -182,6 +180,26 @@ #define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl) #endif +#define SUN4V_ITSB_MISS \ + ldxa [%g0] ASI_SCRATCHPAD, %g2; \ + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4; \ + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5; \ + srlx %g4, 22, %g6; \ + ba,pt %xcc, sun4v_itsb_miss; \ + nop; \ + nop; \ + nop; + +#define SUN4V_DTSB_MISS \ + ldxa [%g0] ASI_SCRATCHPAD, %g2; \ + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4; \ + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5; \ + srlx %g4, 22, %g6; \ + ba,pt %xcc, sun4v_dtsb_miss; \ + nop; \ + nop; \ + nop; + /* Before touching these macros, you owe it to yourself to go and * see how arch/sparc64/kernel/winfixup.S works... -DaveM * @@ -221,6 +239,31 @@ saved; retry; nop; nop; nop; nop; nop; nop; \ nop; nop; nop; nop; nop; nop; nop; nop; +#define SPILL_0_NORMAL_ETRAP \ +etrap_kernel_spill: \ + stx %l0, [%sp + STACK_BIAS + 0x00]; \ + stx %l1, [%sp + STACK_BIAS + 0x08]; \ + stx %l2, [%sp + STACK_BIAS + 0x10]; \ + stx %l3, [%sp + STACK_BIAS + 0x18]; \ + stx %l4, [%sp + STACK_BIAS + 0x20]; \ + stx %l5, [%sp + STACK_BIAS + 0x28]; \ + stx %l6, [%sp + STACK_BIAS + 0x30]; \ + stx %l7, [%sp + STACK_BIAS + 0x38]; \ + stx %i0, [%sp + STACK_BIAS + 0x40]; \ + stx %i1, [%sp + STACK_BIAS + 0x48]; \ + stx %i2, [%sp + STACK_BIAS + 0x50]; \ + stx %i3, [%sp + STACK_BIAS + 0x58]; \ + stx %i4, [%sp + STACK_BIAS + 0x60]; \ + stx %i5, [%sp + STACK_BIAS + 0x68]; \ + stx %i6, [%sp + STACK_BIAS + 0x70]; \ + stx %i7, [%sp + STACK_BIAS + 0x78]; \ + saved; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; + /* Normal 64bit spill */ #define SPILL_1_GENERIC(ASI) \ add %sp, STACK_BIAS + 0x00, %g1; \ @@ -254,6 +297,67 @@ b,a,pt %xcc, spill_fixup_mna; \ b,a,pt %xcc, spill_fixup; +#define SPILL_1_GENERIC_ETRAP \ +etrap_user_spill_64bit: \ + stxa %l0, [%sp + STACK_BIAS + 0x00] %asi; 
\ + stxa %l1, [%sp + STACK_BIAS + 0x08] %asi; \ + stxa %l2, [%sp + STACK_BIAS + 0x10] %asi; \ + stxa %l3, [%sp + STACK_BIAS + 0x18] %asi; \ + stxa %l4, [%sp + STACK_BIAS + 0x20] %asi; \ + stxa %l5, [%sp + STACK_BIAS + 0x28] %asi; \ + stxa %l6, [%sp + STACK_BIAS + 0x30] %asi; \ + stxa %l7, [%sp + STACK_BIAS + 0x38] %asi; \ + stxa %i0, [%sp + STACK_BIAS + 0x40] %asi; \ + stxa %i1, [%sp + STACK_BIAS + 0x48] %asi; \ + stxa %i2, [%sp + STACK_BIAS + 0x50] %asi; \ + stxa %i3, [%sp + STACK_BIAS + 0x58] %asi; \ + stxa %i4, [%sp + STACK_BIAS + 0x60] %asi; \ + stxa %i5, [%sp + STACK_BIAS + 0x68] %asi; \ + stxa %i6, [%sp + STACK_BIAS + 0x70] %asi; \ + stxa %i7, [%sp + STACK_BIAS + 0x78] %asi; \ + saved; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; \ + ba,a,pt %xcc, etrap_spill_fixup_64bit; \ + ba,a,pt %xcc, etrap_spill_fixup_64bit; \ + ba,a,pt %xcc, etrap_spill_fixup_64bit; + +#define SPILL_1_GENERIC_ETRAP_FIXUP \ +etrap_spill_fixup_64bit: \ + ldub [%g6 + TI_WSAVED], %g1; \ + sll %g1, 3, %g3; \ + add %g6, %g3, %g3; \ + stx %sp, [%g3 + TI_RWIN_SPTRS]; \ + sll %g1, 7, %g3; \ + add %g6, %g3, %g3; \ + stx %l0, [%g3 + TI_REG_WINDOW + 0x00]; \ + stx %l1, [%g3 + TI_REG_WINDOW + 0x08]; \ + stx %l2, [%g3 + TI_REG_WINDOW + 0x10]; \ + stx %l3, [%g3 + TI_REG_WINDOW + 0x18]; \ + stx %l4, [%g3 + TI_REG_WINDOW + 0x20]; \ + stx %l5, [%g3 + TI_REG_WINDOW + 0x28]; \ + stx %l6, [%g3 + TI_REG_WINDOW + 0x30]; \ + stx %l7, [%g3 + TI_REG_WINDOW + 0x38]; \ + stx %i0, [%g3 + TI_REG_WINDOW + 0x40]; \ + stx %i1, [%g3 + TI_REG_WINDOW + 0x48]; \ + stx %i2, [%g3 + TI_REG_WINDOW + 0x50]; \ + stx %i3, [%g3 + TI_REG_WINDOW + 0x58]; \ + stx %i4, [%g3 + TI_REG_WINDOW + 0x60]; \ + stx %i5, [%g3 + TI_REG_WINDOW + 0x68]; \ + stx %i6, [%g3 + TI_REG_WINDOW + 0x70]; \ + stx %i7, [%g3 + TI_REG_WINDOW + 0x78]; \ + add %g1, 1, %g1; \ + stb %g1, [%g6 + TI_WSAVED]; \ + saved; \ + rdpr %cwp, %g1; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ 
+ wrpr %g1, %cwp; \ + nop; nop; nop + /* Normal 32bit spill */ #define SPILL_2_GENERIC(ASI) \ srl %sp, 0, %sp; \ @@ -287,6 +391,68 @@ b,a,pt %xcc, spill_fixup_mna; \ b,a,pt %xcc, spill_fixup; +#define SPILL_2_GENERIC_ETRAP \ +etrap_user_spill_32bit: \ + srl %sp, 0, %sp; \ + stwa %l0, [%sp + 0x00] %asi; \ + stwa %l1, [%sp + 0x04] %asi; \ + stwa %l2, [%sp + 0x08] %asi; \ + stwa %l3, [%sp + 0x0c] %asi; \ + stwa %l4, [%sp + 0x10] %asi; \ + stwa %l5, [%sp + 0x14] %asi; \ + stwa %l6, [%sp + 0x18] %asi; \ + stwa %l7, [%sp + 0x1c] %asi; \ + stwa %i0, [%sp + 0x20] %asi; \ + stwa %i1, [%sp + 0x24] %asi; \ + stwa %i2, [%sp + 0x28] %asi; \ + stwa %i3, [%sp + 0x2c] %asi; \ + stwa %i4, [%sp + 0x30] %asi; \ + stwa %i5, [%sp + 0x34] %asi; \ + stwa %i6, [%sp + 0x38] %asi; \ + stwa %i7, [%sp + 0x3c] %asi; \ + saved; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; \ + nop; nop; nop; nop; \ + ba,a,pt %xcc, etrap_spill_fixup_32bit; \ + ba,a,pt %xcc, etrap_spill_fixup_32bit; \ + ba,a,pt %xcc, etrap_spill_fixup_32bit; + +#define SPILL_2_GENERIC_ETRAP_FIXUP \ +etrap_spill_fixup_32bit: \ + ldub [%g6 + TI_WSAVED], %g1; \ + sll %g1, 3, %g3; \ + add %g6, %g3, %g3; \ + stx %sp, [%g3 + TI_RWIN_SPTRS]; \ + sll %g1, 7, %g3; \ + add %g6, %g3, %g3; \ + stw %l0, [%g3 + TI_REG_WINDOW + 0x00]; \ + stw %l1, [%g3 + TI_REG_WINDOW + 0x04]; \ + stw %l2, [%g3 + TI_REG_WINDOW + 0x08]; \ + stw %l3, [%g3 + TI_REG_WINDOW + 0x0c]; \ + stw %l4, [%g3 + TI_REG_WINDOW + 0x10]; \ + stw %l5, [%g3 + TI_REG_WINDOW + 0x14]; \ + stw %l6, [%g3 + TI_REG_WINDOW + 0x18]; \ + stw %l7, [%g3 + TI_REG_WINDOW + 0x1c]; \ + stw %i0, [%g3 + TI_REG_WINDOW + 0x20]; \ + stw %i1, [%g3 + TI_REG_WINDOW + 0x24]; \ + stw %i2, [%g3 + TI_REG_WINDOW + 0x28]; \ + stw %i3, [%g3 + TI_REG_WINDOW + 0x2c]; \ + stw %i4, [%g3 + TI_REG_WINDOW + 0x30]; \ + stw %i5, [%g3 + TI_REG_WINDOW + 0x34]; \ + stw %i6, [%g3 + TI_REG_WINDOW + 0x38]; \ + stw %i7, [%g3 + TI_REG_WINDOW + 0x3c]; \ + add %g1, 1, %g1; \ + stb 
%g1, [%g6 + TI_WSAVED]; \ + saved; \ + rdpr %cwp, %g1; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop + #define SPILL_1_NORMAL SPILL_1_GENERIC(ASI_AIUP) #define SPILL_2_NORMAL SPILL_2_GENERIC(ASI_AIUP) #define SPILL_3_NORMAL SPILL_0_NORMAL @@ -325,6 +491,35 @@ restored; retry; nop; nop; nop; nop; nop; nop; \ nop; nop; nop; nop; nop; nop; nop; nop; +#define FILL_0_NORMAL_RTRAP \ +kern_rtt_fill: \ + rdpr %cwp, %g1; \ + sub %g1, 1, %g1; \ + wrpr %g1, %cwp; \ + ldx [%sp + STACK_BIAS + 0x00], %l0; \ + ldx [%sp + STACK_BIAS + 0x08], %l1; \ + ldx [%sp + STACK_BIAS + 0x10], %l2; \ + ldx [%sp + STACK_BIAS + 0x18], %l3; \ + ldx [%sp + STACK_BIAS + 0x20], %l4; \ + ldx [%sp + STACK_BIAS + 0x28], %l5; \ + ldx [%sp + STACK_BIAS + 0x30], %l6; \ + ldx [%sp + STACK_BIAS + 0x38], %l7; \ + ldx [%sp + STACK_BIAS + 0x40], %i0; \ + ldx [%sp + STACK_BIAS + 0x48], %i1; \ + ldx [%sp + STACK_BIAS + 0x50], %i2; \ + ldx [%sp + STACK_BIAS + 0x58], %i3; \ + ldx [%sp + STACK_BIAS + 0x60], %i4; \ + ldx [%sp + STACK_BIAS + 0x68], %i5; \ + ldx [%sp + STACK_BIAS + 0x70], %i6; \ + ldx [%sp + STACK_BIAS + 0x78], %i7; \ + restored; \ + add %g1, 1, %g1; \ + ba,pt %xcc, kern_rtt_restore; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; + + /* Normal 64bit fill */ #define FILL_1_GENERIC(ASI) \ add %sp, STACK_BIAS + 0x00, %g1; \ @@ -356,6 +551,33 @@ b,a,pt %xcc, fill_fixup_mna; \ b,a,pt %xcc, fill_fixup; +#define FILL_1_GENERIC_RTRAP \ +user_rtt_fill_64bit: \ + ldxa [%sp + STACK_BIAS + 0x00] %asi, %l0; \ + ldxa [%sp + STACK_BIAS + 0x08] %asi, %l1; \ + ldxa [%sp + STACK_BIAS + 0x10] %asi, %l2; \ + ldxa [%sp + STACK_BIAS + 0x18] %asi, %l3; \ + ldxa [%sp + STACK_BIAS + 0x20] %asi, %l4; \ + ldxa [%sp + STACK_BIAS + 0x28] %asi, %l5; \ + ldxa [%sp + STACK_BIAS + 0x30] %asi, %l6; \ + ldxa [%sp + STACK_BIAS + 0x38] %asi, %l7; \ + ldxa [%sp + STACK_BIAS + 0x40] %asi, %i0; \ + ldxa [%sp + STACK_BIAS + 0x48] %asi, %i1; \ + ldxa [%sp + STACK_BIAS + 0x50] 
%asi, %i2; \ + ldxa [%sp + STACK_BIAS + 0x58] %asi, %i3; \ + ldxa [%sp + STACK_BIAS + 0x60] %asi, %i4; \ + ldxa [%sp + STACK_BIAS + 0x68] %asi, %i5; \ + ldxa [%sp + STACK_BIAS + 0x70] %asi, %i6; \ + ldxa [%sp + STACK_BIAS + 0x78] %asi, %i7; \ + ba,pt %xcc, user_rtt_pre_restore; \ + restored; \ + nop; nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; nop; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; + + /* Normal 32bit fill */ #define FILL_2_GENERIC(ASI) \ srl %sp, 0, %sp; \ @@ -387,6 +609,34 @@ b,a,pt %xcc, fill_fixup_mna; \ b,a,pt %xcc, fill_fixup; +#define FILL_2_GENERIC_RTRAP \ +user_rtt_fill_32bit: \ + srl %sp, 0, %sp; \ + lduwa [%sp + 0x00] %asi, %l0; \ + lduwa [%sp + 0x04] %asi, %l1; \ + lduwa [%sp + 0x08] %asi, %l2; \ + lduwa [%sp + 0x0c] %asi, %l3; \ + lduwa [%sp + 0x10] %asi, %l4; \ + lduwa [%sp + 0x14] %asi, %l5; \ + lduwa [%sp + 0x18] %asi, %l6; \ + lduwa [%sp + 0x1c] %asi, %l7; \ + lduwa [%sp + 0x20] %asi, %i0; \ + lduwa [%sp + 0x24] %asi, %i1; \ + lduwa [%sp + 0x28] %asi, %i2; \ + lduwa [%sp + 0x2c] %asi, %i3; \ + lduwa [%sp + 0x30] %asi, %i4; \ + lduwa [%sp + 0x34] %asi, %i5; \ + lduwa [%sp + 0x38] %asi, %i6; \ + lduwa [%sp + 0x3c] %asi, %i7; \ + ba,pt %xcc, user_rtt_pre_restore; \ + restored; \ + nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; nop; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; + + #define FILL_1_NORMAL FILL_1_GENERIC(ASI_AIUP) #define FILL_2_NORMAL FILL_2_GENERIC(ASI_AIUP) #define FILL_3_NORMAL FILL_0_NORMAL diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h index c91d1e38eac6..afe236ba555b 100644 --- a/include/asm-sparc64/uaccess.h +++ b/include/asm-sparc64/uaccess.h @@ -114,16 +114,6 @@ case 8: __put_user_asm(data,x,addr,__pu_ret); break; \ default: __pu_ret = __put_user_bad(); break; \ } __pu_ret; }) -#define __put_user_nocheck_ret(data,addr,size,retval) ({ \ 
-register int __foo __asm__ ("l1"); \ -switch (size) { \ -case 1: __put_user_asm_ret(data,b,addr,retval,__foo); break; \ -case 2: __put_user_asm_ret(data,h,addr,retval,__foo); break; \ -case 4: __put_user_asm_ret(data,w,addr,retval,__foo); break; \ -case 8: __put_user_asm_ret(data,x,addr,retval,__foo); break; \ -default: if (__put_user_bad()) return retval; break; \ -} }) - #define __put_user_asm(x,size,addr,ret) \ __asm__ __volatile__( \ "/* Put user asm, inline. */\n" \ @@ -143,33 +133,6 @@ __asm__ __volatile__( \ : "=r" (ret) : "r" (x), "r" (__m(addr)), \ "i" (-EFAULT)) -#define __put_user_asm_ret(x,size,addr,ret,foo) \ -if (__builtin_constant_p(ret) && ret == -EFAULT) \ -__asm__ __volatile__( \ - "/* Put user asm ret, inline. */\n" \ -"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b, __ret_efault\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "r" (__m(addr))); \ -else \ -__asm__ __volatile__( \ - "/* Put user asm ret, inline. 
*/\n" \ -"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ -"3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %3, %%o0\n\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "r" (__m(addr)), \ - "i" (ret)) - extern int __put_user_bad(void); #define __get_user_nocheck(data,addr,size,type) ({ \ @@ -289,14 +252,7 @@ copy_in_user(void __user *to, void __user *from, unsigned long size) } #define __copy_in_user copy_in_user -extern unsigned long __must_check __bzero_noasi(void __user *, unsigned long); - -static inline unsigned long __must_check -__clear_user(void __user *addr, unsigned long size) -{ - - return __bzero_noasi(addr, size); -} +extern unsigned long __must_check __clear_user(void __user *, unsigned long); #define clear_user __clear_user diff --git a/include/asm-sparc64/vdev.h b/include/asm-sparc64/vdev.h new file mode 100644 index 000000000000..996e6be7b976 --- /dev/null +++ b/include/asm-sparc64/vdev.h @@ -0,0 +1,16 @@ +/* vdev.h: SUN4V virtual device interfaces and defines. + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + +#ifndef _SPARC64_VDEV_H +#define _SPARC64_VDEV_H + +#include <linux/types.h> + +extern u32 sun4v_vdev_devhandle; +extern int sun4v_vdev_root; + +extern unsigned int sun4v_vdev_device_interrupt(unsigned int); + +#endif /* !(_SPARC64_VDEV_H) */ diff --git a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h index 8b3a7e4b6062..8ce3f1813e28 100644 --- a/include/asm-sparc64/xor.h +++ b/include/asm-sparc64/xor.h @@ -2,9 +2,11 @@ * include/asm-sparc64/xor.h * * High speed xor_block operation for RAID4/5 utilizing the - * UltraSparc Visual Instruction Set. + * UltraSparc Visual Instruction Set and Niagara block-init + * twin-load instructions. * * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 2006 David S. 
Miller <davem@davemloft.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,8 +18,7 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <asm/pstate.h> -#include <asm/asi.h> +#include <asm/spitfire.h> extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, @@ -37,4 +38,29 @@ static struct xor_block_template xor_block_VIS = { .do_5 = xor_vis_5, }; -#define XOR_TRY_TEMPLATES xor_speed(&xor_block_VIS) +extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +static struct xor_block_template xor_block_niagara = { + .name = "Niagara", + .do_2 = xor_niagara_2, + .do_3 = xor_niagara_3, + .do_4 = xor_niagara_4, + .do_5 = xor_niagara_5, +}; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_VIS); \ + xor_speed(&xor_block_niagara); \ + } while (0) + +/* For VIS for everything except Niagara. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 4041122dabfc..57abcea1cb5d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -127,6 +127,9 @@ /* Hilscher netx */ #define PORT_NETX 71 +/* SUN4V Hypervisor Console */ +#define PORT_SUNHV 72 + #ifdef __KERNEL__ #include <linux/config.h> |