summaryrefslogtreecommitdiff
path: root/include/asm-sparc64
diff options
context:
space:
mode:
Diffstat (limited to 'include/asm-sparc64')
-rw-r--r--include/asm-sparc64/a.out.h6
-rw-r--r--include/asm-sparc64/asi.h18
-rw-r--r--include/asm-sparc64/cpudata.h203
-rw-r--r--include/asm-sparc64/elf.h22
-rw-r--r--include/asm-sparc64/head.h15
-rw-r--r--include/asm-sparc64/hypervisor.h2128
-rw-r--r--include/asm-sparc64/idprom.h12
-rw-r--r--include/asm-sparc64/intr_queue.h15
-rw-r--r--include/asm-sparc64/irq.h4
-rw-r--r--include/asm-sparc64/mmu.h36
-rw-r--r--include/asm-sparc64/mmu_context.h162
-rw-r--r--include/asm-sparc64/numnodes.h6
-rw-r--r--include/asm-sparc64/oplib.h43
-rw-r--r--include/asm-sparc64/page.h13
-rw-r--r--include/asm-sparc64/pbm.h3
-rw-r--r--include/asm-sparc64/pci.h56
-rw-r--r--include/asm-sparc64/pgalloc.h166
-rw-r--r--include/asm-sparc64/pgtable.h704
-rw-r--r--include/asm-sparc64/pil.h4
-rw-r--r--include/asm-sparc64/processor.h23
-rw-r--r--include/asm-sparc64/pstate.h9
-rw-r--r--include/asm-sparc64/scratchpad.h14
-rw-r--r--include/asm-sparc64/smp.h30
-rw-r--r--include/asm-sparc64/sparsemem.h12
-rw-r--r--include/asm-sparc64/spitfire.h1
-rw-r--r--include/asm-sparc64/system.h7
-rw-r--r--include/asm-sparc64/thread_info.h9
-rw-r--r--include/asm-sparc64/timex.h6
-rw-r--r--include/asm-sparc64/tlbflush.h25
-rw-r--r--include/asm-sparc64/tsb.h281
-rw-r--r--include/asm-sparc64/ttable.h272
-rw-r--r--include/asm-sparc64/uaccess.h46
-rw-r--r--include/asm-sparc64/vdev.h16
-rw-r--r--include/asm-sparc64/xor.h34
34 files changed, 3783 insertions, 618 deletions
diff --git a/include/asm-sparc64/a.out.h b/include/asm-sparc64/a.out.h
index 02af289e3f46..35cb5c9e0c92 100644
--- a/include/asm-sparc64/a.out.h
+++ b/include/asm-sparc64/a.out.h
@@ -95,7 +95,11 @@ struct relocation_info /* used when header.a_machtype == M_SPARC */
#ifdef __KERNEL__
-#define STACK_TOP (test_thread_flag(TIF_32BIT) ? 0xf0000000 : 0x80000000000L)
+#define STACK_TOP32 ((1UL << 32UL) - PAGE_SIZE)
+#define STACK_TOP64 (0x0000080000000000UL - (1UL << 32UL))
+
+#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \
+ STACK_TOP32 : STACK_TOP64)
#endif
diff --git a/include/asm-sparc64/asi.h b/include/asm-sparc64/asi.h
index 534855660f2a..662a21107ae6 100644
--- a/include/asm-sparc64/asi.h
+++ b/include/asm-sparc64/asi.h
@@ -25,14 +25,27 @@
/* SpitFire and later extended ASIs. The "(III)" marker designates
* UltraSparc-III and later specific ASIs. The "(CMT)" marker designates
- * Chip Multi Threading specific ASIs.
+ * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific
+ * ASIs, "(4V)" designates SUN4V specific ASIs.
*/
#define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */
#define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */
+#define ASI_BLK_AIUP_4V 0x16 /* (4V) Prim, user, block ld/st */
+#define ASI_BLK_AIUS_4V 0x17 /* (4V) Sec, user, block ld/st */
#define ASI_PHYS_USE_EC_L 0x1c /* PADDR, E-cachable, little endian*/
#define ASI_PHYS_BYPASS_EC_E_L 0x1d /* PADDR, E-bit, little endian */
+#define ASI_BLK_AIUP_L_4V 0x1e /* (4V) Prim, user, block, l-endian*/
+#define ASI_BLK_AIUS_L_4V 0x1f /* (4V) Sec, user, block, l-endian */
+#define ASI_SCRATCHPAD 0x20 /* (4V) Scratch Pad Registers */
+#define ASI_MMU 0x21 /* (4V) MMU Context Registers */
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 /* (NG) init-store, twin load,
+ * secondary, user
+ */
#define ASI_NUCLEUS_QUAD_LDD 0x24 /* Cachable, qword load */
+#define ASI_QUEUE 0x25 /* (4V) Interrupt Queue Registers */
+#define ASI_QUAD_LDD_PHYS_4V 0x26 /* (4V) Physical, qword load */
#define ASI_NUCLEUS_QUAD_LDD_L 0x2c /* Cachable, qword load, l-endian */
+#define ASI_QUAD_LDD_PHYS_L_4V 0x2e /* (4V) Phys, qword load, l-endian */
#define ASI_PCACHE_DATA_STATUS 0x30 /* (III) PCache data stat RAM diag */
#define ASI_PCACHE_DATA 0x31 /* (III) PCache data RAM diag */
#define ASI_PCACHE_TAG 0x32 /* (III) PCache tag RAM diag */
@@ -137,6 +150,9 @@
#define ASI_FL16_SL 0xdb /* Secondary, 1 16-bit, fpu ld/st,L*/
#define ASI_BLK_COMMIT_P 0xe0 /* Primary, blk store commit */
#define ASI_BLK_COMMIT_S 0xe1 /* Secondary, blk store commit */
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* (NG) init-store, twin load,
+ * primary, implicit
+ */
#define ASI_BLK_P 0xf0 /* Primary, blk ld/st */
#define ASI_BLK_S 0xf1 /* Secondary, blk ld/st */
#define ASI_BLK_PL 0xf8 /* Primary, blk ld/st, little */
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index 74de79dca915..c66a81bbc84d 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -1,41 +1,224 @@
/* cpudata.h: Per-cpu parameters.
*
- * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net)
*/
#ifndef _SPARC64_CPUDATA_H
#define _SPARC64_CPUDATA_H
+#include <asm/hypervisor.h>
+#include <asm/asi.h>
+
+#ifndef __ASSEMBLY__
+
#include <linux/percpu.h>
+#include <linux/threads.h>
typedef struct {
/* Dcache line 1 */
unsigned int __softirq_pending; /* must be 1st, see rtrap.S */
unsigned int multiplier;
unsigned int counter;
- unsigned int idle_volume;
+ unsigned int __pad1;
unsigned long clock_tick; /* %tick's per second */
unsigned long udelay_val;
- /* Dcache line 2 */
- unsigned int pgcache_size;
- unsigned int __pad1;
- unsigned long *pte_cache[2];
- unsigned long *pgd_cache;
-
- /* Dcache line 3, rarely used */
+ /* Dcache line 2, rarely used */
unsigned int dcache_size;
unsigned int dcache_line_size;
unsigned int icache_size;
unsigned int icache_line_size;
unsigned int ecache_size;
unsigned int ecache_line_size;
- unsigned int __pad2;
unsigned int __pad3;
+ unsigned int __pad4;
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
#define local_cpu_data() __get_cpu_var(__cpu_data)
+/* Trap handling code needs to get at a few critical values upon
+ * trap entry and to process TSB misses. These cannot be in the
+ * per_cpu() area as we really need to lock them into the TLB and
+ * thus make them part of the main kernel image. As a result we
+ * try to make this as small as possible.
+ *
+ * This is padded out and aligned to 64-bytes to avoid false sharing
+ * on SMP.
+ */
+
+/* If you modify the size of this structure, please update
+ * TRAP_BLOCK_SZ_SHIFT below.
+ */
+struct thread_info;
+struct trap_per_cpu {
+/* D-cache line 1: Basic thread information, cpu and device mondo queues */
+ struct thread_info *thread;
+ unsigned long pgd_paddr;
+ unsigned long cpu_mondo_pa;
+ unsigned long dev_mondo_pa;
+
+/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */
+ unsigned long resum_mondo_pa;
+ unsigned long resum_kernel_buf_pa;
+ unsigned long nonresum_mondo_pa;
+ unsigned long nonresum_kernel_buf_pa;
+
+/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */
+ struct hv_fault_status fault_info;
+
+/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */
+ unsigned long cpu_mondo_block_pa;
+ unsigned long cpu_list_pa;
+ unsigned long __pad1[2];
+
+/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */
+ unsigned long __pad2[4];
+} __attribute__((aligned(64)));
+extern struct trap_per_cpu trap_block[NR_CPUS];
+extern void init_cur_cpu_trap(struct thread_info *);
+extern void setup_tba(void);
+
+struct cpuid_patch_entry {
+ unsigned int addr;
+ unsigned int cheetah_safari[4];
+ unsigned int cheetah_jbus[4];
+ unsigned int starfire[4];
+ unsigned int sun4v[4];
+};
+extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
+
+struct sun4v_1insn_patch_entry {
+ unsigned int addr;
+ unsigned int insn;
+};
+extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
+ __sun4v_1insn_patch_end;
+
+struct sun4v_2insn_patch_entry {
+ unsigned int addr;
+ unsigned int insns[2];
+};
+extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
+ __sun4v_2insn_patch_end;
+
+#endif /* !(__ASSEMBLY__) */
+
+#define TRAP_PER_CPU_THREAD 0x00
+#define TRAP_PER_CPU_PGD_PADDR 0x08
+#define TRAP_PER_CPU_CPU_MONDO_PA 0x10
+#define TRAP_PER_CPU_DEV_MONDO_PA 0x18
+#define TRAP_PER_CPU_RESUM_MONDO_PA 0x20
+#define TRAP_PER_CPU_RESUM_KBUF_PA 0x28
+#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30
+#define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38
+#define TRAP_PER_CPU_FAULT_INFO 0x40
+#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0
+#define TRAP_PER_CPU_CPU_LIST_PA 0xc8
+
+#define TRAP_BLOCK_SZ_SHIFT 8
+
+#include <asm/scratchpad.h>
+
+#define __GET_CPUID(REG) \
+ /* Spitfire implementation (default). */ \
+661: ldxa [%g0] ASI_UPA_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x1f, REG; \
+ nop; \
+ .section .cpuid_patch, "ax"; \
+ /* Instruction location. */ \
+ .word 661b; \
+ /* Cheetah Safari implementation. */ \
+ ldxa [%g0] ASI_SAFARI_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x3ff, REG; \
+ nop; \
+ /* Cheetah JBUS implementation. */ \
+ ldxa [%g0] ASI_JBUS_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x1f, REG; \
+ nop; \
+ /* Starfire implementation. */ \
+ sethi %hi(0x1fff40000d0 >> 9), REG; \
+ sllx REG, 9, REG; \
+ or REG, 0xd0, REG; \
+ lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\
+ /* sun4v implementation. */ \
+ mov SCRATCHPAD_CPUID, REG; \
+ ldxa [REG] ASI_SCRATCHPAD, REG; \
+ nop; \
+ nop; \
+ .previous;
+
+#ifdef CONFIG_SMP
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ __GET_CPUID(TMP) \
+ sethi %hi(trap_block), DEST; \
+ sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \
+ or DEST, %lo(trap_block), DEST; \
+ add DEST, TMP, DEST; \
+
+/* Clobbers TMP, current address space PGD phys address into DEST. */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads local processor's IRQ work area into DEST. */
+#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \
+ __GET_CPUID(TMP) \
+ sethi %hi(__irq_work), DEST; \
+ sllx TMP, 6, TMP; \
+ or DEST, %lo(__irq_work), DEST; \
+ add DEST, TMP, DEST;
+
+/* Clobbers TMP, loads DEST with current thread info pointer. */
+#define TRAP_LOAD_THREAD_REG(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* Given the current thread info pointer in THR, load the per-cpu
+ * area base of the current processor into DEST. REG1, REG2, and REG3 are
+ * clobbered.
+ *
+ * You absolutely cannot use DEST as a temporary in this code. The
+ * reason is that traps can happen during execution, and return from
+ * trap will load the fully resolved DEST per-cpu base. This can corrupt
+ * the calculations done by the macro mid-stream.
+ */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \
+ ldub [THR + TI_CPU], REG1; \
+ sethi %hi(__per_cpu_shift), REG3; \
+ sethi %hi(__per_cpu_base), REG2; \
+ ldx [REG3 + %lo(__per_cpu_shift)], REG3; \
+ ldx [REG2 + %lo(__per_cpu_base)], REG2; \
+ sllx REG1, REG3, REG3; \
+ add REG3, REG2, DEST;
+
+#else
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ sethi %hi(trap_block), DEST; \
+ or DEST, %lo(trap_block), DEST; \
+
+/* Uniprocessor versions, we know the cpuid is zero. */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \
+ sethi %hi(__irq_work), DEST; \
+ or DEST, %lo(__irq_work), DEST;
+
+#define TRAP_LOAD_THREAD_REG(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* No per-cpu areas on uniprocessor, so no need to load DEST. */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
+
+#endif /* !(CONFIG_SMP) */
+
#endif /* _SPARC64_CPUDATA_H */
diff --git a/include/asm-sparc64/elf.h b/include/asm-sparc64/elf.h
index 69539a8ab833..303d85e2f82e 100644
--- a/include/asm-sparc64/elf.h
+++ b/include/asm-sparc64/elf.h
@@ -10,6 +10,7 @@
#ifdef __KERNEL__
#include <asm/processor.h>
#include <asm/uaccess.h>
+#include <asm/spitfire.h>
#endif
/*
@@ -68,6 +69,7 @@
#define HWCAP_SPARC_MULDIV 8
#define HWCAP_SPARC_V9 16
#define HWCAP_SPARC_ULTRA3 32
+#define HWCAP_SPARC_BLKINIT 64
/*
* These are used to set parameters in the core dumps.
@@ -145,11 +147,21 @@ typedef struct {
instruction set this cpu supports. */
/* On Ultra, we support all of the v8 capabilities. */
-#define ELF_HWCAP ((HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | \
- HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | \
- HWCAP_SPARC_V9) | \
- ((tlb_type == cheetah || tlb_type == cheetah_plus) ? \
- HWCAP_SPARC_ULTRA3 : 0))
+static inline unsigned int sparc64_elf_hwcap(void)
+{
+ unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
+ HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV |
+ HWCAP_SPARC_V9);
+
+ if (tlb_type == cheetah || tlb_type == cheetah_plus)
+ cap |= HWCAP_SPARC_ULTRA3;
+ else if (tlb_type == hypervisor)
+ cap |= HWCAP_SPARC_BLKINIT;
+
+ return cap;
+}
+
+#define ELF_HWCAP sparc64_elf_hwcap();
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
diff --git a/include/asm-sparc64/head.h b/include/asm-sparc64/head.h
index 0abd3a674e8f..67960a751f4d 100644
--- a/include/asm-sparc64/head.h
+++ b/include/asm-sparc64/head.h
@@ -4,12 +4,21 @@
#include <asm/pstate.h>
+ /* wrpr %g0, val, %gl */
+#define SET_GL(val) \
+ .word 0xa1902000 | val
+
+ /* rdpr %gl, %gN */
+#define GET_GL_GLOBAL(N) \
+ .word 0x81540000 | (N << 25)
+
#define KERNBASE 0x400000
#define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ)
#define __CHEETAH_ID 0x003e0014
#define __JALAPENO_ID 0x003e0016
+#define __SERRANO_ID 0x003e0022
#define CHEETAH_MANUF 0x003e
#define CHEETAH_IMPL 0x0014 /* Ultra-III */
@@ -19,6 +28,12 @@
#define PANTHER_IMPL 0x0019 /* Ultra-IV+ */
#define SERRANO_IMPL 0x0022 /* Ultra-IIIi+ */
+#define BRANCH_IF_SUN4V(tmp1,label) \
+ sethi %hi(is_sun4v), %tmp1; \
+ lduw [%tmp1 + %lo(is_sun4v)], %tmp1; \
+ brnz,pn %tmp1, label; \
+ nop
+
#define BRANCH_IF_CHEETAH_BASE(tmp1,tmp2,label) \
rdpr %ver, %tmp1; \
sethi %hi(__CHEETAH_ID), %tmp2; \
diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h
new file mode 100644
index 000000000000..612bf319753f
--- /dev/null
+++ b/include/asm-sparc64/hypervisor.h
@@ -0,0 +1,2128 @@
+#ifndef _SPARC64_HYPERVISOR_H
+#define _SPARC64_HYPERVISOR_H
+
+/* Sun4v hypervisor interfaces and defines.
+ *
+ * Hypervisor calls are made via traps to software traps number 0x80
+ * and above. Registers %o0 to %o5 serve as argument, status, and
+ * return value registers.
+ *
+ * There are two kinds of these traps. First there are the normal
+ * "fast traps" which use software trap 0x80 and encode the function
+ * to invoke by number in register %o5. Argument and return value
+ * handling is as follows:
+ *
+ * -----------------------------------------------
+ * | %o5 | function number | undefined |
+ * | %o0 | argument 0 | return status |
+ * | %o1 | argument 1 | return value 1 |
+ * | %o2 | argument 2 | return value 2 |
+ * | %o3 | argument 3 | return value 3 |
+ * | %o4 | argument 4 | return value 4 |
+ * -----------------------------------------------
+ *
+ * The second type are "hyper-fast traps" which encode the function
+ * number in the software trap number itself. So these use trap
+ * numbers > 0x80. The register usage for hyper-fast traps is as
+ * follows:
+ *
+ * -----------------------------------------------
+ * | %o0 | argument 0 | return status |
+ * | %o1 | argument 1 | return value 1 |
+ * | %o2 | argument 2 | return value 2 |
+ * | %o3 | argument 3 | return value 3 |
+ * | %o4 | argument 4 | return value 4 |
+ * -----------------------------------------------
+ *
+ * Registers providing explicit arguments to the hypervisor calls
+ * are volatile across the call. Upon return their values are
+ * undefined unless explicitly specified as containing a particular
+ * return value by the specific call. The return status is always
+ * returned in register %o0, zero indicates a successful execution of
+ * the hypervisor call and other values indicate an error status as
+ * defined below. So, for example, if a hyper-fast trap takes
+ * arguments 0, 1, and 2, then %o0, %o1, and %o2 are volatile across
+ * the call and %o3, %o4, and %o5 would be preserved.
+ *
+ * If the hypervisor trap is invalid, or the fast trap function number
+ * is invalid, HV_EBADTRAP will be returned in %o0. Also, all 64-bits
+ * of the argument and return values are significant.
+ */
+
+/* Trap numbers. */
+#define HV_FAST_TRAP 0x80
+#define HV_MMU_MAP_ADDR_TRAP 0x83
+#define HV_MMU_UNMAP_ADDR_TRAP 0x84
+#define HV_TTRACE_ADDENTRY_TRAP 0x85
+#define HV_CORE_TRAP 0xff
+
+/* Error codes. */
+#define HV_EOK 0 /* Successful return */
+#define HV_ENOCPU 1 /* Invalid CPU id */
+#define HV_ENORADDR 2 /* Invalid real address */
+#define HV_ENOINTR 3 /* Invalid interrupt id */
+#define HV_EBADPGSZ 4 /* Invalid pagesize encoding */
+#define HV_EBADTSB 5 /* Invalid TSB description */
+#define HV_EINVAL 6 /* Invalid argument */
+#define HV_EBADTRAP 7 /* Invalid function number */
+#define HV_EBADALIGN 8 /* Invalid address alignment */
+#define HV_EWOULDBLOCK 9 /* Cannot complete w/o blocking */
+#define HV_ENOACCESS 10 /* No access to resource */
+#define HV_EIO 11 /* I/O error */
+#define HV_ECPUERROR 12 /* CPU in error state */
+#define HV_ENOTSUPPORTED 13 /* Function not supported */
+#define HV_ENOMAP 14 /* No mapping found */
+#define HV_ETOOMANY 15 /* Too many items specified */
+
+/* mach_exit()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_EXIT
+ * ARG0: exit code
+ * ERRORS: This service does not return.
+ *
+ * Stop all CPUs in the virtual domain and place them into the stopped
+ * state. The 64-bit exit code may be passed to a service entity as
+ * the domain's exit status. On systems without a service entity, the
+ * domain will undergo a reset, and the boot firmware will be
+ * reloaded.
+ *
+ * This function will never return to the guest that invokes it.
+ *
+ * Note: By convention an exit code of zero denotes a successful exit by
+ * the guest code. A non-zero exit code denotes a guest specific
+ * error indication.
+ *
+ */
+#define HV_FAST_MACH_EXIT 0x00
+
+/* Domain services. */
+
+/* mach_desc()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_DESC
+ * ARG0: buffer
+ * ARG1: length
+ * RET0: status
+ * RET1: length
+ * ERRORS: HV_EBADALIGN Buffer is badly aligned
+ * HV_ENORADDR Buffer is to an illegal real address.
+ * HV_EINVAL Buffer length is too small for complete
+ * machine description.
+ *
+ * Copy the most current machine description into the buffer indicated
+ * by the real address in ARG0. The buffer provided must be 16 byte
+ * aligned. Upon success or HV_EINVAL, this service returns the
+ * actual size of the machine description in the RET1 return value.
+ *
+ * Note: A method of determining the appropriate buffer size for the
+ * machine description is to first call this service with a buffer
+ * length of 0 bytes.
+ */
+#define HV_FAST_MACH_DESC 0x01
+
+/* mach_exit()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_SIR
+ * ERRORS: This service does not return.
+ *
+ * Perform a software initiated reset of the virtual machine domain.
+ * All CPUs are captured as soon as possible, all hardware devices are
+ * returned to the entry default state, and the domain is restarted at
+ * the SIR (trap type 0x04) real trap table (RTBA) entry point on one
+ * of the CPUs. The single CPU restarted is selected as determined by
+ * platform specific policy. Memory is preserved across this
+ * operation.
+ */
+#define HV_FAST_MACH_SIR 0x02
+
+/* mach_set_soft_state()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_SET_SOFT_STATE
+ * ARG0: software state
+ * ARG1: software state description pointer
+ * RET0: status
+ * ERRORS: EINVAL software state not valid or software state
+ * description is not NULL terminated
+ * ENORADDR software state description pointer is not a
+ * valid real address
+ * EBADALIGNED software state description is not correctly
+ * aligned
+ *
+ * This allows the guest to report it's soft state to the hypervisor. There
+ * are two primary components to this state. The first part states whether
+ * the guest software is running or not. The second containts optional
+ * details specific to the software.
+ *
+ * The software state argument is defined below in HV_SOFT_STATE_*, and
+ * indicates whether the guest is operating normally or in a transitional
+ * state.
+ *
+ * The software state description argument is a real address of a data buffer
+ * of size 32-bytes aligned on a 32-byte boundary. It is treated as a NULL
+ * terminated 7-bit ASCII string of up to 31 characters not including the
+ * NULL termination.
+ */
+#define HV_FAST_MACH_SET_SOFT_STATE 0x03
+#define HV_SOFT_STATE_NORMAL 0x01
+#define HV_SOFT_STATE_TRANSITION 0x02
+
+/* mach_get_soft_state()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_GET_SOFT_STATE
+ * ARG0: software state description pointer
+ * RET0: status
+ * RET1: software state
+ * ERRORS: ENORADDR software state description pointer is not a
+ * valid real address
+ * EBADALIGNED software state description is not correctly
+ * aligned
+ *
+ * Retrieve the current value of the guest's software state. The rules
+ * for the software state pointer are the same as for mach_set_soft_state()
+ * above.
+ */
+#define HV_FAST_MACH_GET_SOFT_STATE 0x04
+
+/* CPU services.
+ *
+ * CPUs represent devices that can execute software threads. A single
+ * chip that contains multiple cores or strands is represented as
+ * multiple CPUs with unique CPU identifiers. CPUs are exported to
+ * OBP via the machine description (and to the OS via the OBP device
+ * tree). CPUs are always in one of three states: stopped, running,
+ * or error.
+ *
+ * A CPU ID is a pre-assigned 16-bit value that uniquely identifies a
+ * CPU within a logical domain. Operations that are to be performed
+ * on multiple CPUs specify them via a CPU list. A CPU list is an
+ * array in real memory, of which each 16-bit word is a CPU ID. CPU
+ * lists are passed through the API as two arguments. The first is
+ * the number of entries (16-bit words) in the CPU list, and the
+ * second is the (real address) pointer to the CPU ID list.
+ */
+
+/* cpu_start()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_START
+ * ARG0: CPU ID
+ * ARG1: PC
+ * ARG1: RTBA
+ * ARG1: target ARG0
+ * RET0: status
+ * ERRORS: ENOCPU Invalid CPU ID
+ * EINVAL Target CPU ID is not in the stopped state
+ * ENORADDR Invalid PC or RTBA real address
+ * EBADALIGN Unaligned PC or unaligned RTBA
+ * EWOULDBLOCK Starting resources are not available
+ *
+ * Start CPU with given CPU ID with PC in %pc and with a real trap
+ * base address value of RTBA. The indicated CPU must be in the
+ * stopped state. The supplied RTBA must be aligned on a 256 byte
+ * boundary. On successful completion, the specified CPU will be in
+ * the running state and will be supplied with "target ARG0" in %o0
+ * and RTBA in %tba.
+ */
+#define HV_FAST_CPU_START 0x10
+
+/* cpu_stop()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_STOP
+ * ARG0: CPU ID
+ * RET0: status
+ * ERRORS: ENOCPU Invalid CPU ID
+ * EINVAL Target CPU ID is the current cpu
+ * EINVAL Target CPU ID is not in the running state
+ * EWOULDBLOCK Stopping resources are not available
+ * ENOTSUPPORTED Not supported on this platform
+ *
+ * The specified CPU is stopped. The indicated CPU must be in the
+ * running state. On completion, it will be in the stopped state. It
+ * is not legal to stop the current CPU.
+ *
+ * Note: As this service cannot be used to stop the current cpu, this service
+ * may not be used to stop the last running CPU in a domain. To stop
+ * and exit a running domain, a guest must use the mach_exit() service.
+ */
+#define HV_FAST_CPU_STOP 0x11
+
+/* cpu_yield()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_YIELD
+ * RET0: status
+ * ERRORS: No possible error.
+ *
+ * Suspend execution on the current CPU. Execution will resume when
+ * an interrupt (device, %stick_compare, or cross-call) is targeted to
+ * the CPU. On some CPUs, this API may be used by the hypervisor to
+ * save power by disabling hardware strands.
+ */
+#define HV_FAST_CPU_YIELD 0x12
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_yield(void);
+#endif
+
+/* cpu_qconf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_QCONF
+ * ARG0: queue
+ * ARG1: base real address
+ * ARG2: number of entries
+ * RET0: status
+ * ERRORS: ENORADDR Invalid base real address
+ * EINVAL Invalid queue or number of entries is less
+ * than 2 or too large.
+ * EBADALIGN Base real address is not correctly aligned
+ * for size.
+ *
+ * Configure the given queue to be placed at the given base real
+ * address, with the given number of entries. The number of entries
+ * must be a power of 2. The base real address must be aligned
+ * exactly to match the queue size. Each queue entry is 64 bytes
+ * long, so for example a 32 entry queue must be aligned on a 2048
+ * byte real address boundary.
+ *
+ * The specified queue is unconfigured if the number of entries is given
+ * as zero.
+ *
+ * For the current version of this API service, the argument queue is defined
+ * as follows:
+ *
+ * queue description
+ * ----- -------------------------
+ * 0x3c cpu mondo queue
+ * 0x3d device mondo queue
+ * 0x3e resumable error queue
+ * 0x3f non-resumable error queue
+ *
+ * Note: The maximum number of entries for each queue for a specific cpu may
+ * be determined from the machine description.
+ */
+#define HV_FAST_CPU_QCONF 0x14
+#define HV_CPU_QUEUE_CPU_MONDO 0x3c
+#define HV_CPU_QUEUE_DEVICE_MONDO 0x3d
+#define HV_CPU_QUEUE_RES_ERROR 0x3e
+#define HV_CPU_QUEUE_NONRES_ERROR 0x3f
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_qconf(unsigned long type,
+ unsigned long queue_paddr,
+ unsigned long num_queue_entries);
+#endif
+
+/* cpu_qinfo()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_QINFO
+ * ARG0: queue
+ * RET0: status
+ * RET1: base real address
+ * RET1: number of entries
+ * ERRORS: EINVAL Invalid queue
+ *
+ * Return the configuration info for the given queue. The base real
+ * address and number of entries of the defined queue are returned.
+ * The queue argument values are the same as for cpu_qconf() above.
+ *
+ * If the specified queue is a valid queue number, but no queue has
+ * been defined, the number of entries will be set to zero and the
+ * base real address returned is undefined.
+ */
+#define HV_FAST_CPU_QINFO 0x15
+
+/* cpu_mondo_send()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_MONDO_SEND
+ * ARG0-1: CPU list
+ * ARG2: data real address
+ * RET0: status
+ * ERRORS: EBADALIGN Mondo data is not 64-byte aligned or CPU list
+ * is not 2-byte aligned.
+ * ENORADDR Invalid data mondo address, or invalid cpu list
+ * address.
+ * ENOCPU Invalid cpu in CPU list
+ * EWOULDBLOCK Some or all of the listed CPUs did not receive
+ * the mondo
+ * ECPUERROR One or more of the listed CPUs are in error
+ * state, use HV_FAST_CPU_STATE to see which ones
+ * EINVAL CPU list includes caller's CPU ID
+ *
+ * Send a mondo interrupt to the CPUs in the given CPU list with the
+ * 64-bytes at the given data real address. The data must be 64-byte
+ * aligned. The mondo data will be delivered to the cpu_mondo queues
+ * of the recipient CPUs.
+ *
+ * In all cases, error or not, the CPUs in the CPU list to which the
+ * mondo has been successfully delivered will be indicated by having
+ * their entry in CPU list updated with the value 0xffff.
+ */
+#define HV_FAST_CPU_MONDO_SEND 0x42
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, unsigned long cpu_list_pa, unsigned long mondo_block_pa);
+#endif
+
+/* cpu_myid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_MYID
+ * RET0: status
+ * RET1: CPU ID
+ * ERRORS: No errors defined.
+ *
+ * Return the hypervisor ID handle for the current CPU. Use by a
+ * virtual CPU to discover it's own identity.
+ */
+#define HV_FAST_CPU_MYID 0x16
+
+/* cpu_state()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_STATE
+ * ARG0: CPU ID
+ * RET0: status
+ * RET1: state
+ * ERRORS: ENOCPU Invalid CPU ID
+ *
+ * Retrieve the current state of the CPU with the given CPU ID.
+ */
+#define HV_FAST_CPU_STATE 0x17
+#define HV_CPU_STATE_STOPPED 0x01
+#define HV_CPU_STATE_RUNNING 0x02
+#define HV_CPU_STATE_ERROR 0x03
+
+#ifndef __ASSEMBLY__
+extern long sun4v_cpu_state(unsigned long cpuid);
+#endif
+
+/* cpu_set_rtba()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_SET_RTBA
+ * ARG0: RTBA
+ * RET0: status
+ * RET1: previous RTBA
+ * ERRORS: ENORADDR Invalid RTBA real address
+ * EBADALIGN RTBA is incorrectly aligned for a trap table
+ *
+ * Set the real trap base address of the local cpu to the given RTBA.
+ * The supplied RTBA must be aligned on a 256 byte boundary. Upon
+ * success the previous value of the RTBA is returned in RET1.
+ *
+ * Note: This service does not affect %tba
+ */
+#define HV_FAST_CPU_SET_RTBA 0x18
+
+/* cpu_set_rtba()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_GET_RTBA
+ * RET0: status
+ * RET1: previous RTBA
+ * ERRORS: No possible error.
+ *
+ * Returns the current value of RTBA in RET1.
+ */
+#define HV_FAST_CPU_GET_RTBA 0x19
+
+/* MMU services.
+ *
+ * Layout of a TSB description for mmu_tsb_ctx{,non}0() calls.
+ */
+#ifndef __ASSEMBLY__
+struct hv_tsb_descr {
+ unsigned short pgsz_idx;
+ unsigned short assoc;
+ unsigned int num_ttes; /* in TTEs */
+ unsigned int ctx_idx;
+ unsigned int pgsz_mask;
+ unsigned long tsb_base;
+ unsigned long resv;
+};
+#endif
+#define HV_TSB_DESCR_PGSZ_IDX_OFFSET 0x00
+#define HV_TSB_DESCR_ASSOC_OFFSET 0x02
+#define HV_TSB_DESCR_NUM_TTES_OFFSET 0x04
+#define HV_TSB_DESCR_CTX_IDX_OFFSET 0x08
+#define HV_TSB_DESCR_PGSZ_MASK_OFFSET 0x0c
+#define HV_TSB_DESCR_TSB_BASE_OFFSET 0x10
+#define HV_TSB_DESCR_RESV_OFFSET 0x18
+
+/* Page size bitmask. */
+#define HV_PGSZ_MASK_8K (1 << 0)
+#define HV_PGSZ_MASK_64K (1 << 1)
+#define HV_PGSZ_MASK_512K (1 << 2)
+#define HV_PGSZ_MASK_4MB (1 << 3)
+#define HV_PGSZ_MASK_32MB (1 << 4)
+#define HV_PGSZ_MASK_256MB (1 << 5)
+#define HV_PGSZ_MASK_2GB (1 << 6)
+#define HV_PGSZ_MASK_16GB (1 << 7)
+
+/* Page size index. The value given in the TSB descriptor must correspond
+ * to the smallest page size specified in the pgsz_mask page size bitmask.
+ */
+#define HV_PGSZ_IDX_8K 0
+#define HV_PGSZ_IDX_64K 1
+#define HV_PGSZ_IDX_512K 2
+#define HV_PGSZ_IDX_4MB 3
+#define HV_PGSZ_IDX_32MB 4
+#define HV_PGSZ_IDX_256MB 5
+#define HV_PGSZ_IDX_2GB 6
+#define HV_PGSZ_IDX_16GB 7
+
+/* MMU fault status area.
+ *
+ * MMU related faults have their status and fault address information
+ * placed into a memory region made available by privileged code. Each
+ * virtual processor must make a mmu_fault_area_conf() call to tell the
+ * hypervisor where that processor's fault status should be stored.
+ *
+ * The fault status block is a multiple of 64-bytes and must be aligned
+ * on a 64-byte boundary.
+ */
+#ifndef __ASSEMBLY__
+struct hv_fault_status {
+ unsigned long i_fault_type;
+ unsigned long i_fault_addr;
+ unsigned long i_fault_ctx;
+ unsigned long i_reserved[5];
+ unsigned long d_fault_type;
+ unsigned long d_fault_addr;
+ unsigned long d_fault_ctx;
+ unsigned long d_reserved[5];
+};
+#endif
+#define HV_FAULT_I_TYPE_OFFSET 0x00
+#define HV_FAULT_I_ADDR_OFFSET 0x08
+#define HV_FAULT_I_CTX_OFFSET 0x10
+#define HV_FAULT_D_TYPE_OFFSET 0x40
+#define HV_FAULT_D_ADDR_OFFSET 0x48
+#define HV_FAULT_D_CTX_OFFSET 0x50
+
+#define HV_FAULT_TYPE_FAST_MISS 1
+#define HV_FAULT_TYPE_FAST_PROT 2
+#define HV_FAULT_TYPE_MMU_MISS 3
+#define HV_FAULT_TYPE_INV_RA 4
+#define HV_FAULT_TYPE_PRIV_VIOL 5
+#define HV_FAULT_TYPE_PROT_VIOL 6
+#define HV_FAULT_TYPE_NFO 7
+#define HV_FAULT_TYPE_NFO_SEFF 8
+#define HV_FAULT_TYPE_INV_VA 9
+#define HV_FAULT_TYPE_INV_ASI 10
+#define HV_FAULT_TYPE_NC_ATOMIC 11
+#define HV_FAULT_TYPE_PRIV_ACT 12
+#define HV_FAULT_TYPE_RESV1 13
+#define HV_FAULT_TYPE_UNALIGNED 14
+#define HV_FAULT_TYPE_INV_PGSZ 15
+/* Values 16 --> -2 are reserved. */
+#define HV_FAULT_TYPE_MULTIPLE -1
+
+/* Flags argument for mmu_{map,unmap}_addr(), mmu_demap_{page,context,all}(),
+ * and mmu_{map,unmap}_perm_addr().
+ */
+#define HV_MMU_DMMU 0x01
+#define HV_MMU_IMMU 0x02
+#define HV_MMU_ALL (HV_MMU_DMMU | HV_MMU_IMMU)
+
+/* mmu_map_addr()
+ * TRAP: HV_MMU_MAP_ADDR_TRAP
+ * ARG0: virtual address
+ * ARG1: mmu context
+ * ARG2: TTE
+ * ARG3: flags (HV_MMU_{IMMU,DMMU})
+ * ERRORS: EINVAL Invalid virtual address, mmu context, or flags
+ * EBADPGSZ Invalid page size value
+ * ENORADDR Invalid real address in TTE
+ *
+ * Create a non-permanent mapping using the given TTE, virtual
+ * address, and mmu context. The flags argument determines which
+ * (data, or instruction, or both) TLB the mapping gets loaded into.
+ *
+ * The behavior is undefined if the valid bit is clear in the TTE.
+ *
+ * Note: This API call is for privileged code to specify temporary translation
+ * mappings without the need to create and manage a TSB.
+ */
+
+/* mmu_unmap_addr()
+ * TRAP: HV_MMU_UNMAP_ADDR_TRAP
+ * ARG0: virtual address
+ * ARG1: mmu context
+ * ARG2: flags (HV_MMU_{IMMU,DMMU})
+ * ERRORS: EINVAL Invalid virtual address, mmu context, or flags
+ *
+ * Demaps the given virtual address in the given mmu context on this
+ * CPU. This function is intended to be used to demap pages mapped
+ * with mmu_map_addr. This service is equivalent to invoking
+ * mmu_demap_page() with only the current CPU in the CPU list. The
+ * flags argument determines which (data, or instruction, or both) TLB
+ * the mapping gets unmapped from.
+ *
+ * Attempting to perform an unmap operation for a previously defined
+ * permanent mapping will have undefined results.
+ */
+
+/* mmu_tsb_ctx0()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTX0
+ * ARG0: number of TSB descriptions
+ * ARG1: TSB descriptions pointer
+ * RET0: status
+ * ERRORS: ENORADDR Invalid TSB descriptions pointer or
+ * TSB base within a descriptor
+ * EBADALIGN TSB descriptions pointer is not aligned
+ * to an 8-byte boundary, or TSB base
+ * within a descriptor is not aligned for
+ * the given TSB size
+ * EBADPGSZ Invalid page size in a TSB descriptor
+ * EBADTSB Invalid associativity or size in a TSB
+ * descriptor
+ * EINVAL Invalid number of TSB descriptions, or
+ * invalid context index in a TSB
+ * descriptor, or index page size not
+ * equal to smallest page size in page
+ * size bitmask field.
+ *
+ * Configures the TSBs for the current CPU for virtual addresses with
+ * context zero. The TSB descriptions pointer is a pointer to an
+ * array of the given number of TSB descriptions.
+ *
+ * Note: The maximum number of TSBs available to a virtual CPU is given by the
+ * mmu-max-#tsbs property of the cpu's corresponding "cpu" node in the
+ * machine description.
+ */
+#define HV_FAST_MMU_TSB_CTX0 0x20
+
+/* mmu_tsb_ctxnon0()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTXNON0
+ * ARG0: number of TSB descriptions
+ * ARG1: TSB descriptions pointer
+ * RET0: status
+ * ERRORS: Same as for mmu_tsb_ctx0() above.
+ *
+ * Configures the TSBs for the current CPU for virtual addresses with
+ * non-zero contexts. The TSB descriptions pointer is a pointer to an
+ * array of the given number of TSB descriptions.
+ *
+ * Note: A maximum of 16 TSBs may be specified in the TSB description list.
+ */
+#define HV_FAST_MMU_TSB_CTXNON0 0x21
+
+/* mmu_demap_page()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_DEMAP_PAGE
+ * ARG0: reserved, must be zero
+ * ARG1: reserved, must be zero
+ * ARG2: virtual address
+ * ARG3: mmu context
+ * ARG4: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid virutal address, context, or
+ * flags value
+ * ENOTSUPPORTED ARG0 or ARG1 is non-zero
+ *
+ * Demaps any page mapping of the given virtual address in the given
+ * mmu context for the current virtual CPU. Any virtually tagged
+ * caches are guaranteed to be kept consistent. The flags argument
+ * determines which TLB (instruction, or data, or both) participate in
+ * the operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_PAGE 0x22
+
+/* mmu_demap_ctx()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_DEMAP_CTX
+ * ARG0: reserved, must be zero
+ * ARG1: reserved, must be zero
+ * ARG2: mmu context
+ * ARG3: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid context or flags value
+ * ENOTSUPPORTED ARG0 or ARG1 is non-zero
+ *
+ * Demaps all non-permanent virtual page mappings previously specified
+ * for the given context for the current virtual CPU. Any virtual
+ * tagged caches are guaranteed to be kept consistent. The flags
+ * argument determines which TLB (instruction, or data, or both)
+ * participate in the operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_CTX 0x23
+
+/* mmu_demap_all()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_DEMAP_ALL
+ * ARG0: reserved, must be zero
+ * ARG1: reserved, must be zero
+ * ARG2: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid flags value
+ * ENOTSUPPORTED ARG0 or ARG1 is non-zero
+ *
+ * Demaps all non-permanent virtual page mappings previously specified
+ * for the current virtual CPU. Any virtual tagged caches are
+ * guaranteed to be kept consistent. The flags argument determines
+ * which TLB (instruction, or data, or both) participate in the
+ * operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_ALL 0x24
+
+/* mmu_map_perm_addr()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_MAP_PERM_ADDR
+ * ARG0: virtual address
+ * ARG1: reserved, must be zero
+ * ARG2: TTE
+ * ARG3: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid virutal address or flags value
+ * EBADPGSZ Invalid page size value
+ * ENORADDR Invalid real address in TTE
+ * ETOOMANY Too many mappings (max of 8 reached)
+ *
+ * Create a permanent mapping using the given TTE and virtual address
+ * for context 0 on the calling virtual CPU. A maximum of 8 such
+ * permanent mappings may be specified by privileged code. Mappings
+ * may be removed with mmu_unmap_perm_addr().
+ *
+ * The behavior is undefined if a TTE with the valid bit clear is given.
+ *
+ * Note: This call is used to specify address space mappings for which
+ * privileged code does not expect to receive misses. For example,
+ * this mechanism can be used to map kernel nucleus code and data.
+ */
+#define HV_FAST_MMU_MAP_PERM_ADDR 0x25
+
+/* mmu_fault_area_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_FAULT_AREA_CONF
+ * ARG0: real address
+ * RET0: status
+ * RET1: previous mmu fault area real address
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Invalid alignment for fault area
+ *
+ * Configure the MMU fault status area for the calling CPU. A 64-byte
+ * aligned real address specifies where MMU fault status information
+ * is placed. The return value is the previously specified area, or 0
+ * for the first invocation. Specifying a fault area at real address
+ * 0 is not allowed.
+ */
+#define HV_FAST_MMU_FAULT_AREA_CONF 0x26
+
+/* mmu_enable()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_ENABLE
+ * ARG0: enable flag
+ * ARG1: return target address
+ * RET0: status
+ * ERRORS: ENORADDR Invalid real address when disabling
+ * translation.
+ * EBADALIGN The return target address is not
+ * aligned to an instruction.
+ * EINVAL The enable flag request the current
+ * operating mode (e.g. disable if already
+ * disabled)
+ *
+ * Enable or disable virtual address translation for the calling CPU
+ * within the virtual machine domain. If the enable flag is zero,
+ * translation is disabled, any non-zero value will enable
+ * translation.
+ *
+ * When this function returns, the newly selected translation mode
+ * will be active. If the mmu is being enabled, then the return
+ * target address is a virtual address else it is a real address.
+ *
+ * Upon successful completion, control will be returned to the given
+ * return target address (ie. the cpu will jump to that address). On
+ * failure, the previous mmu mode remains and the trap simply returns
+ * as normal with the appropriate error code in RET0.
+ */
+#define HV_FAST_MMU_ENABLE 0x27
+
+/* mmu_unmap_perm_addr()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_UNMAP_PERM_ADDR
+ * ARG0: virtual address
+ * ARG1: reserved, must be zero
+ * ARG2: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid virutal address or flags value
+ * ENOMAP Specified mapping was not found
+ *
+ * Demaps any permanent page mapping (established via
+ * mmu_map_perm_addr()) at the given virtual address for context 0 on
+ * the current virtual CPU. Any virtual tagged caches are guaranteed
+ * to be kept consistent.
+ */
+#define HV_FAST_MMU_UNMAP_PERM_ADDR 0x28
+
+/* mmu_tsb_ctx0_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTX0_INFO
+ * ARG0: max TSBs
+ * ARG1: buffer pointer
+ * RET0: status
+ * RET1: number of TSBs
+ * ERRORS: EINVAL Supplied buffer is too small
+ * EBADALIGN The buffer pointer is badly aligned
+ * ENORADDR Invalid real address for buffer pointer
+ *
+ * Return the TSB configuration as previous defined by mmu_tsb_ctx0()
+ * into the provided buffer. The size of the buffer is given in ARG1
+ * in terms of the number of TSB description entries.
+ *
+ * Upon return, RET1 always contains the number of TSB descriptions
+ * previously configured. If zero TSBs were configured, EOK is
+ * returned with RET1 containing 0.
+ */
+#define HV_FAST_MMU_TSB_CTX0_INFO 0x29
+
+/* mmu_tsb_ctxnon0_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTXNON0_INFO
+ * ARG0: max TSBs
+ * ARG1: buffer pointer
+ * RET0: status
+ * RET1: number of TSBs
+ * ERRORS: EINVAL Supplied buffer is too small
+ * EBADALIGN The buffer pointer is badly aligned
+ * ENORADDR Invalid real address for buffer pointer
+ *
+ * Return the TSB configuration as previous defined by
+ * mmu_tsb_ctxnon0() into the provided buffer. The size of the buffer
+ * is given in ARG1 in terms of the number of TSB description entries.
+ *
+ * Upon return, RET1 always contains the number of TSB descriptions
+ * previously configured. If zero TSBs were configured, EOK is
+ * returned with RET1 containing 0.
+ */
+#define HV_FAST_MMU_TSB_CTXNON0_INFO 0x2a
+
+/* mmu_fault_area_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_FAULT_AREA_INFO
+ * RET0: status
+ * RET1: fault area real address
+ * ERRORS: No errors defined.
+ *
+ * Return the currently defined MMU fault status area for the current
+ * CPU. The real address of the fault status area is returned in
+ * RET1, or 0 is returned in RET1 if no fault status area is defined.
+ *
+ * Note: mmu_fault_area_conf() may be called with the return value (RET1)
+ * from this service if there is a need to save and restore the fault
+ * area for a cpu.
+ */
+#define HV_FAST_MMU_FAULT_AREA_INFO 0x2b
+
+/* Cache and Memory services. */
+
+/* mem_scrub()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MEM_SCRUB
+ * ARG0: real address
+ * ARG1: length
+ * RET0: status
+ * RET1: length scrubbed
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Start address or length are not correctly
+ * aligned
+ * EINVAL Length is zero
+ *
+ * Zero the memory contents in the range real address to real address
+ * plus length minus 1. Also, valid ECC will be generated for that
+ * memory address range. Scrubbing is started at the given real
+ * address, but may not scrub the entire given length. The actual
+ * length scrubbed will be returned in RET1.
+ *
+ * The real address and length must be aligned on an 8K boundary, or
+ * contain the start address and length from a sun4v error report.
+ *
+ * Note: There are two uses for this function. The first use is to block clear
+ * and initialize memory and the second is to scrub an u ncorrectable
+ * error reported via a resumable or non-resumable trap. The second
+ * use requires the arguments to be equal to the real address and length
+ * provided in a sun4v memory error report.
+ */
+#define HV_FAST_MEM_SCRUB 0x31
+
+/* mem_sync()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MEM_SYNC
+ * ARG0: real address
+ * ARG1: length
+ * RET0: status
+ * RET1: length synced
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Start address or length are not correctly
+ * aligned
+ * EINVAL Length is zero
+ *
+ * Force the next access within the real address to real address plus
+ * length minus 1 to be fetches from main system memory. Less than
+ * the given length may be synced, the actual amount synced is
+ * returned in RET1. The real address and length must be aligned on
+ * an 8K boundary.
+ */
+#define HV_FAST_MEM_SYNC 0x32
+
+/* Time of day services.
+ *
+ * The hypervisor maintains the time of day on a per-domain basis.
+ * Changing the time of day in one domain does not affect the time of
+ * day on any other domain.
+ *
+ * Time is described by a single unsigned 64-bit word which is the
+ * number of seconds since the UNIX Epoch (00:00:00 UTC, January 1,
+ * 1970).
+ */
+
+/* tod_get()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TOD_GET
+ * RET0: status
+ * RET1: TOD
+ * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable
+ * ENOTSUPPORTED If TOD not supported on this platform
+ *
+ * Return the current time of day. May block if TOD access is
+ * temporarily not possible.
+ */
+#define HV_FAST_TOD_GET 0x50
+
+/* tod_set()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TOD_SET
+ * ARG0: TOD
+ * RET0: status
+ * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable
+ * ENOTSUPPORTED If TOD not supported on this platform
+ *
+ * The current time of day is set to the value specified in ARG0. May
+ * block if TOD access is temporarily not possible.
+ */
+#define HV_FAST_TOD_SET 0x51
+
+/* Console services */
+
+/* con_getchar()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CONS_GETCHAR
+ * RET0: status
+ * RET1: character
+ * ERRORS: EWOULDBLOCK No character available.
+ *
+ * Returns a character from the console device. If no character is
+ * available then an EWOULDBLOCK error is returned. If a character is
+ * available, then the returned status is EOK and the character value
+ * is in RET1.
+ *
+ * A virtual BREAK is represented by the 64-bit value -1.
+ *
+ * A virtual HUP signal is represented by the 64-bit value -2.
+ */
+#define HV_FAST_CONS_GETCHAR 0x60
+
+/* con_putchar()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CONS_PUTCHAR
+ * ARG0: character
+ * RET0: status
+ * ERRORS: EINVAL Illegal character
+ * EWOULDBLOCK Output buffer currently full, would block
+ *
+ * Send a character to the console device. Only character values
+ * between 0 and 255 may be used. Values outside this range are
+ * invalid except for the 64-bit value -1 which is used to send a
+ * virtual BREAK.
+ */
+#define HV_FAST_CONS_PUTCHAR 0x61
+
+/* Trap trace services.
+ *
+ * The hypervisor provides a trap tracing capability for privileged
+ * code running on each virtual CPU. Privileged code provides a
+ * round-robin trap trace queue within which the hypervisor writes
+ * 64-byte entries detailing hyperprivileged traps taken n behalf of
+ * privileged code. This is provided as a debugging capability for
+ * privileged code.
+ *
+ * The trap trace control structure is 64-bytes long and placed at the
+ * start (offset 0) of the trap trace buffer, and is described as
+ * follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_trap_trace_control {
+ unsigned long head_offset;
+ unsigned long tail_offset;
+ unsigned long __reserved[0x30 / sizeof(unsigned long)];
+};
+#endif
+#define HV_TRAP_TRACE_CTRL_HEAD_OFFSET 0x00
+#define HV_TRAP_TRACE_CTRL_TAIL_OFFSET 0x08
+
+/* The head offset is the offset of the most recently completed entry
+ * in the trap-trace buffer. The tail offset is the offset of the
+ * next entry to be written. The control structure is owned and
+ * modified by the hypervisor. A guest may not modify the control
+ * structure contents. Attempts to do so will result in undefined
+ * behavior for the guest.
+ *
+ * Each trap trace buffer entry is layed out as follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_trap_trace_entry {
+ unsigned char type; /* Hypervisor or guest entry? */
+ unsigned char hpstate; /* Hyper-privileged state */
+ unsigned char tl; /* Trap level */
+ unsigned char gl; /* Global register level */
+ unsigned short tt; /* Trap type */
+ unsigned short tag; /* Extended trap identifier */
+ unsigned long tstate; /* Trap state */
+ unsigned long tick; /* Tick */
+ unsigned long tpc; /* Trap PC */
+ unsigned long f1; /* Entry specific */
+ unsigned long f2; /* Entry specific */
+ unsigned long f3; /* Entry specific */
+ unsigned long f4; /* Entry specific */
+};
+#endif
+#define HV_TRAP_TRACE_ENTRY_TYPE 0x00
+#define HV_TRAP_TRACE_ENTRY_HPSTATE 0x01
+#define HV_TRAP_TRACE_ENTRY_TL 0x02
+#define HV_TRAP_TRACE_ENTRY_GL 0x03
+#define HV_TRAP_TRACE_ENTRY_TT 0x04
+#define HV_TRAP_TRACE_ENTRY_TAG 0x06
+#define HV_TRAP_TRACE_ENTRY_TSTATE 0x08
+#define HV_TRAP_TRACE_ENTRY_TICK 0x10
+#define HV_TRAP_TRACE_ENTRY_TPC 0x18
+#define HV_TRAP_TRACE_ENTRY_F1 0x20
+#define HV_TRAP_TRACE_ENTRY_F2 0x28
+#define HV_TRAP_TRACE_ENTRY_F3 0x30
+#define HV_TRAP_TRACE_ENTRY_F4 0x38
+
+/* The type field is encoded as follows. */
+#define HV_TRAP_TYPE_UNDEF 0x00 /* Entry content undefined */
+#define HV_TRAP_TYPE_HV 0x01 /* Hypervisor trap entry */
+#define HV_TRAP_TYPE_GUEST 0xff /* Added via ttrace_addentry() */
+
+/* ttrace_buf_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_BUF_CONF
+ * ARG0: real address
+ * ARG1: number of entries
+ * RET0: status
+ * RET1: number of entries
+ * ERRORS: ENORADDR Invalid real address
+ * EINVAL Size is too small
+ * EBADALIGN Real address not aligned on 64-byte boundary
+ *
+ * Requests hypervisor trap tracing and declares a virtual CPU's trap
+ * trace buffer to the hypervisor. The real address supplies the real
+ * base address of the trap trace queue and must be 64-byte aligned.
+ * Specifying a value of 0 for the number of entries disables trap
+ * tracing for the calling virtual CPU. The buffer allocated must be
+ * sized for a power of two number of 64-byte trap trace entries plus
+ * an initial 64-byte control structure.
+ *
+ * This may be invoked any number of times so that a virtual CPU may
+ * relocate a trap trace buffer or create "snapshots" of information.
+ *
+ * If the real address is illegal or badly aligned, then trap tracing
+ * is disabled and an error is returned.
+ *
+ * Upon failure with EINVAL, this service call returns in RET1 the
+ * minimum number of buffer entries required. Upon other failures
+ * RET1 is undefined.
+ */
+#define HV_FAST_TTRACE_BUF_CONF 0x90
+
+/* ttrace_buf_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_BUF_INFO
+ * RET0: status
+ * RET1: real address
+ * RET2: size
+ * ERRORS: None defined.
+ *
+ * Returns the size and location of the previously declared trap-trace
+ * buffer. In the event that no buffer was previously defined, or the
+ * buffer is disabled, this call will return a size of zero bytes.
+ */
+#define HV_FAST_TTRACE_BUF_INFO 0x91
+
+/* ttrace_enable()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_ENABLE
+ * ARG0: enable
+ * RET0: status
+ * RET1: previous enable state
+ * ERRORS: EINVAL No trap trace buffer currently defined
+ *
+ * Enable or disable trap tracing, and return the previous enabled
+ * state in RET1. Future systems may define various flags for the
+ * enable argument (ARG0), for the moment a guest should pass
+ * "(uint64_t) -1" to enable, and "(uint64_t) 0" to disable all
+ * tracing - which will ensure future compatability.
+ */
+#define HV_FAST_TTRACE_ENABLE 0x92
+
+/* ttrace_freeze()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_FREEZE
+ * ARG0: freeze
+ * RET0: status
+ * RET1: previous freeze state
+ * ERRORS: EINVAL No trap trace buffer currently defined
+ *
+ * Freeze or unfreeze trap tracing, returning the previous freeze
+ * state in RET1. A guest should pass a non-zero value to freeze and
+ * a zero value to unfreeze all tracing. The returned previous state
+ * is 0 for not frozen and 1 for frozen.
+ */
+#define HV_FAST_TTRACE_FREEZE 0x93
+
+/* ttrace_addentry()
+ * TRAP: HV_TTRACE_ADDENTRY_TRAP
+ * ARG0: tag (16-bits)
+ * ARG1: data word 0
+ * ARG2: data word 1
+ * ARG3: data word 2
+ * ARG4: data word 3
+ * RET0: status
+ * ERRORS: EINVAL No trap trace buffer currently defined
+ *
+ * Add an entry to the trap trace buffer. Upon return only ARG0/RET0
+ * is modified - none of the other registers holding arguments are
+ * volatile across this hypervisor service.
+ */
+
+/* Core dump services.
+ *
+ * Since the hypervisor viraulizes and thus obscures a lot of the
+ * physical machine layout and state, traditional OS crash dumps can
+ * be difficult to diagnose especially when the problem is a
+ * configuration error of some sort.
+ *
+ * The dump services provide an opaque buffer into which the
+ * hypervisor can place it's internal state in order to assist in
+ * debugging such situations. The contents are opaque and extremely
+ * platform and hypervisor implementation specific. The guest, during
+ * a core dump, requests that the hypervisor update any information in
+ * the dump buffer in preparation to being dumped as part of the
+ * domain's memory image.
+ */
+
+/* dump_buf_update()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_DUMP_BUF_UPDATE
+ * ARG0: real address
+ * ARG1: size
+ * RET0: status
+ * RET1: required size of dump buffer
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Real address is not aligned on a 64-byte
+ * boundary
+ * EINVAL Size is non-zero but less than minimum size
+ * required
+ * ENOTSUPPORTED Operation not supported on current logical
+ * domain
+ *
+ * Declare a domain dump buffer to the hypervisor. The real address
+ * provided for the domain dump buffer must be 64-byte aligned. The
+ * size specifies the size of the dump buffer and may be larger than
+ * the minimum size specified in the machine description. The
+ * hypervisor will fill the dump buffer with opaque data.
+ *
+ * Note: A guest may elect to include dump buffer contents as part of a crash
+ * dump to assist with debugging. This function may be called any number
+ * of times so that a guest may relocate a dump buffer, or create
+ * "snapshots" of any dump-buffer information. Each call to
+ * dump_buf_update() atomically declares the new dump buffer to the
+ * hypervisor.
+ *
+ * A specified size of 0 unconfigures the dump buffer. If the real
+ * address is illegal or badly aligned, then any currently active dump
+ * buffer is disabled and an error is returned.
+ *
+ * In the event that the call fails with EINVAL, RET1 contains the
+ * minimum size requires by the hypervisor for a valid dump buffer.
+ */
+#define HV_FAST_DUMP_BUF_UPDATE 0x94
+
+/* dump_buf_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_DUMP_BUF_INFO
+ * RET0: status
+ * RET1: real address of current dump buffer
+ * RET2: size of current dump buffer
+ * ERRORS: No errors defined.
+ *
+ * Return the currently configures dump buffer description. A
+ * returned size of 0 bytes indicates an undefined dump buffer. In
+ * this case the return address in RET1 is undefined.
+ */
+#define HV_FAST_DUMP_BUF_INFO 0x95
+
+/* Device interrupt services.
+ *
+ * Device interrupts are allocated to system bus bridges by the hypervisor,
+ * and described to OBP in the machine description. OBP then describes
+ * these interrupts to the OS via properties in the device tree.
+ *
+ * Terminology:
+ *
+ * cpuid Unique opaque value which represents a target cpu.
+ *
+ * devhandle Device handle. It uniquely identifies a device, and
+ * consistes of the lower 28-bits of the hi-cell of the
+ * first entry of the device's "reg" property in the
+ * OBP device tree.
+ *
+ * devino Device interrupt number. Specifies the relative
+ * interrupt number within the device. The unique
+ * combination of devhandle and devino are used to
+ * identify a specific device interrupt.
+ *
+ * Note: The devino value is the same as the values in the
+ * "interrupts" property or "interrupt-map" property
+ * in the OBP device tree for that device.
+ *
+ * sysino System interrupt number. A 64-bit unsigned interger
+ * representing a unique interrupt within a virtual
+ * machine.
+ *
+ * intr_state A flag representing the interrupt state for a given
+ * sysino. The state values are defined below.
+ *
+ * intr_enabled A flag representing the 'enabled' state for a given
+ * sysino. The enable values are defined below.
+ */
+
+#define HV_INTR_STATE_IDLE 0 /* Nothing pending */
+#define HV_INTR_STATE_RECEIVED 1 /* Interrupt received by hardware */
+#define HV_INTR_STATE_DELIVERED 2 /* Interrupt delivered to queue */
+
+#define HV_INTR_DISABLED 0 /* sysino not enabled */
+#define HV_INTR_ENABLED 1 /* sysino enabled */
+
+/* intr_devino_to_sysino()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_DEVINO2SYSINO
+ * ARG0: devhandle
+ * ARG1: devino
+ * RET0: status
+ * RET1: sysino
+ * ERRORS: EINVAL Invalid devhandle/devino
+ *
+ * Converts a device specific interrupt number of the given
+ * devhandle/devino into a system specific ino (sysino).
+ */
+#define HV_FAST_INTR_DEVINO2SYSINO 0xa0
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_devino_to_sysino(unsigned long devhandle,
+ unsigned long devino);
+#endif
+
+/* intr_getenabled()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_GETENABLED
+ * ARG0: sysino
+ * RET0: status
+ * RET1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ * ERRORS: EINVAL Invalid sysino
+ *
+ * Returns interrupt enabled state in RET1 for the interrupt defined
+ * by the given sysino.
+ */
+#define HV_FAST_INTR_GETENABLED 0xa1
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_getenabled(unsigned long sysino);
+#endif
+
+/* intr_setenabled()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_SETENABLED
+ * ARG0: sysino
+ * ARG1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ * RET0: status
+ * ERRORS: EINVAL Invalid sysino or intr_enabled value
+ *
+ * Set the 'enabled' state of the interrupt sysino.
+ */
+#define HV_FAST_INTR_SETENABLED 0xa2
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_setenabled(unsigned long sysino, unsigned long intr_enabled);
+#endif
+
+/* intr_getstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_GETSTATE
+ * ARG0: sysino
+ * RET0: status
+ * RET1: intr_state (HV_INTR_STATE_*)
+ * ERRORS: EINVAL Invalid sysino
+ *
+ * Returns current state of the interrupt defined by the given sysino.
+ */
+#define HV_FAST_INTR_GETSTATE 0xa3
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_getstate(unsigned long sysino);
+#endif
+
+/* intr_setstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_SETSTATE
+ * ARG0: sysino
+ * ARG1: intr_state (HV_INTR_STATE_*)
+ * RET0: status
+ * ERRORS: EINVAL Invalid sysino or intr_state value
+ *
+ * Sets the current state of the interrupt described by the given sysino
+ * value.
+ *
+ * Note: Setting the state to HV_INTR_STATE_IDLE clears any pending
+ * interrupt for sysino.
+ */
+#define HV_FAST_INTR_SETSTATE 0xa4
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state);
+#endif
+
+/* intr_gettarget()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_GETTARGET
+ * ARG0: sysino
+ * RET0: status
+ * RET1: cpuid
+ * ERRORS: EINVAL Invalid sysino
+ *
+ * Returns CPU that is the current target of the interrupt defined by
+ * the given sysino. The CPU value returned is undefined if the target
+ * has not been set via intr_settarget().
+ */
+#define HV_FAST_INTR_GETTARGET 0xa5
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_gettarget(unsigned long sysino);
+#endif
+
+/* intr_settarget()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_SETTARGET
+ * ARG0: sysino
+ * ARG1: cpuid
+ * RET0: status
+ * ERRORS: EINVAL Invalid sysino
+ * ENOCPU Invalid cpuid
+ *
+ * Set the target CPU for the interrupt defined by the given sysino.
+ */
+#define HV_FAST_INTR_SETTARGET 0xa6
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid);
+#endif
+
+/* PCI IO services.
+ *
+ * See the terminology descriptions in the device interrupt services
+ * section above as those apply here too. Here are terminology
+ * definitions specific to these PCI IO services:
+ *
+ * tsbnum TSB number. Indentifies which io-tsb is used.
+ * For this version of the specification, tsbnum
+ * must be zero.
+ *
+ * tsbindex TSB index. Identifies which entry in the TSB
+ * is used. The first entry is zero.
+ *
+ * tsbid A 64-bit aligned data structure which contains
+ * a tsbnum and a tsbindex. Bits 63:32 contain the
+ * tsbnum and bits 31:00 contain the tsbindex.
+ *
+ * Use the HV_PCI_TSBID() macro to construct such
+ * values.
+ *
+ * io_attributes IO attributes for IOMMU mappings. One of more
+ * of the attritbute bits are stores in a 64-bit
+ * value. The values are defined below.
+ *
+ * r_addr 64-bit real address
+ *
+ * pci_device PCI device address. A PCI device address identifies
+ * a specific device on a specific PCI bus segment.
+ * A PCI device address ia a 32-bit unsigned integer
+ * with the following format:
+ *
+ * 00000000.bbbbbbbb.dddddfff.00000000
+ *
+ * Use the HV_PCI_DEVICE_BUILD() macro to construct
+ * such values.
+ *
+ * pci_config_offset
+ * PCI configureation space offset. For conventional
+ * PCI a value between 0 and 255. For extended
+ * configuration space, a value between 0 and 4095.
+ *
+ * Note: For PCI configuration space accesses, the offset
+ * must be aligned to the access size.
+ *
+ * error_flag A return value which specifies if the action succeeded
+ * or failed. 0 means no error, non-0 means some error
+ * occurred while performing the service.
+ *
+ * io_sync_direction
+ * Direction definition for pci_dma_sync(), defined
+ * below in HV_PCI_SYNC_*.
+ *
+ * io_page_list A list of io_page_addresses, an io_page_address is
+ * a real address.
+ *
+ * io_page_list_p A pointer to an io_page_list.
+ *
+ * "size based byte swap" - Some functions do size based byte swapping
+ * which allows sw to access pointers and
+ * counters in native form when the processor
+ * operates in a different endianness than the
+ * IO bus. Size-based byte swapping converts a
+ * multi-byte field between big-endian and
+ * little-endian format.
+ */
+
+#define HV_PCI_MAP_ATTR_READ 0x01
+#define HV_PCI_MAP_ATTR_WRITE 0x02
+
+#define HV_PCI_DEVICE_BUILD(b,d,f) \
+ ((((b) & 0xff) << 16) | \
+ (((d) & 0x1f) << 11) | \
+ (((f) & 0x07) << 8))
+
+#define HV_PCI_TSBID(__tsb_num, __tsb_index) \
+ ((((u64)(__tsb_num)) << 32UL) | ((u64)(__tsb_index)))
+
+#define HV_PCI_SYNC_FOR_DEVICE 0x01
+#define HV_PCI_SYNC_FOR_CPU 0x02
+
+/* pci_iommu_map()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_MAP
+ * ARG0: devhandle
+ * ARG1: tsbid
+ * ARG2: #ttes
+ * ARG3: io_attributes
+ * ARG4: io_page_list_p
+ * RET0: status
+ * RET1: #ttes mapped
+ * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex/io_attributes
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Invalid real address
+ *
+ * Create IOMMU mappings in the sun4v device defined by the given
+ * devhandle. The mappings are created in the TSB defined by the
+ * tsbnum component of the given tsbid. The first mapping is created
+ * in the TSB i ndex defined by the tsbindex component of the given tsbid.
+ * The call creates up to #ttes mappings, the first one at tsbnum, tsbindex,
+ * the second at tsbnum, tsbindex + 1, etc.
+ *
+ * All mappings are created with the attributes defined by the io_attributes
+ * argument. The page mapping addresses are described in the io_page_list
+ * defined by the given io_page_list_p, which is a pointer to the io_page_list.
+ * The first entry in the io_page_list is the address for the first iotte, the
+ * 2nd for the 2nd iotte, and so on.
+ *
+ * Each io_page_address in the io_page_list must be appropriately aligned.
+ * #ttes must be greater than zero. For this version of the spec, the tsbnum
+ * component of the given tsbid must be zero.
+ *
+ * Returns the actual number of mappings creates, which may be less than
+ * or equal to the argument #ttes. If the function returns a value which
+ * is less than the #ttes, the caller may continus to call the function with
+ * an updated tsbid, #ttes, io_page_list_p arguments until all pages are
+ * mapped.
+ *
+ * Note: This function does not imply an iotte cache flush. The guest must
+ * demap an entry before re-mapping it.
+ */
+#define HV_FAST_PCI_IOMMU_MAP 0xb0
+
+/* pci_iommu_demap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_DEMAP
+ * ARG0: devhandle
+ * ARG1: tsbid
+ * ARG2: #ttes
+ * RET0: status
+ * RET1: #ttes demapped
+ * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex
+ *
+ * Demap and flush IOMMU mappings in the device defined by the given
+ * devhandle. Demaps up to #ttes entries in the TSB defined by the tsbnum
+ * component of the given tsbid, starting at the TSB index defined by the
+ * tsbindex component of the given tsbid.
+ *
+ * For this version of the spec, the tsbnum of the given tsbid must be zero.
+ * #ttes must be greater than zero.
+ *
+ * Returns the actual number of ttes demapped, which may be less than or equal
+ * to the argument #ttes. If #ttes demapped is less than #ttes, the caller
+ * may continue to call this function with updated tsbid and #ttes arguments
+ * until all pages are demapped.
+ *
+ * Note: Entries do not have to be mapped to be demapped. A demap of an
+ * unmapped page will flush the entry from the tte cache.
+ */
+#define HV_FAST_PCI_IOMMU_DEMAP 0xb1
+
+/* pci_iommu_getmap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_GETMAP
+ * ARG0: devhandle
+ * ARG1: tsbid
+ * RET0: status
+ * RET1: io_attributes
+ * RET2: real address
+ * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex
+ * ENOMAP Mapping is not valid, no translation exists
+ *
+ * Read and return the mapping in the device described by the given devhandle
+ * and tsbid. If successful, the io_attributes shall be returned in RET1
+ * and the page address of the mapping shall be returned in RET2.
+ *
+ * For this version of the spec, the tsbnum component of the given tsbid
+ * must be zero.
+ */
+#define HV_FAST_PCI_IOMMU_GETMAP 0xb2
+
+/* pci_iommu_getbypass()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_GETBYPASS
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: io_attributes
+ * RET0: status
+ * RET1: io_addr
+ * ERRORS: EINVAL Invalid devhandle/io_attributes
+ * ENORADDR Invalid real address
+ * ENOTSUPPORTED Function not supported in this implementation.
+ *
+ * Create a "special" mapping in the device described by the given devhandle,
+ * for the given real address and attributes. Return the IO address in RET1
+ * if successful.
+ */
+#define HV_FAST_PCI_IOMMU_GETBYPASS 0xb3
+
+/* pci_config_get()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_CONFIG_GET
+ * ARG0: devhandle
+ * ARG1: pci_device
+ * ARG2: pci_config_offset
+ * ARG3: size
+ * RET0: status
+ * RET1: error_flag
+ * RET2: data
+ * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size
+ * EBADALIGN pci_config_offset not size aligned
+ * ENOACCESS Access to this offset is not permitted
+ *
+ * Read PCI configuration space for the adapter described by the given
+ * devhandle. Read size (1, 2, or 4) bytes of data from the given
+ * pci_device, at pci_config_offset from the beginning of the device's
+ * configuration space. If there was no error, RET1 is set to zero and
+ * RET2 is set to the data read. Insignificant bits in RET2 are not
+ * guarenteed to have any specific value and therefore must be ignored.
+ *
+ * The data returned in RET2 is size based byte swapped.
+ *
+ * If an error occurs during the read, set RET1 to a non-zero value. The
+ * given pci_config_offset must be 'size' aligned.
+ */
+#define HV_FAST_PCI_CONFIG_GET 0xb4
+
+/* pci_config_put()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_CONFIG_PUT
+ * ARG0: devhandle
+ * ARG1: pci_device
+ * ARG2: pci_config_offset
+ * ARG3: size
+ * ARG4: data
+ * RET0: status
+ * RET1: error_flag
+ * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size
+ * EBADALIGN pci_config_offset not size aligned
+ * ENOACCESS Access to this offset is not permitted
+ *
+ * Write PCI configuration space for the adapter described by the given
+ * devhandle. Write size (1, 2, or 4) bytes of data in a single operation,
+ * at pci_config_offset from the beginning of the device's configuration
+ * space. The data argument contains the data to be written to configuration
+ * space. Prior to writing, the data is size based byte swapped.
+ *
+ * If an error occurs during the write access, do not generate an error
+ * report, do set RET1 to a non-zero value. Otherwise RET1 is zero.
+ * The given pci_config_offset must be 'size' aligned.
+ *
+ * This function is permitted to read from offset zero in the configuration
+ * space described by the given pci_device if necessary to ensure that the
+ * write access to config space completes.
+ */
+#define HV_FAST_PCI_CONFIG_PUT 0xb5
+
+/* pci_peek()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_PEEK
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: size
+ * RET0: status
+ * RET1: error_flag
+ * RET2: data
+ * ERRORS: EINVAL Invalid devhandle or size
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Bad real address
+ * ENOACCESS Guest access prohibited
+ *
+ * Attempt to read the IO address given by the given devhandle, real address,
+ * and size. Size must be 1, 2, 4, or 8. The read is performed as a single
+ * access operation using the given size. If an error occurs when reading
+ * from the given location, do not generate an error report, but return a
+ * non-zero value in RET1. If the read was successful, return zero in RET1
+ * and return the actual data read in RET2. The data returned is size based
+ * byte swapped.
+ *
+ * Non-significant bits in RET2 are not guarenteed to have any specific value
+ * and therefore must be ignored. If RET1 is returned as non-zero, the data
+ * value is not guarenteed to have any specific value and should be ignored.
+ *
+ * The caller must have permission to read from the given devhandle, real
+ * address, which must be an IO address. The argument real address must be a
+ * size aligned address.
+ *
+ * The hypervisor implementation of this function must block access to any
+ * IO address that the guest does not have explicit permission to access.
+ */
+#define HV_FAST_PCI_PEEK 0xb6
+
+/* pci_poke()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_POKE
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: size
+ * ARG3: data
+ * ARG4: pci_device
+ * RET0: status
+ * RET1: error_flag
+ * ERRORS: EINVAL Invalid devhandle, size, or pci_device
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Bad real address
+ * ENOACCESS Guest access prohibited
+ * ENOTSUPPORTED Function is not supported by implementation
+ *
+ * Attempt to write data to the IO address given by the given devhandle,
+ * real address, and size. Size must be 1, 2, 4, or 8. The write is
+ * performed as a single access operation using the given size. Prior to
+ * writing the data is size based swapped.
+ *
+ * If an error occurs when writing to the given location, do not generate an
+ * error report, but return a non-zero value in RET1. If the write was
+ * successful, return zero in RET1.
+ *
+ * pci_device describes the configuration address of the device being
+ * written to. The implementation may safely read from offset 0 with
+ * the configuration space of the device described by devhandle and
+ * pci_device in order to guarantee that the write portion of the operation
+ * completes
+ *
+ * Any error that occurs due to the read shall be reported using the normal
+ * error reporting mechanisms .. the read error is not suppressed.
+ *
+ * The caller must have permission to write to the given devhandle, real
+ * address, which must be an IO address. The argument real address must be a
+ * size aligned address. The caller must have permission to read from
+ * the given devhandle, pci_device cofiguration space offset 0.
+ *
+ * The hypervisor implementation of this function must block access to any
+ * IO address that the guest does not have explicit permission to access.
+ */
+#define HV_FAST_PCI_POKE 0xb7
+
+/* pci_dma_sync()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_DMA_SYNC
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: size
+ * ARG3: io_sync_direction
+ * RET0: status
+ * RET1: #synced
+ * ERRORS: EINVAL Invalid devhandle or io_sync_direction
+ * ENORADDR Bad real address
+ *
+ * Synchronize a memory region described by the given real address and size,
+ * for the device defined by the given devhandle using the direction(s)
+ * defined by the given io_sync_direction. The argument size is the size of
+ * the memory region in bytes.
+ *
+ * Return the actual number of bytes synchronized in the return value #synced,
+ * which may be less than or equal to the argument size. If the return
+ * value #synced is less than size, the caller must continue to call this
+ * function with updated real address and size arguments until the entire
+ * memory region is synchronized.
+ */
+#define HV_FAST_PCI_DMA_SYNC 0xb8
+
+/* PCI MSI services. */
+
+#define HV_MSITYPE_MSI32 0x00
+#define HV_MSITYPE_MSI64 0x01
+
+#define HV_MSIQSTATE_IDLE 0x00
+#define HV_MSIQSTATE_ERROR 0x01
+
+#define HV_MSIQ_INVALID 0x00
+#define HV_MSIQ_VALID 0x01
+
+#define HV_MSISTATE_IDLE 0x00
+#define HV_MSISTATE_DELIVERED 0x01
+
+#define HV_MSIVALID_INVALID 0x00
+#define HV_MSIVALID_VALID 0x01
+
+#define HV_PCIE_MSGTYPE_PME_MSG 0x18
+#define HV_PCIE_MSGTYPE_PME_ACK_MSG 0x1b
+#define HV_PCIE_MSGTYPE_CORR_MSG 0x30
+#define HV_PCIE_MSGTYPE_NONFATAL_MSG 0x31
+#define HV_PCIE_MSGTYPE_FATAL_MSG 0x33
+
+#define HV_MSG_INVALID 0x00
+#define HV_MSG_VALID 0x01
+
+/* pci_msiq_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_CONF
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: real address
+ * ARG3: number of entries
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, msiqid or nentries
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Bad real address
+ *
+ * Configure the MSI queue given by the devhandle and msiqid arguments,
+ * and to be placed at the given real address and be of the given
+ * number of entries. The real address must be aligned exactly to match
+ * the queue size. Each queue entry is 64-bytes long, so f.e. a 32 entry
+ * queue must be aligned on a 2048 byte real address boundary. The MSI-EQ
+ * Head and Tail are initialized so that the MSI-EQ is 'empty'.
+ *
+ * Implementation Note: Certain implementations have fixed sized queues. In
+ * that case, number of entries must contain the correct
+ * value.
+ */
+#define HV_FAST_PCI_MSIQ_CONF 0xc0
+
+/* pci_msiq_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_INFO
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: real address
+ * RET2: number of entries
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Return the configuration information for the MSI queue described
+ * by the given devhandle and msiqid. The base address of the queue
+ * is returned in ARG1 and the number of entries is returned in ARG2.
+ * If the queue is unconfigured, the real address is undefined and the
+ * number of entries will be returned as zero.
+ */
+#define HV_FAST_PCI_MSIQ_INFO 0xc1
+
+/* pci_msiq_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETVALID
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID)
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the valid state of the MSI-EQ described by the given devhandle and
+ * msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETVALID 0xc2
+
+/* pci_msiq_setvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_SETVALID
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID)
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msiqid or msiqvalid
+ * value or MSI EQ is uninitialized
+ *
+ * Set the valid state of the MSI-EQ described by the given devhandle and
+ * msiqid to the given msiqvalid.
+ */
+#define HV_FAST_PCI_MSIQ_SETVALID 0xc3
+
+/* pci_msiq_getstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETSTATE
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR)
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the state of the MSI-EQ described by the given devhandle and
+ * msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETSTATE 0xc4
+
+/* pci_msiq_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETVALID
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR)
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msiqid or msiqstate
+ * value or MSI EQ is uninitialized
+ *
+ * Set the state of the MSI-EQ described by the given devhandle and
+ * msiqid to the given msiqvalid.
+ */
+#define HV_FAST_PCI_MSIQ_SETSTATE 0xc5
+
+/* pci_msiq_gethead()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETHEAD
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqhead
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the current MSI EQ queue head for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETHEAD 0xc6
+
+/* pci_msiq_sethead()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_SETHEAD
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: msiqhead
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msiqid or msiqhead,
+ * or MSI EQ is uninitialized
+ *
+ * Set the current MSI EQ queue head for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_SETHEAD 0xc7
+
+/* pci_msiq_gettail()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETTAIL
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqtail
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the current MSI EQ queue tail for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETTAIL 0xc8
+
+/* pci_msi_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_GETVALID
+ * ARG0: devhandle
+ * ARG1: msinum
+ * RET0: status
+ * RET1: msivalidstate
+ * ERRORS: EINVAL Invalid devhandle or msinum
+ *
+ * Get the current valid/enabled state for the MSI defined by the
+ * given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_GETVALID 0xc9
+
+/* pci_msi_setvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_SETVALID
+ * ARG0: devhandle
+ * ARG1: msinum
+ * ARG2: msivalidstate
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msinum or msivalidstate
+ *
+ * Set the current valid/enabled state for the MSI defined by the
+ * given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_SETVALID 0xca
+
+/* pci_msi_getmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_GETMSIQ
+ * ARG0: devhandle
+ * ARG1: msinum
+ * RET0: status
+ * RET1: msiqid
+ * ERRORS: EINVAL Invalid devhandle or msinum or MSI is unbound
+ *
+ * Get the MSI EQ that the MSI defined by the given devhandle and
+ * msinum is bound to.
+ */
+#define HV_FAST_PCI_MSI_GETMSIQ 0xcb
+
+/* pci_msi_setmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_SETMSIQ
+ * ARG0: devhandle
+ * ARG1: msinum
+ * ARG2: msitype
+ * ARG3: msiqid
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msinum or msiqid
+ *
+ * Set the MSI EQ that the MSI defined by the given devhandle and
+ * msinum is bound to.
+ */
+#define HV_FAST_PCI_MSI_SETMSIQ 0xcc
+
+/* pci_msi_getstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_GETSTATE
+ * ARG0: devhandle
+ * ARG1: msinum
+ * RET0: status
+ * RET1: msistate
+ * ERRORS: EINVAL Invalid devhandle or msinum
+ *
+ * Get the state of the MSI defined by the given devhandle and msinum.
+ * If not initialized, return HV_MSISTATE_IDLE.
+ */
+#define HV_FAST_PCI_MSI_GETSTATE 0xcd
+
+/* pci_msi_setstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_SETSTATE
+ * ARG0: devhandle
+ * ARG1: msinum
+ * ARG2: msistate
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msinum or msistate
+ *
+ * Set the state of the MSI defined by the given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_SETSTATE 0xce
+
+/* pci_msg_getmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_GETMSIQ
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * RET0: status
+ * RET1: msiqid
+ * ERRORS: EINVAL Invalid devhandle or msgtype
+ *
+ * Get the MSI EQ of the MSG defined by the given devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_GETMSIQ 0xd0
+
+/* pci_msg_setmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_SETMSIQ
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * ARG2: msiqid
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, msgtype, or msiqid
+ *
+ * Set the MSI EQ of the MSG defined by the given devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_SETMSIQ 0xd1
+
+/* pci_msg_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_GETVALID
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * RET0: status
+ * RET1: msgvalidstate
+ * ERRORS: EINVAL Invalid devhandle or msgtype
+ *
+ * Get the valid/enabled state of the MSG defined by the given
+ * devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_GETVALID 0xd2
+
+/* pci_msg_setvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_SETVALID
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * ARG2: msgvalidstate
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msgtype or msgvalidstate
+ *
+ * Set the valid/enabled state of the MSG defined by the given
+ * devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_SETVALID 0xd3
+
+/* Performance counter services. */
+
+#define HV_PERF_JBUS_PERF_CTRL_REG 0x00
+#define HV_PERF_JBUS_PERF_CNT_REG 0x01
+#define HV_PERF_DRAM_PERF_CTRL_REG_0 0x02
+#define HV_PERF_DRAM_PERF_CNT_REG_0 0x03
+#define HV_PERF_DRAM_PERF_CTRL_REG_1 0x04
+#define HV_PERF_DRAM_PERF_CNT_REG_1 0x05
+#define HV_PERF_DRAM_PERF_CTRL_REG_2 0x06
+#define HV_PERF_DRAM_PERF_CNT_REG_2 0x07
+#define HV_PERF_DRAM_PERF_CTRL_REG_3 0x08
+#define HV_PERF_DRAM_PERF_CNT_REG_3 0x09
+
+/* get_perfreg()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_GET_PERFREG
+ * ARG0: performance reg number
+ * RET0: status
+ * RET1: performance reg value
+ * ERRORS: EINVAL Invalid performance register number
+ * ENOACCESS No access allowed to performance counters
+ *
+ * Read the value of the given DRAM/JBUS performance counter/control register.
+ */
+#define HV_FAST_GET_PERFREG 0x100
+
+/* set_perfreg()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_SET_PERFREG
+ * ARG0: performance reg number
+ * ARG1: performance reg value
+ * RET0: status
+ * ERRORS: EINVAL Invalid performance register number
+ * ENOACCESS No access allowed to performance counters
+ *
+ * Write the given performance reg value to the given DRAM/JBUS
+ * performance counter/control register.
+ */
+#define HV_FAST_SET_PERFREG 0x101
+
+/* MMU statistics services.
+ *
+ * The hypervisor maintains MMU statistics and privileged code provides
+ * a buffer where these statistics can be collected. It is continually
+ * updated once configured. The layout is as follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_mmu_statistics {
+ unsigned long immu_tsb_hits_ctx0_8k_tte;
+ unsigned long immu_tsb_ticks_ctx0_8k_tte;
+ unsigned long immu_tsb_hits_ctx0_64k_tte;
+ unsigned long immu_tsb_ticks_ctx0_64k_tte;
+ unsigned long __reserved1[2];
+ unsigned long immu_tsb_hits_ctx0_4mb_tte;
+ unsigned long immu_tsb_ticks_ctx0_4mb_tte;
+ unsigned long __reserved2[2];
+ unsigned long immu_tsb_hits_ctx0_256mb_tte;
+ unsigned long immu_tsb_ticks_ctx0_256mb_tte;
+ unsigned long __reserved3[4];
+ unsigned long immu_tsb_hits_ctxnon0_8k_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_8k_tte;
+ unsigned long immu_tsb_hits_ctxnon0_64k_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_64k_tte;
+ unsigned long __reserved4[2];
+ unsigned long immu_tsb_hits_ctxnon0_4mb_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_4mb_tte;
+ unsigned long __reserved5[2];
+ unsigned long immu_tsb_hits_ctxnon0_256mb_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_256mb_tte;
+ unsigned long __reserved6[4];
+ unsigned long dmmu_tsb_hits_ctx0_8k_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_8k_tte;
+ unsigned long dmmu_tsb_hits_ctx0_64k_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_64k_tte;
+ unsigned long __reserved7[2];
+ unsigned long dmmu_tsb_hits_ctx0_4mb_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_4mb_tte;
+ unsigned long __reserved8[2];
+ unsigned long dmmu_tsb_hits_ctx0_256mb_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_256mb_tte;
+ unsigned long __reserved9[4];
+ unsigned long dmmu_tsb_hits_ctxnon0_8k_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_8k_tte;
+ unsigned long dmmu_tsb_hits_ctxnon0_64k_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_64k_tte;
+ unsigned long __reserved10[2];
+ unsigned long dmmu_tsb_hits_ctxnon0_4mb_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_4mb_tte;
+ unsigned long __reserved11[2];
+ unsigned long dmmu_tsb_hits_ctxnon0_256mb_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_256mb_tte;
+ unsigned long __reserved12[4];
+};
+#endif
+
+/* mmustat_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMUSTAT_CONF
+ * ARG0: real address
+ * RET0: status
+ * RET1: real address
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Real address not aligned on 64-byte boundary
+ * EBADTRAP API not supported on this processor
+ *
+ * Enable MMU statistic gathering using the buffer at the given real
+ * address on the current virtual CPU. The new buffer real address
+ * is given in ARG1, and the previously specified buffer real address
+ * is returned in RET1, or is returned as zero for the first invocation.
+ *
+ * If the passed in real address argument is zero, this will disable
+ * MMU statistic collection on the current virtual CPU. If an error is
+ * returned then no statistics are collected.
+ *
+ * The buffer contents should be initialized to all zeros before being
+ * given to the hypervisor or else the statistics will be meaningless.
+ */
+#define HV_FAST_MMUSTAT_CONF 0x102
+
+/* mmustat_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMUSTAT_INFO
+ * RET0: status
+ * RET1: real address
+ * ERRORS: EBADTRAP API not supported on this processor
+ *
+ * Return the current state and real address of the currently configured
+ * MMU statistics buffer on the current virtual CPU.
+ */
+#define HV_FAST_MMUSTAT_INFO 0x103
+
+/* Function numbers for HV_CORE_TRAP. */
+#define HV_CORE_VER 0x00
+#define HV_CORE_PUTCHAR 0x01
+#define HV_CORE_EXIT 0x02
+
+#endif /* !(_SPARC64_HYPERVISOR_H) */
diff --git a/include/asm-sparc64/idprom.h b/include/asm-sparc64/idprom.h
index 701483c5465d..77fbf987385f 100644
--- a/include/asm-sparc64/idprom.h
+++ b/include/asm-sparc64/idprom.h
@@ -9,15 +9,7 @@
#include <linux/types.h>
-/* Offset into the EEPROM where the id PROM is located on the 4c */
-#define IDPROM_OFFSET 0x7d8
-
-/* On sun4m; physical. */
-/* MicroSPARC(-II) does not decode 31rd bit, but it works. */
-#define IDPROM_OFFSET_M 0xfd8
-
-struct idprom
-{
+struct idprom {
u8 id_format; /* Format identifier (always 0x01) */
u8 id_machtype; /* Machine type */
u8 id_ethaddr[6]; /* Hardware ethernet address */
@@ -30,6 +22,4 @@ struct idprom
extern struct idprom *idprom;
extern void idprom_init(void);
-#define IDPROM_SIZE (sizeof(struct idprom))
-
#endif /* !(_SPARC_IDPROM_H) */
diff --git a/include/asm-sparc64/intr_queue.h b/include/asm-sparc64/intr_queue.h
new file mode 100644
index 000000000000..206077dedc2a
--- /dev/null
+++ b/include/asm-sparc64/intr_queue.h
@@ -0,0 +1,15 @@
+#ifndef _SPARC64_INTR_QUEUE_H
+#define _SPARC64_INTR_QUEUE_H
+
+/* Sun4v interrupt queue registers, accessed via ASI_QUEUE. */
+
+#define INTRQ_CPU_MONDO_HEAD 0x3c0 /* CPU mondo head */
+#define INTRQ_CPU_MONDO_TAIL 0x3c8 /* CPU mondo tail */
+#define INTRQ_DEVICE_MONDO_HEAD 0x3d0 /* Device mondo head */
+#define INTRQ_DEVICE_MONDO_TAIL 0x3d8 /* Device mondo tail */
+#define INTRQ_RESUM_MONDO_HEAD 0x3e0 /* Resumable error mondo head */
+#define INTRQ_RESUM_MONDO_TAIL 0x3e8 /* Resumable error mondo tail */
+#define INTRQ_NONRESUM_MONDO_HEAD 0x3f0 /* Non-resumable error mondo head */
+#define INTRQ_NONRESUM_MONDO_TAIL 0x3f8 /* Non-resumable error mondo head */
+
+#endif /* !(_SPARC64_INTR_QUEUE_H) */
diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h
index 8b70edcb80dc..de33d6e1afb5 100644
--- a/include/asm-sparc64/irq.h
+++ b/include/asm-sparc64/irq.h
@@ -72,8 +72,11 @@ struct ino_bucket {
#define IMAP_VALID 0x80000000 /* IRQ Enabled */
#define IMAP_TID_UPA 0x7c000000 /* UPA TargetID */
#define IMAP_TID_JBUS 0x7c000000 /* JBUS TargetID */
+#define IMAP_TID_SHIFT 26
#define IMAP_AID_SAFARI 0x7c000000 /* Safari AgentID */
+#define IMAP_AID_SHIFT 26
#define IMAP_NID_SAFARI 0x03e00000 /* Safari NodeID */
+#define IMAP_NID_SHIFT 21
#define IMAP_IGN 0x000007c0 /* IRQ Group Number */
#define IMAP_INO 0x0000003f /* IRQ Number */
#define IMAP_INR 0x000007ff /* Full interrupt number*/
@@ -111,6 +114,7 @@ extern void disable_irq(unsigned int);
#define disable_irq_nosync disable_irq
extern void enable_irq(unsigned int);
extern unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long imap);
+extern unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino, int pil, unsigned char flags);
extern unsigned int sbus_build_irq(void *sbus, unsigned int ino);
static __inline__ void set_softint(unsigned long bits)
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 8627eed6e83d..230ba678d3b0 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -4,20 +4,9 @@
#include <linux/config.h>
#include <asm/page.h>
#include <asm/const.h>
+#include <asm/hypervisor.h>
-/*
- * For the 8k pagesize kernel, use only 10 hw context bits to optimize some
- * shifts in the fast tlbmiss handlers, instead of all 13 bits (specifically
- * for vpte offset calculation). For other pagesizes, this optimization in
- * the tlbhandlers can not be done; but still, all 13 bits can not be used
- * because the tlb handlers use "andcc" instruction which sign extends 13
- * bit arguments.
- */
-#if PAGE_SHIFT == 13
-#define CTX_NR_BITS 10
-#else
-#define CTX_NR_BITS 12
-#endif
+#define CTX_NR_BITS 13
#define TAG_CONTEXT_BITS ((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL))
@@ -90,8 +79,27 @@
#ifndef __ASSEMBLY__
+#define TSB_ENTRY_ALIGNMENT 16
+
+struct tsb {
+ unsigned long tag;
+ unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
+extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte);
+extern void tsb_flush(unsigned long ent, unsigned long tag);
+extern void tsb_init(struct tsb *tsb, unsigned long size);
+
typedef struct {
- unsigned long sparc64_ctx_val;
+ spinlock_t lock;
+ unsigned long sparc64_ctx_val;
+ struct tsb *tsb;
+ unsigned long tsb_rss_limit;
+ unsigned long tsb_nentries;
+ unsigned long tsb_reg_val;
+ unsigned long tsb_map_vaddr;
+ unsigned long tsb_map_pte;
+ struct hv_tsb_descr tsb_descr;
} mm_context_t;
#endif /* !__ASSEMBLY__ */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 57ee7b306189..e7974321d052 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -19,96 +19,98 @@ extern unsigned long tlb_context_cache;
extern unsigned long mmu_context_bmap[];
extern void get_new_mmu_context(struct mm_struct *mm);
+#ifdef CONFIG_SMP
+extern void smp_new_mmu_context_version(void);
+#else
+#define smp_new_mmu_context_version() do { } while (0)
+#endif
+
+extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+extern void destroy_context(struct mm_struct *mm);
+
+extern void __tsb_context_switch(unsigned long pgd_pa,
+ unsigned long tsb_reg,
+ unsigned long tsb_vaddr,
+ unsigned long tsb_pte,
+ unsigned long tsb_descr_pa);
+
+static inline void tsb_context_switch(struct mm_struct *mm)
+{
+ __tsb_context_switch(__pa(mm->pgd), mm->context.tsb_reg_val,
+ mm->context.tsb_map_vaddr,
+ mm->context.tsb_map_pte,
+ __pa(&mm->context.tsb_descr));
+}
-/* Initialize a new mmu context. This is invoked when a new
- * address space instance (unique or shared) is instantiated.
- * This just needs to set mm->context to an invalid context.
- */
-#define init_new_context(__tsk, __mm) \
- (((__mm)->context.sparc64_ctx_val = 0UL), 0)
-
-/* Destroy a dead context. This occurs when mmput drops the
- * mm_users count to zero, the mmaps have been released, and
- * all the page tables have been flushed. Our job is to destroy
- * any remaining processor-specific state, and in the sparc64
- * case this just means freeing up the mmu context ID held by
- * this task if valid.
- */
-#define destroy_context(__mm) \
-do { spin_lock(&ctx_alloc_lock); \
- if (CTX_VALID((__mm)->context)) { \
- unsigned long nr = CTX_NRBITS((__mm)->context); \
- mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \
- } \
- spin_unlock(&ctx_alloc_lock); \
-} while(0)
-
-/* Reload the two core values used by TLB miss handler
- * processing on sparc64. They are:
- * 1) The physical address of mm->pgd, when full page
- * table walks are necessary, this is where the
- * search begins.
- * 2) A "PGD cache". For 32-bit tasks only pgd[0] is
- * ever used since that maps the entire low 4GB
- * completely. To speed up TLB miss processing we
- * make this value available to the handlers. This
- * decreases the amount of memory traffic incurred.
- */
-#define reload_tlbmiss_state(__tsk, __mm) \
-do { \
- register unsigned long paddr asm("o5"); \
- register unsigned long pgd_cache asm("o4"); \
- paddr = __pa((__mm)->pgd); \
- pgd_cache = 0UL; \
- if (task_thread_info(__tsk)->flags & _TIF_32BIT) \
- pgd_cache = get_pgd_cache((__mm)->pgd); \
- __asm__ __volatile__("wrpr %%g0, 0x494, %%pstate\n\t" \
- "mov %3, %%g4\n\t" \
- "mov %0, %%g7\n\t" \
- "stxa %1, [%%g4] %2\n\t" \
- "membar #Sync\n\t" \
- "wrpr %%g0, 0x096, %%pstate" \
- : /* no outputs */ \
- : "r" (paddr), "r" (pgd_cache),\
- "i" (ASI_DMMU), "i" (TSB_REG)); \
-} while(0)
+extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss);
+#ifdef CONFIG_SMP
+extern void smp_tsb_sync(struct mm_struct *mm);
+#else
+#define smp_tsb_sync(__mm) do { } while (0)
+#endif
/* Set MMU context in the actual hardware. */
#define load_secondary_context(__mm) \
- __asm__ __volatile__("stxa %0, [%1] %2\n\t" \
- "flush %%g6" \
- : /* No outputs */ \
- : "r" (CTX_HWBITS((__mm)->context)), \
- "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU))
+ __asm__ __volatile__( \
+ "\n661: stxa %0, [%1] %2\n" \
+ " .section .sun4v_1insn_patch, \"ax\"\n" \
+ " .word 661b\n" \
+ " stxa %0, [%1] %3\n" \
+ " .previous\n" \
+ " flush %%g6\n" \
+ : /* No outputs */ \
+ : "r" (CTX_HWBITS((__mm)->context)), \
+ "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU), "i" (ASI_MMU))
extern void __flush_tlb_mm(unsigned long, unsigned long);
-/* Switch the current MM context. */
+/* Switch the current MM context. Interrupts are disabled. */
static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
{
- unsigned long ctx_valid;
+ unsigned long ctx_valid, flags;
int cpu;
- /* Note: page_table_lock is used here to serialize switch_mm
- * and activate_mm, and their calls to get_new_mmu_context.
- * This use of page_table_lock is unrelated to its other uses.
- */
- spin_lock(&mm->page_table_lock);
+ spin_lock_irqsave(&mm->context.lock, flags);
ctx_valid = CTX_VALID(mm->context);
if (!ctx_valid)
get_new_mmu_context(mm);
- spin_unlock(&mm->page_table_lock);
- if (!ctx_valid || (old_mm != mm)) {
- load_secondary_context(mm);
- reload_tlbmiss_state(tsk, mm);
- }
+ /* We have to be extremely careful here or else we will miss
+ * a TSB grow if we switch back and forth between a kernel
+ * thread and an address space which has it's TSB size increased
+ * on another processor.
+ *
+ * It is possible to play some games in order to optimize the
+ * switch, but the safest thing to do is to unconditionally
+ * perform the secondary context load and the TSB context switch.
+ *
+ * For reference the bad case is, for address space "A":
+ *
+ * CPU 0 CPU 1
+ * run address space A
+ * set cpu0's bits in cpu_vm_mask
+ * switch to kernel thread, borrow
+ * address space A via entry_lazy_tlb
+ * run address space A
+ * set cpu1's bit in cpu_vm_mask
+ * flush_tlb_pending()
+ * reset cpu_vm_mask to just cpu1
+ * TSB grow
+ * run address space A
+ * context was valid, so skip
+ * TSB context switch
+ *
+ * At that point cpu0 continues to use a stale TSB, the one from
+ * before the TSB grow performed on cpu1. cpu1 did not cross-call
+ * cpu0 to update it's TSB because at that point the cpu_vm_mask
+ * only had cpu1 set in it.
+ */
+ load_secondary_context(mm);
+ tsb_context_switch(mm);
- /* Even if (mm == old_mm) we _must_ check
- * the cpu_vm_mask. If we do not we could
- * corrupt the TLB state because of how
- * smp_flush_tlb_{page,range,mm} on sparc64
- * and lazy tlb switches work. -DaveM
+ /* Any time a processor runs a context on an address space
+ * for the first time, we must flush that context out of the
+ * local TLB.
*/
cpu = smp_processor_id();
if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
@@ -116,6 +118,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
__flush_tlb_mm(CTX_HWBITS(mm->context),
SECONDARY_CONTEXT);
}
+ spin_unlock_irqrestore(&mm->context.lock, flags);
}
#define deactivate_mm(tsk,mm) do { } while (0)
@@ -123,23 +126,20 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
/* Activate a new MM instance for the current task. */
static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
{
+ unsigned long flags;
int cpu;
- /* Note: page_table_lock is used here to serialize switch_mm
- * and activate_mm, and their calls to get_new_mmu_context.
- * This use of page_table_lock is unrelated to its other uses.
- */
- spin_lock(&mm->page_table_lock);
+ spin_lock_irqsave(&mm->context.lock, flags);
if (!CTX_VALID(mm->context))
get_new_mmu_context(mm);
cpu = smp_processor_id();
if (!cpu_isset(cpu, mm->cpu_vm_mask))
cpu_set(cpu, mm->cpu_vm_mask);
- spin_unlock(&mm->page_table_lock);
load_secondary_context(mm);
__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
- reload_tlbmiss_state(current, mm);
+ tsb_context_switch(mm);
+ spin_unlock_irqrestore(&mm->context.lock, flags);
}
#endif /* !(__ASSEMBLY__) */
diff --git a/include/asm-sparc64/numnodes.h b/include/asm-sparc64/numnodes.h
new file mode 100644
index 000000000000..017e7e74f5e7
--- /dev/null
+++ b/include/asm-sparc64/numnodes.h
@@ -0,0 +1,6 @@
+#ifndef _SPARC64_NUMNODES_H
+#define _SPARC64_NUMNODES_H
+
+#define NODES_SHIFT 0
+
+#endif /* !(_SPARC64_NUMNODES_H) */
diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h
index 3c59b2693fb9..c754676e13ef 100644
--- a/include/asm-sparc64/oplib.h
+++ b/include/asm-sparc64/oplib.h
@@ -12,18 +12,8 @@
#include <linux/config.h>
#include <asm/openprom.h>
-/* Enumeration to describe the prom major version we have detected. */
-enum prom_major_version {
- PROM_V0, /* Original sun4c V0 prom */
- PROM_V2, /* sun4c and early sun4m V2 prom */
- PROM_V3, /* sun4m and later, up to sun4d/sun4e machines V3 */
- PROM_P1275, /* IEEE compliant ISA based Sun PROM, only sun4u */
- PROM_AP1000, /* actually no prom at all */
-};
-
-extern enum prom_major_version prom_vers;
-/* Revision, and firmware revision. */
-extern unsigned int prom_rev, prom_prev;
+/* OBP version string. */
+extern char prom_version[];
/* Root node of the prom device tree, this stays constant after
* initialization is complete.
@@ -39,6 +29,9 @@ extern int prom_stdin, prom_stdout;
extern int prom_chosen_node;
/* Helper values and strings in arch/sparc64/kernel/head.S */
+extern const char prom_peer_name[];
+extern const char prom_compatible_name[];
+extern const char prom_root_compatible[];
extern const char prom_finddev_name[];
extern const char prom_chosen_path[];
extern const char prom_getprop_name[];
@@ -130,15 +123,6 @@ extern void prom_setcallback(callback_func_t func_ptr);
*/
extern unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size);
-/* Get the prom major version. */
-extern int prom_version(void);
-
-/* Get the prom plugin revision. */
-extern int prom_getrev(void);
-
-/* Get the prom firmware revision. */
-extern int prom_getprev(void);
-
/* Character operations to/from the console.... */
/* Non-blocking get character from console. */
@@ -164,6 +148,7 @@ enum prom_input_device {
PROMDEV_ITTYA, /* input from ttya */
PROMDEV_ITTYB, /* input from ttyb */
PROMDEV_IRSC, /* input from rsc */
+ PROMDEV_IVCONS, /* input from virtual-console */
PROMDEV_I_UNK,
};
@@ -176,6 +161,7 @@ enum prom_output_device {
PROMDEV_OTTYA, /* to ttya */
PROMDEV_OTTYB, /* to ttyb */
PROMDEV_ORSC, /* to rsc */
+ PROMDEV_OVCONS, /* to virtual-console */
PROMDEV_O_UNK,
};
@@ -183,10 +169,18 @@ extern enum prom_output_device prom_query_output_device(void);
/* Multiprocessor operations... */
#ifdef CONFIG_SMP
-/* Start the CPU with the given device tree node, context table, and context
- * at the passed program counter.
+/* Start the CPU with the given device tree node at the passed program
+ * counter with the given arg passed in via register %o0.
+ */
+extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg);
+
+/* Start the CPU with the given cpu ID at the passed program
+ * counter with the given arg passed in via register %o0.
*/
-extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long o0);
+extern void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg);
+
+/* Stop the CPU with the given cpu ID. */
+extern void prom_stopcpu_cpuid(int cpuid);
/* Stop the current CPU. */
extern void prom_stopself(void);
@@ -335,6 +329,7 @@ int cpu_find_by_mid(int mid, int *prom_node);
/* Client interface level routines. */
extern void prom_set_trap_table(unsigned long tba);
+extern void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa);
extern long p1275_cmd(const char *, long, ...);
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index 5426bb28a993..fcb2812265f4 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -104,10 +104,12 @@ typedef unsigned long pgprot_t;
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define ARCH_HAS_SETCLEAR_HUGE_PTE
#define ARCH_HAS_HUGETLB_PREFAULT_HOOK
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
- (_AC(0x0000000070000000,UL)) : (PAGE_OFFSET))
+ (_AC(0x0000000070000000,UL)) : \
+ (_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
#endif /* !(__ASSEMBLY__) */
@@ -124,17 +126,10 @@ typedef unsigned long pgprot_t;
#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET))
-/* PFNs are real physical page numbers. However, mem_map only begins to record
- * per-page information starting at pfn_base. This is to handle systems where
- * the first physical page in the machine is at some huge physical address,
- * such as 4GB. This is common on a partitioned E10000, for example.
- */
-extern struct page *pfn_to_page(unsigned long pfn);
-extern unsigned long page_to_pfn(struct page *);
+#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr)>>PAGE_SHIFT)
-#define pfn_valid(pfn) (((pfn)-(pfn_base)) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#define virt_to_phys __pa
diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h
index dd35a2c7798a..1396f110939a 100644
--- a/include/asm-sparc64/pbm.h
+++ b/include/asm-sparc64/pbm.h
@@ -139,6 +139,9 @@ struct pci_pbm_info {
/* Opaque 32-bit system bus Port ID. */
u32 portid;
+ /* Opaque 32-bit handle used for hypervisor calls. */
+ u32 devhandle;
+
/* Chipset version information. */
int chip_type;
#define PBM_CHIP_TYPE_SABRE 1
diff --git a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h
index 89bd71b1c0d8..7c5a589ea437 100644
--- a/include/asm-sparc64/pci.h
+++ b/include/asm-sparc64/pci.h
@@ -41,10 +41,26 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
struct pci_dev;
+struct pci_iommu_ops {
+ void *(*alloc_consistent)(struct pci_dev *, size_t, dma_addr_t *);
+ void (*free_consistent)(struct pci_dev *, size_t, void *, dma_addr_t);
+ dma_addr_t (*map_single)(struct pci_dev *, void *, size_t, int);
+ void (*unmap_single)(struct pci_dev *, dma_addr_t, size_t, int);
+ int (*map_sg)(struct pci_dev *, struct scatterlist *, int, int);
+ void (*unmap_sg)(struct pci_dev *, struct scatterlist *, int, int);
+ void (*dma_sync_single_for_cpu)(struct pci_dev *, dma_addr_t, size_t, int);
+ void (*dma_sync_sg_for_cpu)(struct pci_dev *, struct scatterlist *, int, int);
+};
+
+extern struct pci_iommu_ops *pci_iommu_ops;
+
/* Allocate and map kernel buffer using consistent mode DMA for a device.
* hwdev should be valid struct pci_dev pointer for PCI devices.
*/
-extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle);
+static inline void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
+{
+ return pci_iommu_ops->alloc_consistent(hwdev, size, dma_handle);
+}
/* Free and unmap a consistent DMA buffer.
* cpu_addr is what was returned from pci_alloc_consistent,
@@ -54,7 +70,10 @@ extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t
* References to the memory and mappings associated with cpu_addr/dma_addr
* past this call are illegal.
*/
-extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle);
+static inline void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
+{
+ return pci_iommu_ops->free_consistent(hwdev, size, vaddr, dma_handle);
+}
/* Map a single buffer of the indicated size for DMA in streaming mode.
* The 32-bit bus address to use is returned.
@@ -62,7 +81,10 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr,
* Once the device is given the dma address, the device owns this memory
* until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.
*/
-extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction);
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction)
+{
+ return pci_iommu_ops->map_single(hwdev, ptr, size, direction);
+}
/* Unmap a single streaming mode DMA translation. The dma_addr and size
* must match what was provided for in a previous pci_map_single call. All
@@ -71,7 +93,10 @@ extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
* After this call, reads by the cpu to the buffer are guaranteed to see
* whatever the device wrote there.
*/
-extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction);
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction)
+{
+ pci_iommu_ops->unmap_single(hwdev, dma_addr, size, direction);
+}
/* No highmem on sparc64, plus we have an IOMMU, so mapping pages is easy. */
#define pci_map_page(dev, page, off, size, dir) \
@@ -107,15 +132,19 @@ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t
* Device ownership issues as mentioned above for pci_map_single are
* the same here.
*/
-extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nents, int direction);
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+{
+ return pci_iommu_ops->map_sg(hwdev, sg, nents, direction);
+}
/* Unmap a set of streaming mode DMA translations.
* Again, cpu read rules concerning calls here are the same as for
* pci_unmap_single() above.
*/
-extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nhwents, int direction);
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction)
+{
+ pci_iommu_ops->unmap_sg(hwdev, sg, nhwents, direction);
+}
/* Make physical memory consistent for a single
* streaming mode DMA translation after a transfer.
@@ -127,8 +156,10 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
* must first perform a pci_dma_sync_for_device, and then the
* device again owns the buffer.
*/
-extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle,
- size_t size, int direction);
+static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction)
+{
+ pci_iommu_ops->dma_sync_single_for_cpu(hwdev, dma_handle, size, direction);
+}
static inline void
pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle,
@@ -144,7 +175,10 @@ pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle,
* The same as pci_dma_sync_single_* but for a scatter-gather list,
* same rules and usage.
*/
-extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction);
+static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
+{
+ pci_iommu_ops->dma_sync_sg_for_cpu(hwdev, sg, nelems, direction);
+}
static inline void
pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg,
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index a96067cca963..12e4a273bd43 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <asm/spitfire.h>
#include <asm/cpudata.h>
@@ -13,172 +14,59 @@
#include <asm/page.h>
/* Page table allocation/freeing. */
-#ifdef CONFIG_SMP
-/* Sliiiicck */
-#define pgt_quicklists local_cpu_data()
-#else
-extern struct pgtable_cache_struct {
- unsigned long *pgd_cache;
- unsigned long *pte_cache[2];
- unsigned int pgcache_size;
-} pgt_quicklists;
-#endif
-#define pgd_quicklist (pgt_quicklists.pgd_cache)
-#define pmd_quicklist ((unsigned long *)0)
-#define pte_quicklist (pgt_quicklists.pte_cache)
-#define pgtable_cache_size (pgt_quicklists.pgcache_size)
+extern kmem_cache_t *pgtable_cache;
-static __inline__ void free_pgd_fast(pgd_t *pgd)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- preempt_disable();
- *(unsigned long *)pgd = (unsigned long) pgd_quicklist;
- pgd_quicklist = (unsigned long *) pgd;
- pgtable_cache_size++;
- preempt_enable();
+ return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
}
-static __inline__ pgd_t *get_pgd_fast(void)
+static inline void pgd_free(pgd_t *pgd)
{
- unsigned long *ret;
-
- preempt_disable();
- if((ret = pgd_quicklist) != NULL) {
- pgd_quicklist = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- preempt_enable();
- } else {
- preempt_enable();
- ret = (unsigned long *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
- if(ret)
- memset(ret, 0, PAGE_SIZE);
- }
- return (pgd_t *)ret;
+ kmem_cache_free(pgtable_cache, pgd);
}
-static __inline__ void free_pgd_slow(pgd_t *pgd)
-{
- free_page((unsigned long)pgd);
-}
-
-#ifdef DCACHE_ALIASING_POSSIBLE
-#define VPTE_COLOR(address) (((address) >> (PAGE_SHIFT + 10)) & 1UL)
-#define DCACHE_COLOR(address) (((address) >> PAGE_SHIFT) & 1UL)
-#else
-#define VPTE_COLOR(address) 0
-#define DCACHE_COLOR(address) 0
-#endif
-
#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
-static __inline__ pmd_t *pmd_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- unsigned long *ret;
- int color = 0;
-
- preempt_disable();
- if (pte_quicklist[color] == NULL)
- color = 1;
-
- if((ret = (unsigned long *)pte_quicklist[color]) != NULL) {
- pte_quicklist[color] = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- }
- preempt_enable();
-
- return (pmd_t *)ret;
+ return kmem_cache_alloc(pgtable_cache,
+ GFP_KERNEL|__GFP_REPEAT);
}
-static __inline__ pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline void pmd_free(pmd_t *pmd)
{
- pmd_t *pmd;
-
- pmd = pmd_alloc_one_fast(mm, address);
- if (!pmd) {
- pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
- if (pmd)
- memset(pmd, 0, PAGE_SIZE);
- }
- return pmd;
+ kmem_cache_free(pgtable_cache, pmd);
}
-static __inline__ void free_pmd_fast(pmd_t *pmd)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
{
- unsigned long color = DCACHE_COLOR((unsigned long)pmd);
-
- preempt_disable();
- *(unsigned long *)pmd = (unsigned long) pte_quicklist[color];
- pte_quicklist[color] = (unsigned long *) pmd;
- pgtable_cache_size++;
- preempt_enable();
+ return kmem_cache_alloc(pgtable_cache,
+ GFP_KERNEL|__GFP_REPEAT);
}
-static __inline__ void free_pmd_slow(pmd_t *pmd)
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- free_page((unsigned long)pmd);
+ return virt_to_page(pte_alloc_one_kernel(mm, address));
}
-
-#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
-#define pmd_populate(MM,PMD,PTE_PAGE) \
- pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
-
-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- pte_t *pte = pte_alloc_one_kernel(mm, addr);
-
- if (pte)
- return virt_to_page(pte);
-
- return NULL;
-}
-
-static __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
-{
- unsigned long color = VPTE_COLOR(address);
- unsigned long *ret;
-
- preempt_disable();
- if((ret = (unsigned long *)pte_quicklist[color]) != NULL) {
- pte_quicklist[color] = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- }
- preempt_enable();
- return (pte_t *)ret;
-}
-
-static __inline__ void free_pte_fast(pte_t *pte)
-{
- unsigned long color = DCACHE_COLOR((unsigned long)pte);
-
- preempt_disable();
- *(unsigned long *)pte = (unsigned long) pte_quicklist[color];
- pte_quicklist[color] = (unsigned long *) pte;
- pgtable_cache_size++;
- preempt_enable();
-}
-
-static __inline__ void free_pte_slow(pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
+
static inline void pte_free_kernel(pte_t *pte)
{
- free_pte_fast(pte);
+ kmem_cache_free(pgtable_cache, pte);
}
static inline void pte_free(struct page *ptepage)
{
- free_pte_fast(page_address(ptepage));
+ pte_free_kernel(page_address(ptepage));
}
-#define pmd_free(pmd) free_pmd_fast(pmd)
-#define pgd_free(pgd) free_pgd_fast(pgd)
-#define pgd_alloc(mm) get_pgd_fast()
+
+#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
+#define pmd_populate(MM,PMD,PTE_PAGE) \
+ pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
+
+#define check_pgt_cache() do { } while (0)
#endif /* _SPARC64_PGALLOC_H */
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index f0a9b44d3eb5..ed4124edf837 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -25,7 +25,8 @@
#include <asm/const.h>
/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB).
- * The page copy blockops can use 0x2000000 to 0x10000000.
+ * The page copy blockops can use 0x2000000 to 0x4000000.
+ * The TSB is mapped in the 0x4000000 to 0x6000000 range.
* The PROM resides in an area spanning 0xf0000000 to 0x100000000.
* The vmalloc area spans 0x100000000 to 0x200000000.
* Since modules need to be in the lowest 32-bits of the address space,
@@ -34,6 +35,7 @@
* 0x400000000.
*/
#define TLBTEMP_BASE _AC(0x0000000002000000,UL)
+#define TSBMAP_BASE _AC(0x0000000004000000,UL)
#define MODULES_VADDR _AC(0x0000000010000000,UL)
#define MODULES_LEN _AC(0x00000000e0000000,UL)
#define MODULES_END _AC(0x00000000f0000000,UL)
@@ -88,162 +90,538 @@
#endif /* !(__ASSEMBLY__) */
-/* Spitfire/Cheetah TTE bits. */
-#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */
-#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit up to date*/
-#define _PAGE_SZ4MB _AC(0x6000000000000000,UL) /* 4MB Page */
-#define _PAGE_SZ512K _AC(0x4000000000000000,UL) /* 512K Page */
-#define _PAGE_SZ64K _AC(0x2000000000000000,UL) /* 64K Page */
-#define _PAGE_SZ8K _AC(0x0000000000000000,UL) /* 8K Page */
-#define _PAGE_NFO _AC(0x1000000000000000,UL) /* No Fault Only */
-#define _PAGE_IE _AC(0x0800000000000000,UL) /* Invert Endianness */
-#define _PAGE_SOFT2 _AC(0x07FC000000000000,UL) /* Software bits, set 2 */
-#define _PAGE_RES1 _AC(0x0002000000000000,UL) /* Reserved */
-#define _PAGE_SZ32MB _AC(0x0001000000000000,UL) /* (Panther) 32MB page */
-#define _PAGE_SZ256MB _AC(0x2001000000000000,UL) /* (Panther) 256MB page */
-#define _PAGE_SN _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */
-#define _PAGE_RES2 _AC(0x0000780000000000,UL) /* Reserved */
-#define _PAGE_PADDR_SF _AC(0x000001FFFFFFE000,UL) /* (Spitfire) paddr[40:13]*/
-#define _PAGE_PADDR _AC(0x000007FFFFFFE000,UL) /* (Cheetah) paddr[42:13] */
-#define _PAGE_SOFT _AC(0x0000000000001F80,UL) /* Software bits */
-#define _PAGE_L _AC(0x0000000000000040,UL) /* Locked TTE */
-#define _PAGE_CP _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */
-#define _PAGE_CV _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */
-#define _PAGE_E _AC(0x0000000000000008,UL) /* side-Effect */
-#define _PAGE_P _AC(0x0000000000000004,UL) /* Privileged Page */
-#define _PAGE_W _AC(0x0000000000000002,UL) /* Writable */
-#define _PAGE_G _AC(0x0000000000000001,UL) /* Global */
-
-/* Here are the SpitFire software bits we use in the TTE's.
- *
- * WARNING: If you are going to try and start using some
- * of the soft2 bits, you will need to make
- * modifications to the swap entry implementation.
- * For example, one thing that could happen is that
- * swp_entry_to_pte() would BUG_ON() if you tried
- * to use one of the soft2 bits for _PAGE_FILE.
- *
- * Like other architectures, I have aliased _PAGE_FILE with
- * _PAGE_MODIFIED. This works because _PAGE_FILE is never
- * interpreted that way unless _PAGE_PRESENT is clear.
- */
-#define _PAGE_EXEC _AC(0x0000000000001000,UL) /* Executable SW bit */
-#define _PAGE_MODIFIED _AC(0x0000000000000800,UL) /* Modified (dirty) */
-#define _PAGE_FILE _AC(0x0000000000000800,UL) /* Pagecache page */
-#define _PAGE_ACCESSED _AC(0x0000000000000400,UL) /* Accessed (ref'd) */
-#define _PAGE_READ _AC(0x0000000000000200,UL) /* Readable SW Bit */
-#define _PAGE_WRITE _AC(0x0000000000000100,UL) /* Writable SW Bit */
-#define _PAGE_PRESENT _AC(0x0000000000000080,UL) /* Present */
+/* PTE bits which are the same in SUN4U and SUN4V format. */
+#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */
+#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
+
+/* SUN4U pte bits... */
+#define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */
+#define _PAGE_SZ512K_4U _AC(0x4000000000000000,UL) /* 512K Page */
+#define _PAGE_SZ64K_4U _AC(0x2000000000000000,UL) /* 64K Page */
+#define _PAGE_SZ8K_4U _AC(0x0000000000000000,UL) /* 8K Page */
+#define _PAGE_NFO_4U _AC(0x1000000000000000,UL) /* No Fault Only */
+#define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */
+#define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */
+#define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */
+#define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */
+#define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */
+#define _PAGE_SN_4U _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */
+#define _PAGE_RES2_4U _AC(0x0000780000000000,UL) /* Reserved */
+#define _PAGE_PADDR_4U _AC(0x000007FFFFFFE000,UL) /* (Cheetah) pa[42:13] */
+#define _PAGE_SOFT_4U _AC(0x0000000000001F80,UL) /* Software bits: */
+#define _PAGE_EXEC_4U _AC(0x0000000000001000,UL) /* Executable SW bit */
+#define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty) */
+#define _PAGE_FILE_4U _AC(0x0000000000000800,UL) /* Pagecache page */
+#define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd) */
+#define _PAGE_READ_4U _AC(0x0000000000000200,UL) /* Readable SW Bit */
+#define _PAGE_WRITE_4U _AC(0x0000000000000100,UL) /* Writable SW Bit */
+#define _PAGE_PRESENT_4U _AC(0x0000000000000080,UL) /* Present */
+#define _PAGE_L_4U _AC(0x0000000000000040,UL) /* Locked TTE */
+#define _PAGE_CP_4U _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */
+#define _PAGE_CV_4U _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */
+#define _PAGE_E_4U _AC(0x0000000000000008,UL) /* side-Effect */
+#define _PAGE_P_4U _AC(0x0000000000000004,UL) /* Privileged Page */
+#define _PAGE_W_4U _AC(0x0000000000000002,UL) /* Writable */
+
+/* SUN4V pte bits... */
+#define _PAGE_NFO_4V _AC(0x4000000000000000,UL) /* No Fault Only */
+#define _PAGE_SOFT2_4V _AC(0x3F00000000000000,UL) /* Software bits, set 2 */
+#define _PAGE_MODIFIED_4V _AC(0x2000000000000000,UL) /* Modified (dirty) */
+#define _PAGE_ACCESSED_4V _AC(0x1000000000000000,UL) /* Accessed (ref'd) */
+#define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */
+#define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */
+#define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */
+#define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */
+#define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */
+#define _PAGE_CP_4V _AC(0x0000000000000400,UL) /* Cacheable in P-Cache */
+#define _PAGE_CV_4V _AC(0x0000000000000200,UL) /* Cacheable in V-Cache */
+#define _PAGE_P_4V _AC(0x0000000000000100,UL) /* Privileged Page */
+#define _PAGE_EXEC_4V _AC(0x0000000000000080,UL) /* Executable Page */
+#define _PAGE_W_4V _AC(0x0000000000000040,UL) /* Writable */
+#define _PAGE_SOFT_4V _AC(0x0000000000000030,UL) /* Software bits */
+#define _PAGE_FILE_4V _AC(0x0000000000000020,UL) /* Pagecache page */
+#define _PAGE_PRESENT_4V _AC(0x0000000000000010,UL) /* Present */
+#define _PAGE_RESV_4V _AC(0x0000000000000008,UL) /* Reserved */
+#define _PAGE_SZ16GB_4V _AC(0x0000000000000007,UL) /* 16GB Page */
+#define _PAGE_SZ2GB_4V _AC(0x0000000000000006,UL) /* 2GB Page */
+#define _PAGE_SZ256MB_4V _AC(0x0000000000000005,UL) /* 256MB Page */
+#define _PAGE_SZ32MB_4V _AC(0x0000000000000004,UL) /* 32MB Page */
+#define _PAGE_SZ4MB_4V _AC(0x0000000000000003,UL) /* 4MB Page */
+#define _PAGE_SZ512K_4V _AC(0x0000000000000002,UL) /* 512K Page */
+#define _PAGE_SZ64K_4V _AC(0x0000000000000001,UL) /* 64K Page */
+#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */
#if PAGE_SHIFT == 13
-#define _PAGE_SZBITS _PAGE_SZ8K
+#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V
#elif PAGE_SHIFT == 16
-#define _PAGE_SZBITS _PAGE_SZ64K
+#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V
#elif PAGE_SHIFT == 19
-#define _PAGE_SZBITS _PAGE_SZ512K
+#define _PAGE_SZBITS_4U _PAGE_SZ512K_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ512K_4V
#elif PAGE_SHIFT == 22
-#define _PAGE_SZBITS _PAGE_SZ4MB
+#define _PAGE_SZBITS_4U _PAGE_SZ4MB_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ4MB_4V
#else
#error Wrong PAGE_SHIFT specified
#endif
#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#define _PAGE_SZHUGE _PAGE_SZ4MB
+#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
+#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define _PAGE_SZHUGE _PAGE_SZ512K
+#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U
+#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define _PAGE_SZHUGE _PAGE_SZ64K
+#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U
+#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V
#endif
-#define _PAGE_CACHE (_PAGE_CP | _PAGE_CV)
+/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
+#define __P000 __pgprot(0)
+#define __P001 __pgprot(0)
+#define __P010 __pgprot(0)
+#define __P011 __pgprot(0)
+#define __P100 __pgprot(0)
+#define __P101 __pgprot(0)
+#define __P110 __pgprot(0)
+#define __P111 __pgprot(0)
+
+#define __S000 __pgprot(0)
+#define __S001 __pgprot(0)
+#define __S010 __pgprot(0)
+#define __S011 __pgprot(0)
+#define __S100 __pgprot(0)
+#define __S101 __pgprot(0)
+#define __S110 __pgprot(0)
+#define __S111 __pgprot(0)
-#define __DIRTY_BITS (_PAGE_MODIFIED | _PAGE_WRITE | _PAGE_W)
-#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_READ | _PAGE_R)
-#define __PRIV_BITS _PAGE_P
+#ifndef __ASSEMBLY__
-#define PAGE_NONE __pgprot (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_CACHE)
+extern pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long);
-/* Don't set the TTE _PAGE_W bit here, else the dirty bit never gets set. */
-#define PAGE_SHARED __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_WRITE | _PAGE_EXEC)
+extern unsigned long pte_sz_bits(unsigned long size);
-#define PAGE_COPY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_EXEC)
+extern pgprot_t PAGE_KERNEL;
+extern pgprot_t PAGE_KERNEL_LOCKED;
+extern pgprot_t PAGE_COPY;
+extern pgprot_t PAGE_SHARED;
-#define PAGE_READONLY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_EXEC)
+/* XXX This uglyness is for the atyfb driver's sparc mmap() support. XXX */
+extern unsigned long _PAGE_IE;
+extern unsigned long _PAGE_E;
+extern unsigned long _PAGE_CACHE;
-#define PAGE_KERNEL __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __PRIV_BITS | \
- __ACCESS_BITS | __DIRTY_BITS | _PAGE_EXEC)
+extern unsigned long pg_iobits;
+extern unsigned long _PAGE_ALL_SZ_BITS;
+extern unsigned long _PAGE_SZBITS;
-#define PAGE_SHARED_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \
- _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_WRITE)
+extern struct page *mem_map_zero;
+#define ZERO_PAGE(vaddr) (mem_map_zero)
-#define PAGE_COPY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \
- _PAGE_CACHE | __ACCESS_BITS)
+/* PFNs are real physical page numbers. However, mem_map only begins to record
+ * per-page information starting at pfn_base. This is to handle systems where
+ * the first physical page in the machine is at some huge physical address,
+ * such as 4GB. This is common on a partitioned E10000, for example.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ unsigned long sz_bits;
+
+ sz_bits = 0UL;
+ if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) {
+ __asm__ __volatile__(
+ "\n661: sethi %uhi(%1), %0\n"
+ " sllx %0, 32, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " mov %2, %0\n"
+ " nop\n"
+ " .previous\n"
+ : "=r" (sz_bits)
+ : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V));
+ }
+ return __pte(paddr | sz_bits | pgprot_val(prot));
+}
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-#define PAGE_READONLY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \
- _PAGE_CACHE | __ACCESS_BITS)
+/* This one can be done with two shifts. */
+static inline unsigned long pte_pfn(pte_t pte)
+{
+ unsigned long ret;
+
+ __asm__ __volatile__(
+ "\n661: sllx %1, %2, %0\n"
+ " srlx %0, %3, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sllx %1, %4, %0\n"
+ " srlx %0, %5, %0\n"
+ " .previous\n"
+ : "=r" (ret)
+ : "r" (pte_val(pte)),
+ "i" (21), "i" (21 + PAGE_SHIFT),
+ "i" (8), "i" (8 + PAGE_SHIFT));
+
+ return ret;
+}
+#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define _PFN_MASK _PAGE_PADDR
+static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
+{
+ unsigned long mask, tmp;
+
+ /* SUN4U: 0x600307ffffffecb8 (negated == 0x9ffcf80000001347)
+ * SUN4V: 0x30ffffffffffee17 (negated == 0xcf000000000011e8)
+ *
+ * Even if we use negation tricks the result is still a 6
+ * instruction sequence, so don't try to play fancy and just
+ * do the most straightforward implementation.
+ *
+ * Note: We encode this into 3 sun4v 2-insn patch sequences.
+ */
-#define pg_iobits (_PAGE_VALID | _PAGE_PRESENT | __DIRTY_BITS | \
- __ACCESS_BITS | _PAGE_E)
+ __asm__ __volatile__(
+ "\n661: sethi %%uhi(%2), %1\n"
+ " sethi %%hi(%2), %0\n"
+ "\n662: or %1, %%ulo(%2), %1\n"
+ " or %0, %%lo(%2), %0\n"
+ "\n663: sllx %1, 32, %1\n"
+ " or %0, %1, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%3), %1\n"
+ " sethi %%hi(%3), %0\n"
+ " .word 662b\n"
+ " or %1, %%ulo(%3), %1\n"
+ " or %0, %%lo(%3), %0\n"
+ " .word 663b\n"
+ " sllx %1, 32, %1\n"
+ " or %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (mask), "=r" (tmp)
+ : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
+ _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
+ _PAGE_SZBITS_4U),
+ "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
+ _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
+ _PAGE_SZBITS_4V));
+
+ return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
+}
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_NOEXEC
-#define __P010 PAGE_COPY_NOEXEC
-#define __P011 PAGE_COPY_NOEXEC
-#define __P100 PAGE_READONLY
-#define __P101 PAGE_READONLY
-#define __P110 PAGE_COPY
-#define __P111 PAGE_COPY
+static inline pte_t pgoff_to_pte(unsigned long off)
+{
+ off <<= PAGE_SHIFT;
+
+ __asm__ __volatile__(
+ "\n661: or %0, %2, %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " or %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (off)
+ : "0" (off), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
+
+ return __pte(off);
+}
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_NOEXEC
-#define __S010 PAGE_SHARED_NOEXEC
-#define __S011 PAGE_SHARED_NOEXEC
-#define __S100 PAGE_READONLY
-#define __S101 PAGE_READONLY
-#define __S110 PAGE_SHARED
-#define __S111 PAGE_SHARED
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+ unsigned long val = pgprot_val(prot);
+
+ __asm__ __volatile__(
+ "\n661: andn %0, %2, %0\n"
+ " or %0, %3, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " andn %0, %4, %0\n"
+ " or %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (val)
+ : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U),
+ "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V));
+
+ return __pgprot(val);
+}
+/* Various pieces of code check for platform support by ifdef testing
+ * on "pgprot_noncached". That's broken and should be fixed, but for
+ * now...
+ */
+#define pgprot_noncached pgprot_noncached
-#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: sethi %%uhi(%1), %0\n"
+ " sllx %0, 32, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " mov %2, %0\n"
+ " nop\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V));
+
+ return __pte(pte_val(pte) | mask);
+}
+#endif
-extern unsigned long phys_base;
-extern unsigned long pfn_base;
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ unsigned long val = pte_val(pte), tmp;
+
+ __asm__ __volatile__(
+ "\n661: or %0, %3, %0\n"
+ " nop\n"
+ "\n662: nop\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sllx %1, 32, %1\n"
+ " .word 662b\n"
+ " or %1, %%lo(%4), %1\n"
+ " or %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (val), "=r" (tmp)
+ : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U),
+ "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V));
+
+ return __pte(val);
+}
-extern struct page *mem_map_zero;
-#define ZERO_PAGE(vaddr) (mem_map_zero)
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ unsigned long val = pte_val(pte), tmp;
+
+ __asm__ __volatile__(
+ "\n661: andn %0, %3, %0\n"
+ " nop\n"
+ "\n662: nop\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sllx %1, 32, %1\n"
+ " .word 662b\n"
+ " or %1, %%lo(%4), %1\n"
+ " andn %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (val), "=r" (tmp)
+ : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U),
+ "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V));
+
+ return __pte(val);
+}
-/* PFNs are real physical page numbers. However, mem_map only begins to record
- * per-page information starting at pfn_base. This is to handle systems where
- * the first physical page in the machine is at some huge physical address,
- * such as 4GB. This is common on a partitioned E10000, for example.
- */
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ unsigned long val = pte_val(pte), mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V));
+
+ return __pte(val | mask);
+}
-#define pfn_pte(pfn, prot) \
- __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot) | _PAGE_SZBITS)
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ unsigned long val = pte_val(pte), tmp;
+
+ __asm__ __volatile__(
+ "\n661: andn %0, %3, %0\n"
+ " nop\n"
+ "\n662: nop\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sllx %1, 32, %1\n"
+ " .word 662b\n"
+ " or %1, %%lo(%4), %1\n"
+ " andn %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (val), "=r" (tmp)
+ : "0" (val), "i" (_PAGE_WRITE_4U | _PAGE_W_4U),
+ "i" (_PAGE_WRITE_4V | _PAGE_W_4V));
+
+ return __pte(val);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+ mask |= _PAGE_R;
+
+ return __pte(pte_val(pte) & ~mask);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+ mask |= _PAGE_R;
+
+ return __pte(pte_val(pte) | mask);
+}
-#define pte_pfn(x) ((pte_val(x) & _PAGE_PADDR)>>PAGE_SHIFT)
-#define pte_page(x) pfn_to_page(pte_pfn(x))
+static inline unsigned long pte_young(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+ return (pte_val(pte) & mask);
+}
+
+static inline unsigned long pte_dirty(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_MODIFIED_4U), "i" (_PAGE_MODIFIED_4V));
+
+ return (pte_val(pte) & mask);
+}
-static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
+static inline unsigned long pte_write(pte_t pte)
{
- pte_t __pte;
- const unsigned long preserve_mask = (_PFN_MASK |
- _PAGE_MODIFIED | _PAGE_ACCESSED |
- _PAGE_CACHE | _PAGE_E |
- _PAGE_PRESENT | _PAGE_SZBITS);
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V));
+
+ return (pte_val(pte) & mask);
+}
- pte_val(__pte) = (pte_val(orig_pte) & preserve_mask) |
- (pgprot_val(new_prot) & ~preserve_mask);
+static inline unsigned long pte_exec(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: sethi %%hi(%1), %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " mov %2, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_EXEC_4U), "i" (_PAGE_EXEC_4V));
+
+ return (pte_val(pte) & mask);
+}
- return __pte;
+static inline unsigned long pte_read(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_READ_4U), "i" (_PAGE_READ_4V));
+
+ return (pte_val(pte) & mask);
}
+
+static inline unsigned long pte_file(pte_t pte)
+{
+ unsigned long val = pte_val(pte);
+
+ __asm__ __volatile__(
+ "\n661: and %0, %2, %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " and %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (val)
+ : "0" (val), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
+
+ return val;
+}
+
+static inline unsigned long pte_present(pte_t pte)
+{
+ unsigned long val = pte_val(pte);
+
+ __asm__ __volatile__(
+ "\n661: and %0, %2, %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " and %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (val)
+ : "0" (val), "i" (_PAGE_PRESENT_4U), "i" (_PAGE_PRESENT_4V));
+
+ return val;
+}
+
#define pmd_set(pmdp, ptep) \
(pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL))
#define pud_set(pudp, pmdp) \
@@ -253,8 +631,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pud_page(pud) \
((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL)))
-#define pte_none(pte) (!pte_val(pte))
-#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
#define pmd_present(pmd) (pmd_val(pmd) != 0U)
@@ -264,30 +640,8 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
#define pud_present(pud) (pud_val(pud) != 0U)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0U)
-/* The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-#define pte_read(pte) (pte_val(pte) & _PAGE_READ)
-#define pte_exec(pte) (pte_val(pte) & _PAGE_EXEC)
-#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED)
-#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
-#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~(_PAGE_WRITE|_PAGE_W)))
-#define pte_rdprotect(pte) \
- (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_READ))
-#define pte_mkclean(pte) \
- (__pte(pte_val(pte) & ~(_PAGE_MODIFIED|_PAGE_W)))
-#define pte_mkold(pte) \
- (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_ACCESSED))
-
-/* Permanent address of a page. */
-#define __page_address(page) page_address(page)
-
-/* Be very careful when you change these three, they are delicate. */
-#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R))
-#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE))
-#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W))
-#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_SZHUGE))
+/* Same in both SUN4V and SUN4U. */
+#define pte_none(pte) (!pte_val(pte))
/* to find an entry in a page-table-directory. */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
@@ -296,11 +650,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-/* extract the pgd cache used for optimizing the tlb miss
- * slow path when executing 32-bit compat processes
- */
-#define get_pgd_cache(pgd) ((unsigned long) pgd_val(*pgd) << 11)
-
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address) \
((pmd_t *) pud_page(*(pudp)) + \
@@ -327,6 +676,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p
/* It is more efficient to let flush_tlb_kernel_range()
* handle init_mm tlb flushes.
+ *
+ * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+ * and SUN4V pte layout, so this inline test is fine.
*/
if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID))
tlb_batch_add(mm, addr, ptep, orig);
@@ -361,42 +713,23 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
/* File offset in PTE support. */
-#define pte_file(pte) (pte_val(pte) & _PAGE_FILE)
+extern unsigned long pte_file(pte_t);
#define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) (__pte(((off) << PAGE_SHIFT) | _PAGE_FILE))
+extern pte_t pgoff_to_pte(unsigned long);
#define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL)
extern unsigned long prom_virt_to_phys(unsigned long, int *);
-static __inline__ unsigned long
-sun4u_get_pte (unsigned long addr)
-{
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- if (addr >= PAGE_OFFSET)
- return addr & _PAGE_PADDR;
- if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS))
- return prom_virt_to_phys(addr, NULL);
- pgdp = pgd_offset_k(addr);
- pudp = pud_offset(pgdp, addr);
- pmdp = pmd_offset(pudp, addr);
- ptep = pte_offset_kernel(pmdp, addr);
- return pte_val(*ptep) & _PAGE_PADDR;
-}
+extern unsigned long sun4u_get_pte(unsigned long);
-static __inline__ unsigned long
-__get_phys (unsigned long addr)
+static inline unsigned long __get_phys(unsigned long addr)
{
- return sun4u_get_pte (addr);
+ return sun4u_get_pte(addr);
}
-static __inline__ int
-__get_iospace (unsigned long addr)
+static inline int __get_iospace(unsigned long addr)
{
- return ((sun4u_get_pte (addr) & 0xf0000000) >> 28);
+ return ((sun4u_get_pte(addr) & 0xf0000000) >> 28);
}
extern unsigned long *sparc64_valid_addr_bitmap;
@@ -409,11 +742,6 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
unsigned long pfn,
unsigned long size, pgprot_t prot);
-/* Clear virtual and physical cachability, set side-effect bit. */
-#define pgprot_noncached(prot) \
- (__pgprot((pgprot_val(prot) & ~(_PAGE_CP | _PAGE_CV)) | \
- _PAGE_E))
-
/*
* For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
* its high 4 bits. These macros/functions put it there or get it from there.
@@ -424,8 +752,11 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
#include <asm-generic/pgtable.h>
-/* We provide our own get_unmapped_area to cope with VA holes for userland */
+/* We provide our own get_unmapped_area to cope with VA holes and
+ * SHM area cache aliasing for userland.
+ */
#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
/* We provide a special get_unmapped_area for framebuffer mmaps to try and use
* the largest alignment possible such that larget PTEs can be used.
@@ -435,12 +766,9 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
unsigned long);
#define HAVE_ARCH_FB_UNMAPPED_AREA
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
-extern void check_pgt_cache(void);
+extern void pgtable_cache_init(void);
+extern void sun4v_register_fault_status(void);
+extern void sun4v_ktsb_register(void);
#endif /* !(__ASSEMBLY__) */
diff --git a/include/asm-sparc64/pil.h b/include/asm-sparc64/pil.h
index 8f87750c3517..79f827eb3f5d 100644
--- a/include/asm-sparc64/pil.h
+++ b/include/asm-sparc64/pil.h
@@ -16,11 +16,13 @@
#define PIL_SMP_CALL_FUNC 1
#define PIL_SMP_RECEIVE_SIGNAL 2
#define PIL_SMP_CAPTURE 3
+#define PIL_SMP_CTX_NEW_VERSION 4
#ifndef __ASSEMBLY__
#define PIL_RESERVED(PIL) ((PIL) == PIL_SMP_CALL_FUNC || \
(PIL) == PIL_SMP_RECEIVE_SIGNAL || \
- (PIL) == PIL_SMP_CAPTURE)
+ (PIL) == PIL_SMP_CAPTURE || \
+ (PIL) == PIL_SMP_CTX_NEW_VERSION)
#endif
#endif /* !(_SPARC64_PIL_H) */
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index cd8d9b4c8658..c6896b88283e 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -28,6 +28,8 @@
* User lives in his very own context, and cannot reference us. Note
* that TASK_SIZE is a misnomer, it really gives maximum user virtual
* address that the kernel will allocate out.
+ *
+ * XXX No longer using virtual page tables, kill this upper limit...
*/
#define VA_BITS 44
#ifndef __ASSEMBLY__
@@ -37,18 +39,6 @@
#endif
#define TASK_SIZE ((unsigned long)-VPTE_SIZE)
-/*
- * The vpte base must be able to hold the entire vpte, half
- * of which lives above, and half below, the base. And it
- * is placed as close to the highest address range as possible.
- */
-#define VPTE_BASE_SPITFIRE (-(VPTE_SIZE/2))
-#if 1
-#define VPTE_BASE_CHEETAH VPTE_BASE_SPITFIRE
-#else
-#define VPTE_BASE_CHEETAH 0xffe0000000000000
-#endif
-
#ifndef __ASSEMBLY__
typedef struct {
@@ -101,7 +91,8 @@ extern unsigned long thread_saved_pc(struct task_struct *);
/* Do necessary setup to start up a newly executed thread. */
#define start_thread(regs, pc, sp) \
do { \
- regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (ASI_PNF << 24); \
+ unsigned long __asi = ASI_PNF; \
+ regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (__asi << 24UL); \
regs->tpc = ((pc & (~3)) - 4); \
regs->tnpc = regs->tpc + 4; \
regs->y = 0; \
@@ -138,10 +129,10 @@ do { \
#define start_thread32(regs, pc, sp) \
do { \
+ unsigned long __asi = ASI_PNF; \
pc &= 0x00000000ffffffffUL; \
sp &= 0x00000000ffffffffUL; \
-\
- regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM); \
+ regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM) | (__asi << 24UL); \
regs->tpc = ((pc & (~3)) - 4); \
regs->tnpc = regs->tpc + 4; \
regs->y = 0; \
@@ -226,6 +217,8 @@ static inline void prefetchw(const void *x)
#define spin_lock_prefetch(x) prefetchw(x)
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
#endif /* !(__ASSEMBLY__) */
#endif /* !(__ASM_SPARC64_PROCESSOR_H) */
diff --git a/include/asm-sparc64/pstate.h b/include/asm-sparc64/pstate.h
index 29fb74aa805d..49a7924a89ab 100644
--- a/include/asm-sparc64/pstate.h
+++ b/include/asm-sparc64/pstate.h
@@ -28,11 +28,12 @@
/* The V9 TSTATE Register (with SpitFire and Linux extensions).
*
- * ---------------------------------------------------------------
- * | Resv | CCR | ASI | %pil | PSTATE | Resv | CWP |
- * ---------------------------------------------------------------
- * 63 40 39 32 31 24 23 20 19 8 7 5 4 0
+ * ---------------------------------------------------------------------
+ * | Resv | GL | CCR | ASI | %pil | PSTATE | Resv | CWP |
+ * ---------------------------------------------------------------------
+ * 63 43 42 40 39 32 31 24 23 20 19 8 7 5 4 0
*/
+#define TSTATE_GL _AC(0x0000070000000000,UL) /* Global reg level */
#define TSTATE_CCR _AC(0x000000ff00000000,UL) /* Condition Codes. */
#define TSTATE_XCC _AC(0x000000f000000000,UL) /* Condition Codes. */
#define TSTATE_XNEG _AC(0x0000008000000000,UL) /* %xcc Negative. */
diff --git a/include/asm-sparc64/scratchpad.h b/include/asm-sparc64/scratchpad.h
new file mode 100644
index 000000000000..5e8b01fb3343
--- /dev/null
+++ b/include/asm-sparc64/scratchpad.h
@@ -0,0 +1,14 @@
+#ifndef _SPARC64_SCRATCHPAD_H
+#define _SPARC64_SCRATCHPAD_H
+
+/* Sun4v scratchpad registers, accessed via ASI_SCRATCHPAD. */
+
+#define SCRATCHPAD_MMU_MISS 0x00 /* Shared with OBP - set by OBP */
+#define SCRATCHPAD_CPUID 0x08 /* Shared with OBP - set by hypervisor */
+#define SCRATCHPAD_UTSBREG1 0x10
+#define SCRATCHPAD_UTSBREG2 0x18
+ /* 0x20 and 0x28, hypervisor only... */
+#define SCRATCHPAD_UNUSED1 0x30
+#define SCRATCHPAD_UNUSED2 0x38 /* Reserved for OBP */
+
+#endif /* !(_SPARC64_SCRATCHPAD_H) */
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index 473edb2603ec..89d86ecaab24 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -33,37 +33,13 @@
extern cpumask_t phys_cpu_present_map;
#define cpu_possible_map phys_cpu_present_map
+extern cpumask_t cpu_sibling_map[NR_CPUS];
+
/*
* General functions that each host system must provide.
*/
-static __inline__ int hard_smp_processor_id(void)
-{
- if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- unsigned long cfg, ver;
- __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
- if ((ver >> 32) == 0x003e0016) {
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (cfg)
- : "i" (ASI_JBUS_CONFIG));
- return ((cfg >> 17) & 0x1f);
- } else {
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (cfg)
- : "i" (ASI_SAFARI_CONFIG));
- return ((cfg >> 17) & 0x3ff);
- }
- } else if (this_is_starfire != 0) {
- return starfire_hard_smp_processor_id();
- } else {
- unsigned long upaconfig;
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (upaconfig)
- : "i" (ASI_UPA_CONFIG));
- return ((upaconfig >> 17) & 0x1f);
- }
-}
-
+extern int hard_smp_processor_id(void);
#define raw_smp_processor_id() (current_thread_info()->cpu)
extern void smp_setup_cpu_possible_map(void);
diff --git a/include/asm-sparc64/sparsemem.h b/include/asm-sparc64/sparsemem.h
new file mode 100644
index 000000000000..ed5c9d8541e2
--- /dev/null
+++ b/include/asm-sparc64/sparsemem.h
@@ -0,0 +1,12 @@
+#ifndef _SPARC64_SPARSEMEM_H
+#define _SPARC64_SPARSEMEM_H
+
+#ifdef __KERNEL__
+
+#define SECTION_SIZE_BITS 26
+#define MAX_PHYSADDR_BITS 42
+#define MAX_PHYSMEM_BITS 42
+
+#endif /* !(__KERNEL__) */
+
+#endif /* !(_SPARC64_SPARSEMEM_H) */
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 962638c9d122..23ad8a7987ad 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -44,6 +44,7 @@ enum ultra_tlb_layout {
spitfire = 0,
cheetah = 1,
cheetah_plus = 2,
+ hypervisor = 3,
};
extern enum ultra_tlb_layout tlb_type;
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index af254e581834..a18ec87a52c1 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -209,9 +209,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
/* so that ASI is only written if it changes, think again. */ \
__asm__ __volatile__("wr %%g0, %0, %%asi" \
: : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\
+ trap_block[current_thread_info()->cpu].thread = \
+ task_thread_info(next); \
__asm__ __volatile__( \
"mov %%g4, %%g7\n\t" \
- "wrpr %%g0, 0x95, %%pstate\n\t" \
"stx %%i6, [%%sp + 2047 + 0x70]\n\t" \
"stx %%i7, [%%sp + 2047 + 0x78]\n\t" \
"rdpr %%wstate, %%o5\n\t" \
@@ -225,14 +226,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
"ldx [%%g6 + %3], %%o6\n\t" \
"ldub [%%g6 + %2], %%o5\n\t" \
"ldub [%%g6 + %4], %%o7\n\t" \
- "mov %%g6, %%l2\n\t" \
"wrpr %%o5, 0x0, %%wstate\n\t" \
"ldx [%%sp + 2047 + 0x70], %%i6\n\t" \
"ldx [%%sp + 2047 + 0x78], %%i7\n\t" \
- "wrpr %%g0, 0x94, %%pstate\n\t" \
- "mov %%l2, %%g6\n\t" \
"ldx [%%g6 + %6], %%g4\n\t" \
- "wrpr %%g0, 0x96, %%pstate\n\t" \
"brz,pt %%o7, 1f\n\t" \
" mov %%g7, %0\n\t" \
"b,a ret_from_syscall\n\t" \
diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h
index ac9d068aab4f..2ebf7f27bf91 100644
--- a/include/asm-sparc64/thread_info.h
+++ b/include/asm-sparc64/thread_info.h
@@ -64,8 +64,6 @@ struct thread_info {
__u64 kernel_cntd0, kernel_cntd1;
__u64 pcr_reg;
- __u64 cee_stuff;
-
struct restart_block restart_block;
struct pt_regs *kern_una_regs;
@@ -104,10 +102,9 @@ struct thread_info {
#define TI_KERN_CNTD0 0x00000480
#define TI_KERN_CNTD1 0x00000488
#define TI_PCR 0x00000490
-#define TI_CEE_STUFF 0x00000498
-#define TI_RESTART_BLOCK 0x000004a0
-#define TI_KUNA_REGS 0x000004c8
-#define TI_KUNA_INSN 0x000004d0
+#define TI_RESTART_BLOCK 0x00000498
+#define TI_KUNA_REGS 0x000004c0
+#define TI_KUNA_INSN 0x000004c8
#define TI_FPREGS 0x00000500
/* We embed this in the uppermost byte of thread_info->flags */
diff --git a/include/asm-sparc64/timex.h b/include/asm-sparc64/timex.h
index 9e8d4175bcb2..2a5e4ebaad80 100644
--- a/include/asm-sparc64/timex.h
+++ b/include/asm-sparc64/timex.h
@@ -14,4 +14,10 @@
typedef unsigned long cycles_t;
#define get_cycles() tick_ops->get_tick()
+#define ARCH_HAS_READ_CURRENT_TIMER 1
+#define read_current_timer(timer_val_p) \
+({ *timer_val_p = tick_ops->get_tick(); \
+ 0; \
+})
+
#endif
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3ef9909ac3ac..9ad5d9c51d42 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -5,6 +5,11 @@
#include <linux/mm.h>
#include <asm/mmu_context.h>
+/* TSB flush operations. */
+struct mmu_gather;
+extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tsb_user(struct mmu_gather *mp);
+
/* TLB flush operations. */
extern void flush_tlb_pending(void);
@@ -14,28 +19,36 @@ extern void flush_tlb_pending(void);
#define flush_tlb_page(vma,addr) flush_tlb_pending()
#define flush_tlb_mm(mm) flush_tlb_pending()
+/* Local cpu only. */
extern void __flush_tlb_all(void);
+
extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
#ifndef CONFIG_SMP
-#define flush_tlb_all() __flush_tlb_all()
#define flush_tlb_kernel_range(start,end) \
- __flush_tlb_kernel_range(start,end)
+do { flush_tsb_kernel_range(start,end); \
+ __flush_tlb_kernel_range(start,end); \
+} while (0)
#else /* CONFIG_SMP */
-extern void smp_flush_tlb_all(void);
extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb_all() smp_flush_tlb_all()
#define flush_tlb_kernel_range(start, end) \
- smp_flush_tlb_kernel_range(start, end)
+do { flush_tsb_kernel_range(start,end); \
+ smp_flush_tlb_kernel_range(start, end); \
+} while (0)
#endif /* ! CONFIG_SMP */
-extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long);
+static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ /* We don't use virtual page tables for TLB miss processing
+ * any more. Nowadays we use the TSB.
+ */
+}
#endif /* _SPARC64_TLBFLUSH_H */
diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h
new file mode 100644
index 000000000000..e82612cd9f33
--- /dev/null
+++ b/include/asm-sparc64/tsb.h
@@ -0,0 +1,281 @@
+#ifndef _SPARC64_TSB_H
+#define _SPARC64_TSB_H
+
+/* The sparc64 TSB is similar to the powerpc hashtables. It's a
+ * power-of-2 sized table of TAG/PTE pairs. The cpu precomputes
+ * pointers into this table for 8K and 64K page sizes, and also a
+ * comparison TAG based upon the virtual address and context which
+ * faults.
+ *
+ * TLB miss trap handler software does the actual lookup via something
+ * of the form:
+ *
+ * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
+ * ldxa [%g0] ASI_{D,I}MMU, %g6
+ * sllx %g6, 22, %g6
+ * srlx %g6, 22, %g6
+ * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4
+ * cmp %g4, %g6
+ * bne,pn %xcc, tsb_miss_{d,i}tlb
+ * mov FAULT_CODE_{D,I}TLB, %g3
+ * stxa %g5, [%g0] ASI_{D,I}TLB_DATA_IN
+ * retry
+ *
+ *
+ * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
+ * PTE. The TAG is of the same layout as the TLB TAG TARGET mmu
+ * register which is:
+ *
+ * -------------------------------------------------
+ * | - | CONTEXT | - | VADDR bits 63:22 |
+ * -------------------------------------------------
+ * 63 61 60 48 47 42 41 0
+ *
+ * But actually, since we use per-mm TSB's, we zero out the CONTEXT
+ * field.
+ *
+ * Like the powerpc hashtables we need to use locking in order to
+ * synchronize while we update the entries. PTE updates need locking
+ * as well.
+ *
+ * We need to carefully choose a lock bits for the TSB entry. We
+ * choose to use bit 47 in the tag. Also, since we never map anything
+ * at page zero in context zero, we use zero as an invalid tag entry.
+ * When the lock bit is set, this forces a tag comparison failure.
+ */
+
+#define TSB_TAG_LOCK_BIT 47
+#define TSB_TAG_LOCK_HIGH (1 << (TSB_TAG_LOCK_BIT - 32))
+
+#define TSB_TAG_INVALID_BIT 46
+#define TSB_TAG_INVALID_HIGH (1 << (TSB_TAG_INVALID_BIT - 32))
+
+#define TSB_MEMBAR membar #StoreStore
+
+/* Some cpus support physical address quad loads. We want to use
+ * those if possible so we don't need to hard-lock the TSB mapping
+ * into the TLB. We encode some instruction patching in order to
+ * support this.
+ *
+ * The kernel TSB is locked into the TLB by virtue of being in the
+ * kernel image, so we don't play these games for swapper_tsb access.
+ */
+#ifndef __ASSEMBLY__
+struct tsb_ldquad_phys_patch_entry {
+ unsigned int addr;
+ unsigned int sun4u_insn;
+ unsigned int sun4v_insn;
+};
+extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch,
+ __tsb_ldquad_phys_patch_end;
+
+struct tsb_phys_patch_entry {
+ unsigned int addr;
+ unsigned int insn;
+};
+extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
+#endif
+#define TSB_LOAD_QUAD(TSB, REG) \
+661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \
+ .section .tsb_ldquad_phys_patch, "ax"; \
+ .word 661b; \
+ ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \
+ ldda [TSB] ASI_QUAD_LDD_PHYS_4V, REG; \
+ .previous
+
+#define TSB_LOAD_TAG_HIGH(TSB, REG) \
+661: lduwa [TSB] ASI_N, REG; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ lduwa [TSB] ASI_PHYS_USE_EC, REG; \
+ .previous
+
+#define TSB_LOAD_TAG(TSB, REG) \
+661: ldxa [TSB] ASI_N, REG; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ ldxa [TSB] ASI_PHYS_USE_EC, REG; \
+ .previous
+
+#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \
+661: casa [TSB] ASI_N, REG1, REG2; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ casa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+ .previous
+
+#define TSB_CAS_TAG(TSB, REG1, REG2) \
+661: casxa [TSB] ASI_N, REG1, REG2; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ casxa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+ .previous
+
+#define TSB_STORE(ADDR, VAL) \
+661: stxa VAL, [ADDR] ASI_N; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ stxa VAL, [ADDR] ASI_PHYS_USE_EC; \
+ .previous
+
+#define TSB_LOCK_TAG(TSB, REG1, REG2) \
+99: TSB_LOAD_TAG_HIGH(TSB, REG1); \
+ sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\
+ andcc REG1, REG2, %g0; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_CAS_TAG_HIGH(TSB, REG1, REG2); \
+ cmp REG1, REG2; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_MEMBAR
+
+#define TSB_WRITE(TSB, TTE, TAG) \
+ add TSB, 0x8, TSB; \
+ TSB_STORE(TSB, TTE); \
+ sub TSB, 0x8, TSB; \
+ TSB_MEMBAR; \
+ TSB_STORE(TSB, TAG);
+
+#define KTSB_LOAD_QUAD(TSB, REG) \
+ ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG;
+
+#define KTSB_STORE(ADDR, VAL) \
+ stxa VAL, [ADDR] ASI_N;
+
+#define KTSB_LOCK_TAG(TSB, REG1, REG2) \
+99: lduwa [TSB] ASI_N, REG1; \
+ sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\
+ andcc REG1, REG2, %g0; \
+ bne,pn %icc, 99b; \
+ nop; \
+ casa [TSB] ASI_N, REG1, REG2;\
+ cmp REG1, REG2; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_MEMBAR
+
+#define KTSB_WRITE(TSB, TTE, TAG) \
+ add TSB, 0x8, TSB; \
+ stxa TTE, [TSB] ASI_N; \
+ sub TSB, 0x8, TSB; \
+ TSB_MEMBAR; \
+ stxa TAG, [TSB] ASI_N;
+
+ /* Do a kernel page table walk. Leaves physical PTE pointer in
+ * REG1. Jumps to FAIL_LABEL on early page table walk termination.
+ * VADDR will not be clobbered, but REG2 will.
+ */
+#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \
+ sethi %hi(swapper_pg_dir), REG1; \
+ or REG1, %lo(swapper_pg_dir), REG1; \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduw [REG1 + REG2], REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+ /* Do a user page table walk in MMU globals. Leaves physical PTE
+ * pointer in REG1. Jumps to FAIL_LABEL on early page table walk
+ * termination. Physical base of page tables is in PHYS_PGD which
+ * will not be modified.
+ *
+ * VADDR will not be clobbered, but REG1 and REG2 will.
+ */
+#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
+ * If no entry is found, FAIL_LABEL will be branched to. On success
+ * the resulting PTE value will be left in REG1. VADDR is preserved
+ * by this routine.
+ */
+#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \
+ sethi %hi(prom_trans), REG1; \
+ or REG1, %lo(prom_trans), REG1; \
+97: ldx [REG1 + 0x00], REG2; \
+ brz,pn REG2, FAIL_LABEL; \
+ nop; \
+ ldx [REG1 + 0x08], REG3; \
+ add REG2, REG3, REG3; \
+ cmp REG2, VADDR; \
+ bgu,pt %xcc, 98f; \
+ cmp VADDR, REG3; \
+ bgeu,pt %xcc, 98f; \
+ ldx [REG1 + 0x10], REG3; \
+ sub VADDR, REG2, REG2; \
+ ba,pt %xcc, 99f; \
+ add REG3, REG2, REG1; \
+98: ba,pt %xcc, 97b; \
+ add REG1, (3 * 8), REG1; \
+99:
+
+ /* We use a 32K TSB for the whole kernel, this allows to
+ * handle about 16MB of modules and vmalloc mappings without
+ * incurring many hash conflicts.
+ */
+#define KERNEL_TSB_SIZE_BYTES (32 * 1024)
+#define KERNEL_TSB_NENTRIES \
+ (KERNEL_TSB_SIZE_BYTES / 16)
+#define KERNEL_TSB4M_NENTRIES 4096
+
+ /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+ * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries
+ * and the found TTE will be left in REG1. REG3 and REG4 must
+ * be an even/odd pair of registers.
+ *
+ * VADDR and TAG will be preserved and not clobbered by this macro.
+ */
+#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+ sethi %hi(swapper_tsb), REG1; \
+ or REG1, %lo(swapper_tsb), REG1; \
+ srlx VADDR, PAGE_SHIFT, REG2; \
+ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
+ sllx REG2, 4, REG2; \
+ add REG1, REG2, REG2; \
+ KTSB_LOAD_QUAD(REG2, REG3); \
+ cmp REG3, TAG; \
+ be,a,pt %xcc, OK_LABEL; \
+ mov REG4, REG1;
+
+ /* This version uses a trick, the TAG is already (VADDR >> 22) so
+ * we can make use of that for the index computation.
+ */
+#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+ sethi %hi(swapper_4m_tsb), REG1; \
+ or REG1, %lo(swapper_4m_tsb), REG1; \
+ and TAG, (KERNEL_TSB_NENTRIES - 1), REG2; \
+ sllx REG2, 4, REG2; \
+ add REG1, REG2, REG2; \
+ KTSB_LOAD_QUAD(REG2, REG3); \
+ cmp REG3, TAG; \
+ be,a,pt %xcc, OK_LABEL; \
+ mov REG4, REG1;
+
+#endif /* !(_SPARC64_TSB_H) */
diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h
index 2784f80094c3..2d5e3c464df5 100644
--- a/include/asm-sparc64/ttable.h
+++ b/include/asm-sparc64/ttable.h
@@ -93,7 +93,7 @@
#define SYSCALL_TRAP(routine, systbl) \
sethi %hi(109f), %g7; \
- ba,pt %xcc, scetrap; \
+ ba,pt %xcc, etrap; \
109: or %g7, %lo(109b), %g7; \
sethi %hi(systbl), %l7; \
ba,pt %xcc, routine; \
@@ -109,14 +109,14 @@
nop;nop;nop;
#define TRAP_UTRAP(handler,lvl) \
- ldx [%g6 + TI_UTRAPS], %g1; \
- sethi %hi(109f), %g7; \
- brz,pn %g1, utrap; \
- or %g7, %lo(109f), %g7; \
- ba,pt %xcc, utrap; \
-109: ldx [%g1 + handler*8], %g1; \
- ba,pt %xcc, utrap_ill; \
- mov lvl, %o1;
+ mov handler, %g3; \
+ ba,pt %xcc, utrap_trap; \
+ mov lvl, %g4; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop;
#ifdef CONFIG_SUNOS_EMUL
#define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table)
@@ -136,8 +136,6 @@
#else
#define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall)
#endif
-/* FIXME: Write these actually */
-#define NETBSD_SYSCALL_TRAP TRAP(netbsd_syscall)
#define BREAKPOINT_TRAP TRAP(breakpoint_trap)
#define TRAP_IRQ(routine, level) \
@@ -182,6 +180,26 @@
#define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
#endif
+#define SUN4V_ITSB_MISS \
+ ldxa [%g0] ASI_SCRATCHPAD, %g2; \
+ ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4; \
+ ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5; \
+ srlx %g4, 22, %g6; \
+ ba,pt %xcc, sun4v_itsb_miss; \
+ nop; \
+ nop; \
+ nop;
+
+#define SUN4V_DTSB_MISS \
+ ldxa [%g0] ASI_SCRATCHPAD, %g2; \
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4; \
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5; \
+ srlx %g4, 22, %g6; \
+ ba,pt %xcc, sun4v_dtsb_miss; \
+ nop; \
+ nop; \
+ nop;
+
/* Before touching these macros, you owe it to yourself to go and
* see how arch/sparc64/kernel/winfixup.S works... -DaveM
*
@@ -221,6 +239,31 @@
saved; retry; nop; nop; nop; nop; nop; nop; \
nop; nop; nop; nop; nop; nop; nop; nop;
+#define SPILL_0_NORMAL_ETRAP \
+etrap_kernel_spill: \
+ stx %l0, [%sp + STACK_BIAS + 0x00]; \
+ stx %l1, [%sp + STACK_BIAS + 0x08]; \
+ stx %l2, [%sp + STACK_BIAS + 0x10]; \
+ stx %l3, [%sp + STACK_BIAS + 0x18]; \
+ stx %l4, [%sp + STACK_BIAS + 0x20]; \
+ stx %l5, [%sp + STACK_BIAS + 0x28]; \
+ stx %l6, [%sp + STACK_BIAS + 0x30]; \
+ stx %l7, [%sp + STACK_BIAS + 0x38]; \
+ stx %i0, [%sp + STACK_BIAS + 0x40]; \
+ stx %i1, [%sp + STACK_BIAS + 0x48]; \
+ stx %i2, [%sp + STACK_BIAS + 0x50]; \
+ stx %i3, [%sp + STACK_BIAS + 0x58]; \
+ stx %i4, [%sp + STACK_BIAS + 0x60]; \
+ stx %i5, [%sp + STACK_BIAS + 0x68]; \
+ stx %i6, [%sp + STACK_BIAS + 0x70]; \
+ stx %i7, [%sp + STACK_BIAS + 0x78]; \
+ saved; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop;
+
/* Normal 64bit spill */
#define SPILL_1_GENERIC(ASI) \
add %sp, STACK_BIAS + 0x00, %g1; \
@@ -254,6 +297,67 @@
b,a,pt %xcc, spill_fixup_mna; \
b,a,pt %xcc, spill_fixup;
+#define SPILL_1_GENERIC_ETRAP \
+etrap_user_spill_64bit: \
+ stxa %l0, [%sp + STACK_BIAS + 0x00] %asi; \
+ stxa %l1, [%sp + STACK_BIAS + 0x08] %asi; \
+ stxa %l2, [%sp + STACK_BIAS + 0x10] %asi; \
+ stxa %l3, [%sp + STACK_BIAS + 0x18] %asi; \
+ stxa %l4, [%sp + STACK_BIAS + 0x20] %asi; \
+ stxa %l5, [%sp + STACK_BIAS + 0x28] %asi; \
+ stxa %l6, [%sp + STACK_BIAS + 0x30] %asi; \
+ stxa %l7, [%sp + STACK_BIAS + 0x38] %asi; \
+ stxa %i0, [%sp + STACK_BIAS + 0x40] %asi; \
+ stxa %i1, [%sp + STACK_BIAS + 0x48] %asi; \
+ stxa %i2, [%sp + STACK_BIAS + 0x50] %asi; \
+ stxa %i3, [%sp + STACK_BIAS + 0x58] %asi; \
+ stxa %i4, [%sp + STACK_BIAS + 0x60] %asi; \
+ stxa %i5, [%sp + STACK_BIAS + 0x68] %asi; \
+ stxa %i6, [%sp + STACK_BIAS + 0x70] %asi; \
+ stxa %i7, [%sp + STACK_BIAS + 0x78] %asi; \
+ saved; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; \
+ ba,a,pt %xcc, etrap_spill_fixup_64bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_64bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_64bit;
+
+#define SPILL_1_GENERIC_ETRAP_FIXUP \
+etrap_spill_fixup_64bit: \
+ ldub [%g6 + TI_WSAVED], %g1; \
+ sll %g1, 3, %g3; \
+ add %g6, %g3, %g3; \
+ stx %sp, [%g3 + TI_RWIN_SPTRS]; \
+ sll %g1, 7, %g3; \
+ add %g6, %g3, %g3; \
+ stx %l0, [%g3 + TI_REG_WINDOW + 0x00]; \
+ stx %l1, [%g3 + TI_REG_WINDOW + 0x08]; \
+ stx %l2, [%g3 + TI_REG_WINDOW + 0x10]; \
+ stx %l3, [%g3 + TI_REG_WINDOW + 0x18]; \
+ stx %l4, [%g3 + TI_REG_WINDOW + 0x20]; \
+ stx %l5, [%g3 + TI_REG_WINDOW + 0x28]; \
+ stx %l6, [%g3 + TI_REG_WINDOW + 0x30]; \
+ stx %l7, [%g3 + TI_REG_WINDOW + 0x38]; \
+ stx %i0, [%g3 + TI_REG_WINDOW + 0x40]; \
+ stx %i1, [%g3 + TI_REG_WINDOW + 0x48]; \
+ stx %i2, [%g3 + TI_REG_WINDOW + 0x50]; \
+ stx %i3, [%g3 + TI_REG_WINDOW + 0x58]; \
+ stx %i4, [%g3 + TI_REG_WINDOW + 0x60]; \
+ stx %i5, [%g3 + TI_REG_WINDOW + 0x68]; \
+ stx %i6, [%g3 + TI_REG_WINDOW + 0x70]; \
+ stx %i7, [%g3 + TI_REG_WINDOW + 0x78]; \
+ add %g1, 1, %g1; \
+ stb %g1, [%g6 + TI_WSAVED]; \
+ saved; \
+ rdpr %cwp, %g1; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop
+
/* Normal 32bit spill */
#define SPILL_2_GENERIC(ASI) \
srl %sp, 0, %sp; \
@@ -287,6 +391,68 @@
b,a,pt %xcc, spill_fixup_mna; \
b,a,pt %xcc, spill_fixup;
+#define SPILL_2_GENERIC_ETRAP \
+etrap_user_spill_32bit: \
+ srl %sp, 0, %sp; \
+ stwa %l0, [%sp + 0x00] %asi; \
+ stwa %l1, [%sp + 0x04] %asi; \
+ stwa %l2, [%sp + 0x08] %asi; \
+ stwa %l3, [%sp + 0x0c] %asi; \
+ stwa %l4, [%sp + 0x10] %asi; \
+ stwa %l5, [%sp + 0x14] %asi; \
+ stwa %l6, [%sp + 0x18] %asi; \
+ stwa %l7, [%sp + 0x1c] %asi; \
+ stwa %i0, [%sp + 0x20] %asi; \
+ stwa %i1, [%sp + 0x24] %asi; \
+ stwa %i2, [%sp + 0x28] %asi; \
+ stwa %i3, [%sp + 0x2c] %asi; \
+ stwa %i4, [%sp + 0x30] %asi; \
+ stwa %i5, [%sp + 0x34] %asi; \
+ stwa %i6, [%sp + 0x38] %asi; \
+ stwa %i7, [%sp + 0x3c] %asi; \
+ saved; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; \
+ nop; nop; nop; nop; \
+ ba,a,pt %xcc, etrap_spill_fixup_32bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_32bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_32bit;
+
+#define SPILL_2_GENERIC_ETRAP_FIXUP \
+etrap_spill_fixup_32bit: \
+ ldub [%g6 + TI_WSAVED], %g1; \
+ sll %g1, 3, %g3; \
+ add %g6, %g3, %g3; \
+ stx %sp, [%g3 + TI_RWIN_SPTRS]; \
+ sll %g1, 7, %g3; \
+ add %g6, %g3, %g3; \
+ stw %l0, [%g3 + TI_REG_WINDOW + 0x00]; \
+ stw %l1, [%g3 + TI_REG_WINDOW + 0x04]; \
+ stw %l2, [%g3 + TI_REG_WINDOW + 0x08]; \
+ stw %l3, [%g3 + TI_REG_WINDOW + 0x0c]; \
+ stw %l4, [%g3 + TI_REG_WINDOW + 0x10]; \
+ stw %l5, [%g3 + TI_REG_WINDOW + 0x14]; \
+ stw %l6, [%g3 + TI_REG_WINDOW + 0x18]; \
+ stw %l7, [%g3 + TI_REG_WINDOW + 0x1c]; \
+ stw %i0, [%g3 + TI_REG_WINDOW + 0x20]; \
+ stw %i1, [%g3 + TI_REG_WINDOW + 0x24]; \
+ stw %i2, [%g3 + TI_REG_WINDOW + 0x28]; \
+ stw %i3, [%g3 + TI_REG_WINDOW + 0x2c]; \
+ stw %i4, [%g3 + TI_REG_WINDOW + 0x30]; \
+ stw %i5, [%g3 + TI_REG_WINDOW + 0x34]; \
+ stw %i6, [%g3 + TI_REG_WINDOW + 0x38]; \
+ stw %i7, [%g3 + TI_REG_WINDOW + 0x3c]; \
+ add %g1, 1, %g1; \
+ stb %g1, [%g6 + TI_WSAVED]; \
+ saved; \
+ rdpr %cwp, %g1; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop
+
#define SPILL_1_NORMAL SPILL_1_GENERIC(ASI_AIUP)
#define SPILL_2_NORMAL SPILL_2_GENERIC(ASI_AIUP)
#define SPILL_3_NORMAL SPILL_0_NORMAL
@@ -325,6 +491,35 @@
restored; retry; nop; nop; nop; nop; nop; nop; \
nop; nop; nop; nop; nop; nop; nop; nop;
+#define FILL_0_NORMAL_RTRAP \
+kern_rtt_fill: \
+ rdpr %cwp, %g1; \
+ sub %g1, 1, %g1; \
+ wrpr %g1, %cwp; \
+ ldx [%sp + STACK_BIAS + 0x00], %l0; \
+ ldx [%sp + STACK_BIAS + 0x08], %l1; \
+ ldx [%sp + STACK_BIAS + 0x10], %l2; \
+ ldx [%sp + STACK_BIAS + 0x18], %l3; \
+ ldx [%sp + STACK_BIAS + 0x20], %l4; \
+ ldx [%sp + STACK_BIAS + 0x28], %l5; \
+ ldx [%sp + STACK_BIAS + 0x30], %l6; \
+ ldx [%sp + STACK_BIAS + 0x38], %l7; \
+ ldx [%sp + STACK_BIAS + 0x40], %i0; \
+ ldx [%sp + STACK_BIAS + 0x48], %i1; \
+ ldx [%sp + STACK_BIAS + 0x50], %i2; \
+ ldx [%sp + STACK_BIAS + 0x58], %i3; \
+ ldx [%sp + STACK_BIAS + 0x60], %i4; \
+ ldx [%sp + STACK_BIAS + 0x68], %i5; \
+ ldx [%sp + STACK_BIAS + 0x70], %i6; \
+ ldx [%sp + STACK_BIAS + 0x78], %i7; \
+ restored; \
+ add %g1, 1, %g1; \
+ ba,pt %xcc, kern_rtt_restore; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop;
+
+
/* Normal 64bit fill */
#define FILL_1_GENERIC(ASI) \
add %sp, STACK_BIAS + 0x00, %g1; \
@@ -356,6 +551,33 @@
b,a,pt %xcc, fill_fixup_mna; \
b,a,pt %xcc, fill_fixup;
+#define FILL_1_GENERIC_RTRAP \
+user_rtt_fill_64bit: \
+ ldxa [%sp + STACK_BIAS + 0x00] %asi, %l0; \
+ ldxa [%sp + STACK_BIAS + 0x08] %asi, %l1; \
+ ldxa [%sp + STACK_BIAS + 0x10] %asi, %l2; \
+ ldxa [%sp + STACK_BIAS + 0x18] %asi, %l3; \
+ ldxa [%sp + STACK_BIAS + 0x20] %asi, %l4; \
+ ldxa [%sp + STACK_BIAS + 0x28] %asi, %l5; \
+ ldxa [%sp + STACK_BIAS + 0x30] %asi, %l6; \
+ ldxa [%sp + STACK_BIAS + 0x38] %asi, %l7; \
+ ldxa [%sp + STACK_BIAS + 0x40] %asi, %i0; \
+ ldxa [%sp + STACK_BIAS + 0x48] %asi, %i1; \
+ ldxa [%sp + STACK_BIAS + 0x50] %asi, %i2; \
+ ldxa [%sp + STACK_BIAS + 0x58] %asi, %i3; \
+ ldxa [%sp + STACK_BIAS + 0x60] %asi, %i4; \
+ ldxa [%sp + STACK_BIAS + 0x68] %asi, %i5; \
+ ldxa [%sp + STACK_BIAS + 0x70] %asi, %i6; \
+ ldxa [%sp + STACK_BIAS + 0x78] %asi, %i7; \
+ ba,pt %xcc, user_rtt_pre_restore; \
+ restored; \
+ nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
/* Normal 32bit fill */
#define FILL_2_GENERIC(ASI) \
srl %sp, 0, %sp; \
@@ -387,6 +609,34 @@
b,a,pt %xcc, fill_fixup_mna; \
b,a,pt %xcc, fill_fixup;
+#define FILL_2_GENERIC_RTRAP \
+user_rtt_fill_32bit: \
+ srl %sp, 0, %sp; \
+ lduwa [%sp + 0x00] %asi, %l0; \
+ lduwa [%sp + 0x04] %asi, %l1; \
+ lduwa [%sp + 0x08] %asi, %l2; \
+ lduwa [%sp + 0x0c] %asi, %l3; \
+ lduwa [%sp + 0x10] %asi, %l4; \
+ lduwa [%sp + 0x14] %asi, %l5; \
+ lduwa [%sp + 0x18] %asi, %l6; \
+ lduwa [%sp + 0x1c] %asi, %l7; \
+ lduwa [%sp + 0x20] %asi, %i0; \
+ lduwa [%sp + 0x24] %asi, %i1; \
+ lduwa [%sp + 0x28] %asi, %i2; \
+ lduwa [%sp + 0x2c] %asi, %i3; \
+ lduwa [%sp + 0x30] %asi, %i4; \
+ lduwa [%sp + 0x34] %asi, %i5; \
+ lduwa [%sp + 0x38] %asi, %i6; \
+ lduwa [%sp + 0x3c] %asi, %i7; \
+ ba,pt %xcc, user_rtt_pre_restore; \
+ restored; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
#define FILL_1_NORMAL FILL_1_GENERIC(ASI_AIUP)
#define FILL_2_NORMAL FILL_2_GENERIC(ASI_AIUP)
#define FILL_3_NORMAL FILL_0_NORMAL
diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h
index c91d1e38eac6..afe236ba555b 100644
--- a/include/asm-sparc64/uaccess.h
+++ b/include/asm-sparc64/uaccess.h
@@ -114,16 +114,6 @@ case 8: __put_user_asm(data,x,addr,__pu_ret); break; \
default: __pu_ret = __put_user_bad(); break; \
} __pu_ret; })
-#define __put_user_nocheck_ret(data,addr,size,retval) ({ \
-register int __foo __asm__ ("l1"); \
-switch (size) { \
-case 1: __put_user_asm_ret(data,b,addr,retval,__foo); break; \
-case 2: __put_user_asm_ret(data,h,addr,retval,__foo); break; \
-case 4: __put_user_asm_ret(data,w,addr,retval,__foo); break; \
-case 8: __put_user_asm_ret(data,x,addr,retval,__foo); break; \
-default: if (__put_user_bad()) return retval; break; \
-} })
-
#define __put_user_asm(x,size,addr,ret) \
__asm__ __volatile__( \
"/* Put user asm, inline. */\n" \
@@ -143,33 +133,6 @@ __asm__ __volatile__( \
: "=r" (ret) : "r" (x), "r" (__m(addr)), \
"i" (-EFAULT))
-#define __put_user_asm_ret(x,size,addr,ret,foo) \
-if (__builtin_constant_p(ret) && ret == -EFAULT) \
-__asm__ __volatile__( \
- "/* Put user asm ret, inline. */\n" \
-"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".word 1b, __ret_efault\n\n\t" \
- ".previous\n\n\t" \
- : "=r" (foo) : "r" (x), "r" (__m(addr))); \
-else \
-__asm__ __volatile__( \
- "/* Put user asm ret, inline. */\n" \
-"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \
- ".section .fixup,#alloc,#execinstr\n\t" \
- ".align 4\n" \
-"3:\n\t" \
- "ret\n\t" \
- " restore %%g0, %3, %%o0\n\n\t" \
- ".previous\n\t" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".word 1b, 3b\n\n\t" \
- ".previous\n\n\t" \
- : "=r" (foo) : "r" (x), "r" (__m(addr)), \
- "i" (ret))
-
extern int __put_user_bad(void);
#define __get_user_nocheck(data,addr,size,type) ({ \
@@ -289,14 +252,7 @@ copy_in_user(void __user *to, void __user *from, unsigned long size)
}
#define __copy_in_user copy_in_user
-extern unsigned long __must_check __bzero_noasi(void __user *, unsigned long);
-
-static inline unsigned long __must_check
-__clear_user(void __user *addr, unsigned long size)
-{
-
- return __bzero_noasi(addr, size);
-}
+extern unsigned long __must_check __clear_user(void __user *, unsigned long);
#define clear_user __clear_user
diff --git a/include/asm-sparc64/vdev.h b/include/asm-sparc64/vdev.h
new file mode 100644
index 000000000000..996e6be7b976
--- /dev/null
+++ b/include/asm-sparc64/vdev.h
@@ -0,0 +1,16 @@
+/* vdev.h: SUN4V virtual device interfaces and defines.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#ifndef _SPARC64_VDEV_H
+#define _SPARC64_VDEV_H
+
+#include <linux/types.h>
+
+extern u32 sun4v_vdev_devhandle;
+extern int sun4v_vdev_root;
+
+extern unsigned int sun4v_vdev_device_interrupt(unsigned int);
+
+#endif /* !(_SPARC64_VDEV_H) */
diff --git a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h
index 8b3a7e4b6062..8ce3f1813e28 100644
--- a/include/asm-sparc64/xor.h
+++ b/include/asm-sparc64/xor.h
@@ -2,9 +2,11 @@
* include/asm-sparc64/xor.h
*
* High speed xor_block operation for RAID4/5 utilizing the
- * UltraSparc Visual Instruction Set.
+ * UltraSparc Visual Instruction Set and Niagara block-init
+ * twin-load instructions.
*
* Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,8 +18,7 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <asm/pstate.h>
-#include <asm/asi.h>
+#include <asm/spitfire.h>
extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
@@ -37,4 +38,29 @@ static struct xor_block_template xor_block_VIS = {
.do_5 = xor_vis_5,
};
-#define XOR_TRY_TEMPLATES xor_speed(&xor_block_VIS)
+extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
+extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *);
+extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *);
+extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *, unsigned long *);
+
+static struct xor_block_template xor_block_niagara = {
+ .name = "Niagara",
+ .do_2 = xor_niagara_2,
+ .do_3 = xor_niagara_3,
+ .do_4 = xor_niagara_4,
+ .do_5 = xor_niagara_5,
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_VIS); \
+ xor_speed(&xor_block_niagara); \
+ } while (0)
+
+/* For VIS for everything except Niagara. */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ (tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS)