Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--   arch/powerpc/mm/fsl_booke_mmu.c |   7
-rw-r--r--   arch/powerpc/mm/numa.c          | 122
-rw-r--r--   arch/powerpc/mm/pgtable.c       |   1
-rw-r--r--   arch/powerpc/mm/tlb_hash32.c    |  15
-rw-r--r--   arch/powerpc/mm/tlb_nohash.c    | 129
5 files changed, 196 insertions, 78 deletions
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index cdc7526e9c93..4b66a1ece6d8 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -104,9 +104,10 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
 }
 
 /*
- * Set up one of the I/D BAT (block address translation) register pairs.
- * The parameters are not checked; in particular size must be a power
- * of 4 between 4k and 256M.
+ * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
+ * in particular size must be a power of 4 between 4k and 256M (or 1G, for cpus
+ * that support extended page sizes). Note that while some cpus support a
+ * page size of 4G, we don't allow its use here.
  */
 static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 		unsigned long size, unsigned long flags, unsigned int pid)
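The new settlbcam() comment constrains size to a power of 4 between 4K and 256M (or 1G with extended page sizes); settlbcam() itself deliberately checks nothing. As a standalone sketch only (valid_camsize is a hypothetical helper, not part of the patch), the constraint amounts to a single set bit at an even shift:

#include <stdio.h>

/* Hypothetical checker for the documented tlbcam constraint: a power
 * of 4 between 4K (2^12) and 1G (2^30) has exactly one bit set, at an
 * even shift in [12, 30].
 */
static int valid_camsize(unsigned long size)
{
	unsigned int shift = 0;

	if (size == 0 || (size & (size - 1)))	/* not a power of two */
		return 0;

	while ((1UL << shift) != size)		/* locate the set bit */
		shift++;

	return (shift % 2) == 0 && shift >= 12 && shift <= 30;
}

int main(void)
{
	printf("16M -> %d\n", valid_camsize(16UL << 20));	/* 1: 2^24 */
	printf(" 8M -> %d\n", valid_camsize(8UL << 20));	/* 0: 2^23 is not a power of 4 */
	printf(" 2K -> %d\n", valid_camsize(2048UL));		/* 0: below 4K */
	return 0;
}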
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index aa731af720c0..002878ccf90b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -42,6 +42,12 @@ EXPORT_SYMBOL(node_data);
 
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
+static int form1_affinity;
+
+#define MAX_DISTANCE_REF_POINTS 4
+static int distance_ref_points_depth;
+static const unsigned int *distance_ref_points;
+static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
 
 /*
  * Allocate node_to_cpumask_map based on number of available nodes
@@ -204,6 +210,39 @@ static const u32 *of_get_usable_memory(struct device_node *memory)
 	return prop;
 }
 
+int __node_distance(int a, int b)
+{
+	int i;
+	int distance = LOCAL_DISTANCE;
+
+	if (!form1_affinity)
+		return distance;
+
+	for (i = 0; i < distance_ref_points_depth; i++) {
+		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
+			break;
+
+		/* Double the distance for each NUMA level */
+		distance *= 2;
+	}
+
+	return distance;
+}
+
+static void initialize_distance_lookup_table(int nid,
+		const unsigned int *associativity)
+{
+	int i;
+
+	if (!form1_affinity)
+		return;
+
+	for (i = 0; i < distance_ref_points_depth; i++) {
+		distance_lookup_table[nid][i] =
+			associativity[distance_ref_points[i]];
+	}
+}
+
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
  * info is found.
  */
@@ -225,6 +264,10 @@ static int of_node_to_nid_single(struct device_node *device)
 		/* POWER4 LPAR uses 0xffff as invalid node */
 		if (nid == 0xffff || nid >= MAX_NUMNODES)
 			nid = -1;
+
+		if (nid > 0 && tmp[0] >= distance_ref_points_depth)
+			initialize_distance_lookup_table(nid, tmp);
+
 	}
 out:
 	return nid;
@@ -251,26 +294,10 @@ int of_node_to_nid(struct device_node *device)
 }
 EXPORT_SYMBOL_GPL(of_node_to_nid);
 
-/*
- * In theory, the "ibm,associativity" property may contain multiple
- * associativity lists because a resource may be multiply connected
- * into the machine. This resource then has different associativity
- * characteristics relative to its multiple connections. We ignore
- * this for now. We also assume that all cpu and memory sets have
- * their distances represented at a common level. This won't be
- * true for hierarchical NUMA.
- *
- * In any case the ibm,associativity-reference-points should give
- * the correct depth for a normal NUMA system.
- *
- * - Dave Hansen <haveblue@us.ibm.com>
- */
 static int __init find_min_common_depth(void)
 {
-	int depth, index;
-	const unsigned int *ref_points;
+	int depth;
 	struct device_node *rtas_root;
-	unsigned int len;
 	struct device_node *chosen;
 	const char *vec5;
 
@@ -280,18 +307,28 @@ static int __init find_min_common_depth(void)
 		return -1;
 
 	/*
-	 * this property is 2 32-bit integers, each representing a level of
-	 * depth in the associativity nodes. The first is for an SMP
-	 * configuration (should be all 0's) and the second is for a normal
-	 * NUMA configuration.
+	 * This property is a set of 32-bit integers, each representing
+	 * an index into the ibm,associativity nodes.
+	 *
+	 * With form 0 affinity the first integer is for an SMP configuration
+	 * (should be all 0's) and the second is for a normal NUMA
+	 * configuration. We have only one level of NUMA.
+	 *
+	 * With form 1 affinity the first integer is the most significant
+	 * NUMA boundary and the following are progressively less significant
+	 * boundaries. There can be more than one level of NUMA.
	 */
-	index = 1;
-	ref_points = of_get_property(rtas_root,
-			"ibm,associativity-reference-points", &len);
+	distance_ref_points = of_get_property(rtas_root,
+					"ibm,associativity-reference-points",
+					&distance_ref_points_depth);
+
+	if (!distance_ref_points) {
+		dbg("NUMA: ibm,associativity-reference-points not found.\n");
+		goto err;
+	}
+
+	distance_ref_points_depth /= sizeof(int);
 
-	/*
-	 * For form 1 affinity information we want the first field
-	 */
 #define VEC5_AFFINITY_BYTE 5
 #define VEC5_AFFINITY 0x80
 	chosen = of_find_node_by_path("/chosen");
@@ -299,19 +336,38 @@ static int __init find_min_common_depth(void)
 		vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
 		if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
 			dbg("Using form 1 affinity\n");
-			index = 0;
+			form1_affinity = 1;
 		}
 	}
 
-	if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
-		depth = ref_points[index];
+	if (form1_affinity) {
+		depth = distance_ref_points[0];
 	} else {
-		dbg("NUMA: ibm,associativity-reference-points not found.\n");
-		depth = -1;
+		if (distance_ref_points_depth < 2) {
+			printk(KERN_WARNING "NUMA: "
+				"short ibm,associativity-reference-points\n");
+			goto err;
+		}
+
+		depth = distance_ref_points[1];
 	}
 
-	of_node_put(rtas_root);
+	/*
+	 * Warn and cap if the hardware supports more than
+	 * MAX_DISTANCE_REF_POINTS domains.
+	 */
+	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
+		printk(KERN_WARNING "NUMA: distance array capped at "
+			"%d entries\n", MAX_DISTANCE_REF_POINTS);
+		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
+	}
+
+	of_node_put(rtas_root);
 	return depth;
+
+err:
+	of_node_put(rtas_root);
+	return -1;
 }
 
 static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
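The core of the numa.c change is __node_distance(): the distance starts at LOCAL_DISTANCE and doubles at each reference-point level where the two nodes' associativity values differ, stopping at the first level where they match. A self-contained sketch of the same arithmetic, using a made-up two-level lookup table rather than values read from firmware:

#include <stdio.h>

#define LOCAL_DISTANCE	10	/* same base value the kernel uses */
#define DEPTH		2	/* two reference points, cf. distance_ref_points_depth */

/* Made-up distance_lookup_table: column 0 is the most significant
 * NUMA boundary (here, the node itself), column 1 a coarser grouping
 * (here, a board).
 */
static const int lookup[3][DEPTH] = {
	{ 0, 0 },	/* node 0: board 0 */
	{ 1, 0 },	/* node 1: board 0, different node */
	{ 2, 1 },	/* node 2: board 1 */
};

static int node_distance(int a, int b)
{
	int distance = LOCAL_DISTANCE;
	int i;

	for (i = 0; i < DEPTH; i++) {
		if (lookup[a][i] == lookup[b][i])
			break;		/* first shared level: stop doubling */

		/* Double the distance for each NUMA level */
		distance *= 2;
	}
	return distance;
}

int main(void)
{
	/* Same node: 10, same board: 20, different board: 40 */
	printf("d(0,0)=%d d(0,1)=%d d(0,2)=%d\n",
	       node_distance(0, 0), node_distance(0, 1), node_distance(0, 2));
	return 0;
}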
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index ebc2f38eb381..2c7e801ab20b 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -92,7 +92,6 @@ static void pte_free_rcu_callback(struct rcu_head *head)
 
 static void pte_free_submit(struct pte_freelist_batch *batch)
 {
-	INIT_RCU_HEAD(&batch->rcu);
 	call_rcu(&batch->rcu, pte_free_rcu_callback);
 }
 
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 8aaa8b7eb324..690566b66e8e 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -89,17 +89,6 @@ void tlb_flush(struct mmu_gather *tlb)
  *    -- Cort
  */
 
-/*
- * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
- * the cache operations on the bus.  Hence we need to use an IPI
- * to get the other CPU(s) to invalidate their TLBs.
- */
-#ifdef CONFIG_SMP_750
-#define FINISH_FLUSH	smp_send_tlb_invalidate(0)
-#else
-#define FINISH_FLUSH	do { } while (0)
-#endif
-
 static void flush_range(struct mm_struct *mm, unsigned long start,
 			unsigned long end)
 {
@@ -138,7 +127,6 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	flush_range(&init_mm, start, end);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_kernel_range);
 
@@ -162,7 +150,6 @@ void flush_tlb_mm(struct mm_struct *mm)
 	 */
 	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
 		flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_mm);
 
@@ -179,7 +166,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 	pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
 	if (!pmd_none(*pmd))
 		flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_page);
 
@@ -192,6 +178,5 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end)
 {
 	flush_range(vma->vm_mm, start, end);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index d8695b02a968..fe391e942521 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -46,6 +46,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
+		.ind	= 20,
 		.enc	= BOOK3E_PAGESZ_4K,
 	},
 	[MMU_PAGE_16K] = {
 		.shift	= 14,
@@ -54,6 +55,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	},
 	[MMU_PAGE_64K] = {
 		.shift	= 16,
+		.ind	= 28,
 		.enc	= BOOK3E_PAGESZ_64K,
 	},
 	[MMU_PAGE_1M] = {
@@ -62,6 +64,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	},
 	[MMU_PAGE_16M] = {
 		.shift	= 24,
+		.ind	= 36,
 		.enc	= BOOK3E_PAGESZ_16M,
 	},
 	[MMU_PAGE_256M] = {
@@ -344,16 +347,108 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
 	}
 }
 
-/*
- * Early initialization of the MMU TLB code
- */
-static void __early_init_mmu(int boot_cpu)
+static void setup_page_sizes(void)
+{
+	unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+	unsigned int tlb0ps = mfspr(SPRN_TLB0PS);
+	unsigned int eptcfg = mfspr(SPRN_EPTCFG);
+	int i, psize;
+
+	/* Look for supported direct sizes */
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+		if (tlb0ps & (1U << (def->shift - 10)))
+			def->flags |= MMU_PAGE_SIZE_DIRECT;
+	}
+
+	/* Indirect page sizes supported ? */
+	if ((tlb0cfg & TLBnCFG_IND) == 0)
+		goto no_indirect;
+
+	/* Now, we only deal with one IND page size for each
+	 * direct size. Hopefully all implementations today are
+	 * unambiguous, but we might want to be careful in the
+	 * future.
+	 */
+	for (i = 0; i < 3; i++) {
+		unsigned int ps, sps;
+
+		sps = eptcfg & 0x1f;
+		eptcfg >>= 5;
+		ps = eptcfg & 0x1f;
+		eptcfg >>= 5;
+		if (!ps || !sps)
+			continue;
+		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+			struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+			if (ps == (def->shift - 10))
+				def->flags |= MMU_PAGE_SIZE_INDIRECT;
+			if (sps == (def->shift - 10))
+				def->ind = ps + 10;
+		}
+	}
+ no_indirect:
+
+	/* Cleanup array and print summary */
+	pr_info("MMU: Supported page sizes\n");
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		struct mmu_psize_def *def = &mmu_psize_defs[psize];
+		const char *__page_type_names[] = {
+			"unsupported",
+			"direct",
+			"indirect",
+			"direct & indirect"
+		};
+		if (def->flags == 0) {
+			def->shift = 0;
+			continue;
+		}
+		pr_info("  %8ld KB as %s\n", 1ul << (def->shift - 10),
+			__page_type_names[def->flags & 0x3]);
+	}
+}
+
+static void setup_mmu_htw(void)
 {
 	extern unsigned int interrupt_base_book3e;
 	extern unsigned int exc_data_tlb_miss_htw_book3e;
 	extern unsigned int exc_instruction_tlb_miss_htw_book3e;
 
 	unsigned int *ibase = &interrupt_base_book3e;
+
+	/* Check if HW tablewalk is present, and if yes, enable it by:
+	 *
+	 * - patching the TLB miss handlers to branch to the
+	 *   one dedicates to it
+	 *
+	 * - setting the global book3e_htw_enabled
+	 */
+	unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+
+	if ((tlb0cfg & TLBnCFG_IND) &&
+	    (tlb0cfg & TLBnCFG_PT)) {
+		/* Our exceptions vectors start with a NOP and -then- a branch
+		 * to deal with single stepping from userspace which stops on
+		 * the second instruction. Thus we need to patch the second
+		 * instruction of the exception, not the first one
+		 */
+		patch_branch(ibase + (0x1c0 / 4) + 1,
+			     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
+		patch_branch(ibase + (0x1e0 / 4) + 1,
+			     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+		book3e_htw_enabled = 1;
+	}
+	pr_info("MMU: Book3E Page Tables %s\n",
+		book3e_htw_enabled ? "Enabled" : "Disabled");
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void __early_init_mmu(int boot_cpu)
+{
 	unsigned int mas4;
 
 	/* XXX This will have to be decided at runtime, but right
@@ -370,35 +465,17 @@ static void __early_init_mmu(int boot_cpu)
 	 */
 	mmu_vmemmap_psize = MMU_PAGE_16M;
 
-	/* Check if HW tablewalk is present, and if yes, enable it by:
-	 *
-	 * - patching the TLB miss handlers to branch to the
-	 *   one dedicates to it
-	 *
-	 * - setting the global book3e_htw_enabled
-	 *
-	 * - Set MAS4:INDD and default page size
-	 */
-
 	/* XXX This code only checks for TLB 0 capabilities and doesn't
 	 *     check what page size combos are supported by the HW. It
 	 *     also doesn't handle the case where a separate array holds
 	 *     the IND entries from the array loaded by the PT.
 	 */
 	if (boot_cpu) {
-		unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+		/* Look for supported page sizes */
+		setup_page_sizes();
 
-		/* Check if HW loader is supported */
-		if ((tlb0cfg & TLBnCFG_IND) &&
-		    (tlb0cfg & TLBnCFG_PT)) {
-			patch_branch(ibase + (0x1c0 / 4),
-				     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
-			patch_branch(ibase + (0x1e0 / 4),
-				     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
-			book3e_htw_enabled = 1;
-		}
-		pr_info("MMU: Book3E Page Tables %s\n",
-			book3e_htw_enabled ? "Enabled" : "Disabled");
+		/* Look for HW tablewalk support */
+		setup_mmu_htw();
 	}
 
 	/* Set MAS4 based on page table setting */
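In setup_page_sizes() above, a direct page size of 2^k bytes is accepted when bit (k - 10) of TLB0PS is set. A standalone sketch of just that decoding, with a hypothetical register value standing in for the mfspr() read (the EPTCFG parsing for indirect sizes is omitted):

#include <stdio.h>

/* Page-size shifts mirrored from mmu_psize_defs: 4K, 16K, 64K, 1M, 16M, 256M */
static const unsigned int shifts[] = { 12, 14, 16, 20, 24, 28 };

int main(void)
{
	/* Hypothetical TLB0PS value, not from real silicon: pretend the
	 * core reports 4K (bit 2), 64K (bit 6) and 16M (bit 14).
	 */
	unsigned int tlb0ps = (1U << 2) | (1U << 6) | (1U << 14);
	unsigned int i;

	for (i = 0; i < sizeof(shifts) / sizeof(shifts[0]); i++)
		if (tlb0ps & (1U << (shifts[i] - 10)))	/* same test as the patch */
			printf("%8lu KB direct\n", 1UL << (shifts[i] - 10));

	return 0;
}

Conveniently, 2^(k - 10) is also the size in KB, which is why the same expression serves as both the bit position and the printed value.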