From f430c02b13f00146106fedcace810e61b4493d8c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 12 Apr 2006 15:21:06 -0500 Subject: [PATCH] powerpc: Quiet page order output No need to always print page orders. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index c006d9039633..b43ed92ef471 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -319,7 +319,7 @@ static void __init htab_init_page_sizes(void) mmu_virtual_psize = MMU_PAGE_64K; #endif - printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n", + printk(KERN_DEBUG "Page orders: linear mapping = %d, others = %d\n", mmu_psize_defs[mmu_linear_psize].shift, mmu_psize_defs[mmu_virtual_psize].shift); -- cgit v1.2.3 From e110b281dc93e3b4587a3d0440bb7ae38daddfde Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 12 Apr 2006 15:25:01 -0500 Subject: [PATCH] powerpc: Less verbose mem configuration output Quieten some of the debug RAM config output. We already print out available memory at KERN_INFO level. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mem.c | 6 +++--- arch/powerpc/mm/numa.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 741dd8802d49..69f3b9a20beb 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -299,9 +299,9 @@ void __init paging_init(void) kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ - printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); - printk(KERN_INFO "Memory hole size: %ldMB\n", + printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); /* * All pages are DMA-able so we put them all in the DMA zone. 
@@ -380,7 +380,7 @@ void __init mem_init(void) totalhigh_pages++; } totalram_pages += totalhigh_pages; - printk(KERN_INFO "High memory: %luk\n", + printk(KERN_DEBUG "High memory: %luk\n", totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* CONFIG_HIGHMEM */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0a335f34974c..ea816c618a7b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -465,9 +465,9 @@ static void __init setup_nonnuma(void) unsigned long total_ram = lmb_phys_mem_size(); unsigned int i; - printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); - printk(KERN_INFO "Memory hole size: %ldMB\n", + printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); for (i = 0; i < lmb.memory.cnt; ++i) @@ -485,7 +485,7 @@ void __init dump_numa_cpu_topology(void) return; for_each_online_node(node) { - printk(KERN_INFO "Node %d CPUs:", node); + printk(KERN_DEBUG "Node %d CPUs:", node); count = 0; /* @@ -521,7 +521,7 @@ static void __init dump_numa_memory_topology(void) for_each_online_node(node) { unsigned long i; - printk(KERN_INFO "Node %d Memory:", node); + printk(KERN_DEBUG "Node %d Memory:", node); count = 0; -- cgit v1.2.3 From 2babf5c2ec2f2d5de3e38d20f7df7fd815fd10c9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 17 May 2006 18:00:46 +1000 Subject: [PATCH] powerpc: Unify mem= handling We currently do mem= handling in three separate places. And as benh pointed out, I wrote two of them. Now that we parse command line parameters earlier, we can clean this mess up. Moving the parsing out of prom_init means the device tree might be allocated above the memory limit. If that happens, we'd have to move it. As it happens, we already have logic to do that for kdump, so just genericise it. This also means we might have reserved regions above the memory limit; if we do, the bootmem allocator will blow up, so we have to modify lmb_enforce_memory_limit() to truncate the reserves as well. Tested on P5 LPAR, iSeries, F50, 44p. Tested moving the device tree on P5, 44p and F50. 
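For reference, the unified handling amounts to a single early command-line hook that records the limit before bootmem is set up; lmb_enforce_memory_limit() below then applies it. A minimal sketch of such a hook (the handler name and its exact home in arch/powerpc/kernel/setup-common.c are assumptions; that hunk is not part of the diff shown here):

/* Sketch: parse mem= once, early, and stash the limit for the lmb code. */
static int __init early_parse_mem(char *p)
{
	if (!p)
		return 1;

	/* memparse() understands suffixes such as 256M or 1G */
	memory_limit = memparse(p, &p);

	return 0;
}
early_param("mem", early_parse_mem);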
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/lmb.c | 43 ++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index 417d58518558..8b6f522655a6 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -89,20 +89,25 @@ static long __init lmb_regions_adjacent(struct lmb_region *rgn, return lmb_addrs_adjacent(base1, size1, base2, size2); } -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void __init lmb_coalesce_regions(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) +static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r) { unsigned long i; - rgn->region[r1].size += rgn->region[r2].size; - for (i=r2; i < rgn->cnt-1; i++) { - rgn->region[i].base = rgn->region[i+1].base; - rgn->region[i].size = rgn->region[i+1].size; + for (i = r; i < rgn->cnt - 1; i++) { + rgn->region[i].base = rgn->region[i + 1].base; + rgn->region[i].size = rgn->region[i + 1].size; } rgn->cnt--; } +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void __init lmb_coalesce_regions(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + rgn->region[r1].size += rgn->region[r2].size; + lmb_remove_region(rgn, r2); +} + /* This routine called with relocation disabled. */ void __init lmb_init(void) { @@ -294,17 +299,16 @@ unsigned long __init lmb_end_of_DRAM(void) return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); } -/* - * Truncate the lmb list to memory_limit if it's set - * You must call lmb_analyze() after this. - */ +/* You must call lmb_analyze() after this. */ void __init lmb_enforce_memory_limit(unsigned long memory_limit) { unsigned long i, limit; + struct lmb_property *p; if (! memory_limit) return; + /* Truncate the lmb regions to satisfy the memory limit. */ limit = memory_limit; for (i = 0; i < lmb.memory.cnt; i++) { if (limit > lmb.memory.region[i].size) { @@ -316,4 +320,21 @@ void __init lmb_enforce_memory_limit(unsigned long memory_limit) lmb.memory.cnt = i + 1; break; } + + lmb.rmo_size = lmb.memory.region[0].size; + + /* And truncate any reserves above the limit also. */ + for (i = 0; i < lmb.reserved.cnt; i++) { + p = &lmb.reserved.region[i]; + + if (p->base > memory_limit) + p->size = 0; + else if ((p->base + p->size) > memory_limit) + p->size = memory_limit - p->base; + + if (p->size == 0) { + lmb_remove_region(&lmb.reserved, i); + i--; + } + } } -- cgit v1.2.3 From c5cf0e30bf3d8cb56758abb612827647c0a821cf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 30 May 2006 14:14:19 +1000 Subject: [PATCH] powerpc: Fix buglet with MMU hash management Our MMU hash management code would not set the "C" bit (changed bit) in the hardware PTE when updating a RO PTE into a RW PTE. That would cause the hardware to possibly do a write back to the hash table to set it on the first store access, which, in addition to being a performance issue, might also hit a bug when running with native hash management (non-HV) as our code is specifically optimized for the case where no write back happens. Thus there is a very small theoretical window where a hash PTE can become corrupted if that HPTE has just been upgraded to read write, a store access happens on it, and that races with another processor evicting that same slot. Since eviction (caused by an almost full hash) is extremely rare, the bug is fortunately very unlikely to happen. 
This is fixed by allowing the update of the protection bits in the native hash handling to also set (but not clear) the "C" bit, and, in order to also improve performance in the general case, by always setting that bit on newly inserted hash PTEs so that writeback really never happens. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_64.S | 3 +++ arch/powerpc/mm/hash_native_64.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index e0d02c4a2615..106fba391987 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -136,6 +136,7 @@ _GLOBAL(__hash_page_4K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -400,6 +401,7 @@ _GLOBAL(__hash_page_4K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -671,6 +673,7 @@ _GLOBAL(__hash_page_64K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 33654d1b1b43..3b8205033f15 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -238,7 +238,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N)); + (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); native_unlock_hpte(hptep); } -- cgit v1.2.3 From 6218a761bbc27acc65248c80024875bcc06d52b1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 11 Jun 2006 14:15:17 +1000 Subject: powerpc: add context.vdso_base for 32-bit too This adds a vdso_base element to the mm_context_t for 32-bit compiles (both for ARCH=powerpc and ARCH=ppc). This fixes the compile errors that have been reported in arch/powerpc/kernel/signal_32.c. 
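The header hunk is not included below, but the shape it implies for the 32-bit context is roughly the following (a sketch only; the real definition lives in the asm headers and may carry additional fields):

/*
 * Sketch of the 32-bit mm_context_t after this change: the bare
 * context number becomes a struct so vdso_base can ride along,
 * which is why the users below switch from mm->context to
 * mm->context.id.
 */
typedef struct {
	unsigned long id;		/* MMU context number */
	unsigned long vdso_base;	/* base of the user's vDSO mapping */
} mm_context_t;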
Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mmu_context_32.c | 2 +- arch/powerpc/mm/ppc_mmu_32.c | 2 +- arch/powerpc/mm/tlb_32.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_context_32.c b/arch/powerpc/mm/mmu_context_32.c index a8816e0f6a86..e326e4249e1a 100644 --- a/arch/powerpc/mm/mmu_context_32.c +++ b/arch/powerpc/mm/mmu_context_32.c @@ -30,7 +30,7 @@ #include #include -mm_context_t next_mmu_context; +unsigned long next_mmu_context; unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; #ifdef FEW_CONTEXTS atomic_t nr_free_contexts; diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index ed7fcfe5fd37..1df731e42b50 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -190,7 +190,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, return; pmd = pmd_offset(pgd_offset(mm, ea), ea); if (!pmd_none(*pmd)) - add_hash_page(mm->context, ea, pmd_val(*pmd)); + add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } /* diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index ad580f3742e5..02eb23e036d5 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c @@ -42,7 +42,7 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) if (Hash != 0) { ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context, addr, ptephys, 1); + flush_hash_pages(mm->context.id, addr, ptephys, 1); } } @@ -102,7 +102,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, pmd_t *pmd; unsigned long pmd_end; int count; - unsigned int ctx = mm->context; + unsigned int ctx = mm->context.id; if (Hash == 0) { _tlbia(); @@ -172,7 +172,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); if (!pmd_none(*pmd)) - flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); + flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); FINISH_FLUSH; } -- cgit v1.2.3 From 430644312810645a6e05855db50a978df9ba3ad3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Jun 2006 18:38:21 +1000 Subject: powerpc: Remove unused paca->pgdir field The pgdir field in the paca was a leftover from the dynamic VSIDs patch, and is not used in the current kernel code. This removes it. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/slb.c | 3 --- arch/powerpc/mm/stab.c | 4 ---- 2 files changed, 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index ffc8ed4de62d..2cc61736feee 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -122,9 +122,6 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) get_paca()->slb_cache_ptr = 0; get_paca()->context = mm->context; -#ifdef CONFIG_PPC_64K_PAGES - get_paca()->pgdir = mm->pgd; -#endif /* CONFIG_PPC_64K_PAGES */ /* * preload some userspace segments into the SLB. 
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 4a9291d9fef8..691320c90b78 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -200,10 +200,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) __get_cpu_var(stab_cache_ptr) = 0; -#ifdef CONFIG_PPC_64K_PAGES - get_paca()->pgdir = mm->pgd; -#endif /* CONFIG_PPC_64K_PAGES */ - /* Now preload some entries for the new task */ if (test_tsk_thread_flag(tsk, TIF_32BIT)) unmapped_base = TASK_UNMAPPED_BASE_USER32; -- cgit v1.2.3 From bf72aeba2ffef599d1d386425c9e46b82be657cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 15 Jun 2006 10:45:18 +1000 Subject: powerpc: Use 64k pages without needing cache-inhibited large pages Some POWER5+ machines can do 64k hardware pages for normal memory but not for cache-inhibited pages. This patch lets us use 64k hardware pages for most user processes on such machines (assuming the kernel has been configured with CONFIG_PPC_64K_PAGES=y). User processes start out using 64k pages and get switched to 4k pages if they use any non-cacheable mappings. With this, we use 64k pages for the vmalloc region and 4k pages for the imalloc region. If anything creates a non-cacheable mapping in the vmalloc region, the vmalloc region will get switched to 4k pages. I don't know of any driver other than the DRM that would do this, though, and these machines don't have AGP. When a region gets switched from 64k pages to 4k pages, we do not have to clear out all the 64k HPTEs from the hash table immediately. We use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page was hashed in as a 64k page or a set of 4k pages. If hash_page is trying to insert a 4k page for a Linux PTE and it sees that it has already been inserted as a 64k page, it first invalidates the 64k HPTE before inserting the 4k HPTE. The hash invalidation routines also use the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a set of 4k HPTEs to remove. With those two changes, we can tolerate a mix of 4k and 64k HPTEs in the hash table, and they will all get removed when the address space is torn down. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_64.S | 28 ++++++++++++++ arch/powerpc/mm/hash_utils_64.c | 84 +++++++++++++++++++++++++++++++++++----- arch/powerpc/mm/mmu_context_64.c | 3 ++ arch/powerpc/mm/slb.c | 29 +++++++------- arch/powerpc/mm/slb_low.S | 17 +++++--- arch/powerpc/mm/tlb_64.c | 5 ++- 6 files changed, 135 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 106fba391987..52e914238959 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -369,6 +369,7 @@ _GLOBAL(__hash_page_4K) rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ or r30,r30,r31 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + oris r30,r30,_PAGE_COMBO@h /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 bne- 1b @@ -428,6 +429,14 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) andi. r0,r31,_PAGE_HASHPTE li r26,0 /* Default hidx */ beq htab_insert_pte + + /* + * Check if the pte was already inserted into the hash table + * as a 64k HW page, and invalidate the 64k HPTE if so. + */ + andis. 
r0,r31,_PAGE_COMBO@h + beq htab_inval_old_hpte + ld r6,STK_PARM(r6)(r1) ori r26,r6,0x8000 /* Load the hidx mask */ ld r26,0(r26) @@ -498,6 +507,19 @@ _GLOBAL(htab_call_hpte_remove) /* Try all again */ b htab_insert_pte + /* + * Call out to C code to invalidate an 64k HW HPTE that is + * useless now that the segment has been switched to 4k pages. + */ +htab_inval_old_hpte: + mr r3,r29 /* virtual addr */ + mr r4,r31 /* PTE.pte */ + li r5,0 /* PTE.hidx */ + li r6,MMU_PAGE_64K /* psize */ + ld r7,STK_PARM(r8)(r1) /* local */ + bl .flush_hash_page + b htab_insert_pte + htab_bail_ok: li r3,0 b htab_bail @@ -638,6 +660,12 @@ _GLOBAL(__hash_page_64K) * is changing this PTE anyway and might hash it. */ bne- ht64_bail_ok +BEGIN_FTR_SECTION + /* Check if PTE has the cache-inhibit bit set */ + andi. r0,r31,_PAGE_NO_CACHE + /* If so, bail out and refault as a 4k page */ + bne- ht64_bail_ok +END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) /* Prepare new PTE value (turn access RW into DIRTY, then * add BUSY,HASHPTE and ACCESSED) */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b43ed92ef471..d03fd2b4445e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -92,10 +92,15 @@ unsigned long htab_size_bytes; unsigned long htab_hash_mask; int mmu_linear_psize = MMU_PAGE_4K; int mmu_virtual_psize = MMU_PAGE_4K; +int mmu_vmalloc_psize = MMU_PAGE_4K; +int mmu_io_psize = MMU_PAGE_4K; #ifdef CONFIG_HUGETLB_PAGE int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; #endif +#ifdef CONFIG_PPC_64K_PAGES +int mmu_ci_restrictions; +#endif /* There are definitions of page sizes arrays to be used when none * is provided by the firmware. @@ -308,20 +313,31 @@ static void __init htab_init_page_sizes(void) else if (mmu_psize_defs[MMU_PAGE_1M].shift) mmu_linear_psize = MMU_PAGE_1M; +#ifdef CONFIG_PPC_64K_PAGES /* * Pick a size for the ordinary pages. Default is 4K, we support - * 64K if cache inhibited large pages are supported by the - * processor + * 64K for user mappings and vmalloc if supported by the processor. + * We only use 64k for ioremap if the processor + * (and firmware) support cache-inhibited large pages. + * If not, we use 4k and set mmu_ci_restrictions so that + * hash_page knows to switch processes that use cache-inhibited + * mappings to 4k pages. */ -#ifdef CONFIG_PPC_64K_PAGES - if (mmu_psize_defs[MMU_PAGE_64K].shift && - cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) + if (mmu_psize_defs[MMU_PAGE_64K].shift) { mmu_virtual_psize = MMU_PAGE_64K; + mmu_vmalloc_psize = MMU_PAGE_64K; + if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) + mmu_io_psize = MMU_PAGE_64K; + else + mmu_ci_restrictions = 1; + } #endif - printk(KERN_DEBUG "Page orders: linear mapping = %d, others = %d\n", + printk(KERN_DEBUG "Page orders: linear mapping = %d, " + "virtual = %d, io = %d\n", mmu_psize_defs[mmu_linear_psize].shift, - mmu_psize_defs[mmu_virtual_psize].shift); + mmu_psize_defs[mmu_virtual_psize].shift, + mmu_psize_defs[mmu_io_psize].shift); #ifdef CONFIG_HUGETLB_PAGE /* Init large page size. 
Currently, we pick 16M or 1M depending @@ -556,6 +572,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) pte_t *ptep; cpumask_t tmp; int rc, user_region = 0, local = 0; + int psize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); @@ -575,10 +592,15 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } vsid = get_vsid(mm->context.id, ea); + psize = mm->context.user_psize; break; case VMALLOC_REGION_ID: mm = &init_mm; vsid = get_kernel_vsid(ea); + if (ea < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; break; default: /* Not a valid range @@ -629,7 +651,40 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #ifndef CONFIG_PPC_64K_PAGES rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); #else - if (mmu_virtual_psize == MMU_PAGE_64K) + if (mmu_ci_restrictions) { + /* If this PTE is non-cacheable, switch to 4k */ + if (psize == MMU_PAGE_64K && + (pte_val(*ptep) & _PAGE_NO_CACHE)) { + if (user_region) { + psize = MMU_PAGE_4K; + mm->context.user_psize = MMU_PAGE_4K; + mm->context.sllp = SLB_VSID_USER | + mmu_psize_defs[MMU_PAGE_4K].sllp; + } else if (ea < VMALLOC_END) { + /* + * some driver did a non-cacheable mapping + * in vmalloc space, so switch vmalloc + * to 4k pages + */ + printk(KERN_ALERT "Reducing vmalloc segment " + "to 4kB pages because of " + "non-cacheable mapping\n"); + psize = mmu_vmalloc_psize = MMU_PAGE_4K; + } + } + if (user_region) { + if (psize != get_paca()->context.user_psize) { + get_paca()->context = mm->context; + slb_flush_and_rebolt(); + } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_flush_and_rebolt(); + } + } + if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); else rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); @@ -681,7 +736,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, #ifndef CONFIG_PPC_64K_PAGES __hash_page_4K(ea, access, vsid, ptep, trap, local); #else - if (mmu_virtual_psize == MMU_PAGE_64K) + if (mmu_ci_restrictions) { + /* If this PTE is non-cacheable, switch to 4k */ + if (mm->context.user_psize == MMU_PAGE_64K && + (pte_val(*ptep) & _PAGE_NO_CACHE)) { + mm->context.user_psize = MMU_PAGE_4K; + mm->context.sllp = SLB_VSID_USER | + mmu_psize_defs[MMU_PAGE_4K].sllp; + get_paca()->context = mm->context; + slb_flush_and_rebolt(); + } + } + if (mm->context.user_psize == MMU_PAGE_64K) __hash_page_64K(ea, access, vsid, ptep, trap, local); else __hash_page_4K(ea, access, vsid, ptep, trap, local); diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c index 714a84dd8d5d..65d18dca266f 100644 --- a/arch/powerpc/mm/mmu_context_64.c +++ b/arch/powerpc/mm/mmu_context_64.c @@ -49,6 +49,9 @@ again: } mm->context.id = index; + mm->context.user_psize = mmu_virtual_psize; + mm->context.sllp = SLB_VSID_USER | + mmu_psize_defs[mmu_virtual_psize].sllp; return 0; } diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 2cc61736feee..6a8bf6c6000e 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -60,19 +60,19 @@ static inline void create_slbe(unsigned long ea, unsigned long flags, : "memory" ); } -static void slb_flush_and_rebolt(void) +void slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. 
*/ - unsigned long linear_llp, virtual_llp, lflags, vflags; + unsigned long linear_llp, vmalloc_llp, lflags, vflags; unsigned long ksp_esid_data; WARN_ON(!irqs_disabled()); linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; - virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp; + vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; lflags = SLB_VSID_KERNEL | linear_llp; - vflags = SLB_VSID_KERNEL | virtual_llp; + vflags = SLB_VSID_KERNEL | vmalloc_llp; ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET) @@ -164,11 +164,10 @@ static inline void patch_slb_encoding(unsigned int *insn_addr, void slb_initialize(void) { - unsigned long linear_llp, virtual_llp; + unsigned long linear_llp, vmalloc_llp, io_llp; static int slb_encoding_inited; extern unsigned int *slb_miss_kernel_load_linear; - extern unsigned int *slb_miss_kernel_load_virtual; - extern unsigned int *slb_miss_user_load_normal; + extern unsigned int *slb_miss_kernel_load_io; #ifdef CONFIG_HUGETLB_PAGE extern unsigned int *slb_miss_user_load_huge; unsigned long huge_llp; @@ -178,18 +177,19 @@ void slb_initialize(void) /* Prepare our SLB miss handler based on our page size */ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; - virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp; + io_llp = mmu_psize_defs[mmu_io_psize].sllp; + vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; + get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp; + if (!slb_encoding_inited) { slb_encoding_inited = 1; patch_slb_encoding(slb_miss_kernel_load_linear, SLB_VSID_KERNEL | linear_llp); - patch_slb_encoding(slb_miss_kernel_load_virtual, - SLB_VSID_KERNEL | virtual_llp); - patch_slb_encoding(slb_miss_user_load_normal, - SLB_VSID_USER | virtual_llp); + patch_slb_encoding(slb_miss_kernel_load_io, + SLB_VSID_KERNEL | io_llp); DBG("SLB: linear LLP = %04x\n", linear_llp); - DBG("SLB: virtual LLP = %04x\n", virtual_llp); + DBG("SLB: io LLP = %04x\n", io_llp); #ifdef CONFIG_HUGETLB_PAGE patch_slb_encoding(slb_miss_user_load_huge, SLB_VSID_USER | huge_llp); @@ -204,7 +204,7 @@ void slb_initialize(void) unsigned long lflags, vflags; lflags = SLB_VSID_KERNEL | linear_llp; - vflags = SLB_VSID_KERNEL | virtual_llp; + vflags = SLB_VSID_KERNEL | vmalloc_llp; /* Invalidate the entire SLB (even slot 0) & all the ERATS */ asm volatile("isync":::"memory"); @@ -212,7 +212,6 @@ void slb_initialize(void) asm volatile("isync; slbia; isync":::"memory"); create_slbe(PAGE_OFFSET, lflags, 0); - /* VMALLOC space has 4K pages always for now */ create_slbe(VMALLOC_START, vflags, 1); /* We don't bolt the stack for the time being - we're in boot, diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index abfaabf667bf..8548dcf8ef8b 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -59,10 +59,19 @@ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 b slb_finish_load -1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below +1: /* vmalloc/ioremap mapping encoding bits, the "li" instructions below * will be patched by the kernel at boot */ -_GLOBAL(slb_miss_kernel_load_virtual) +BEGIN_FTR_SECTION + /* check whether this is in vmalloc or ioremap space */ + clrldi r11,r10,48 + cmpldi r11,(VMALLOC_SIZE >> 28) - 1 + bgt 5f + lhz r11,PACAVMALLOCSLLP(r13) + b slb_finish_load +5: +END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) +_GLOBAL(slb_miss_kernel_load_io) li r11,0 b slb_finish_load @@ -96,9 +105,7 @@ _GLOBAL(slb_miss_user_load_huge) 1: #endif /* CONFIG_HUGETLB_PAGE */ 
-_GLOBAL(slb_miss_user_load_normal) - li r11,0 - + lhz r11,PACACONTEXTSLLP(r13) 2: ld r9,PACACONTEXTID(r13) rldimi r10,r9,USER_ESID_BITS,0 diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index f734b11566c2..e7449b068c82 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -131,7 +131,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, { struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); unsigned long vsid; - unsigned int psize = mmu_virtual_psize; + unsigned int psize; int i; i = batch->index; @@ -148,7 +148,8 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, #else BUG(); #endif - } + } else + psize = pte_pagesize_index(pte); /* * This can happen when we are in the middle of a TLB batch and -- cgit v1.2.3 From 227318bbde6c8309b1d20ab46532ec2b737e1fee Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sat, 10 Jun 2006 20:32:01 +1000 Subject: [POWERPC] Remove stale 64bit on 32bit kernel code Remove some stale POWER3/POWER4/970 on 32bit kernel support. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_32.S | 34 ---------------------------------- arch/powerpc/mm/ppc_mmu_32.c | 10 ---------- 2 files changed, 44 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index ea469eefa146..94255beeecd3 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -74,12 +74,6 @@ _GLOBAL(hash_page_sync) */ .text _GLOBAL(hash_page) -#ifdef CONFIG_PPC64BRIDGE - mfmsr r0 - clrldi r0,r0,1 /* make sure it's in 32-bit mode */ - MTMSRD(r0) - isync -#endif tophys(r7,0) /* gets -KERNELBASE into r7 */ #ifdef CONFIG_SMP addis r8,r7,mmu_hash_lock@h @@ -285,7 +279,6 @@ Hash_base = 0xc0180000 Hash_bits = 12 /* e.g. 256kB hash table */ Hash_msk = (((1 << Hash_bits) - 1) * 64) -#ifndef CONFIG_PPC64BRIDGE /* defines for the PTE format for 32-bit PPCs */ #define PTE_SIZE 8 #define PTEG_SIZE 64 @@ -299,21 +292,6 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) #define SET_V(r) oris r,r,PTE_V@h #define CLR_V(r,t) rlwinm r,r,0,1,31 -#else -/* defines for the PTE format for 64-bit PPCs */ -#define PTE_SIZE 16 -#define PTEG_SIZE 128 -#define LG_PTEG_SIZE 7 -#define LDPTEu ldu -#define STPTE std -#define CMPPTE cmpd -#define PTE_H 2 -#define PTE_V 1 -#define TST_V(r) andi. 
r,r,PTE_V -#define SET_V(r) ori r,r,PTE_V -#define CLR_V(r,t) li t,PTE_V; andc r,r,t -#endif /* CONFIG_PPC64BRIDGE */ - #define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) #define HASH_RIGHT 31-LG_PTEG_SIZE @@ -331,14 +309,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) /* Construct the high word of the PPC-style PTE (r5) */ -#ifndef CONFIG_PPC64BRIDGE rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r5,r3,12 /* shift vsid into position */ - rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ SET_V(r5) /* set V (valid) bit */ /* Get the address of the primary PTE group in the hash table (r3) */ @@ -516,14 +488,8 @@ _GLOBAL(flush_hash_pages) add r3,r3,r0 /* note code below trims to 24 bits */ /* Construct the high word of the PPC-style PTE (r11) */ -#ifndef CONFIG_PPC64BRIDGE rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r11,r3,12 /* shift vsid into position */ - rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ SET_V(r11) /* set V (valid) bit */ #ifdef CONFIG_SMP diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 1df731e42b50..ab5cd724b122 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -42,11 +42,7 @@ unsigned long _SDR1; union ubat { /* BAT register values to be loaded */ BAT bat; -#ifdef CONFIG_PPC64BRIDGE - u64 word[2]; -#else u32 word[2]; -#endif } BATS[4][2]; /* 4 pairs of IBAT, DBAT */ struct batrange { /* stores address ranges mapped by BATs */ @@ -220,15 +216,9 @@ void __init MMU_init_hw(void) if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); -#ifdef CONFIG_PPC64BRIDGE -#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ -#define SDR1_LOW_BITS (lg_n_hpteg - 11) -#define MIN_N_HPTEG 2048 /* min 256kB hash table */ -#else #define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ #define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) #define MIN_N_HPTEG 1024 /* min 64kB hash table */ -#endif /* * Allow 1 HPTE (1/8 HPTEG) for each page of memory. -- cgit v1.2.3 From ee0339f205d60375c5ce1653c0dc318c6ec72668 Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Sat, 17 Jun 2006 17:52:44 -0500 Subject: [POWERPC] Add starting of secondary 86xx CPUs. Clear the high BATS during load_up_mmu if FTR_HAS_HIGH_BATS. Allow just a bit more time for secondary CPUs to phone home. Signed-off-by: Wei Zhang Signed-off-by: Haiying Wang Signed-off-by: Jon Loeliger Signed-off-by: Paul Mackerras --- arch/powerpc/mm/ppc_mmu_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index ab5cd724b122..2ed43a493b31 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -43,13 +43,13 @@ unsigned long _SDR1; union ubat { /* BAT register values to be loaded */ BAT bat; u32 word[2]; -} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ +} BATS[8][2]; /* 8 pairs of IBAT, DBAT */ struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; unsigned long phys; -} bat_addrs[4]; +} bat_addrs[8]; /* * Return PA for this VA if it is mapped by a BAT, or 0 -- cgit v1.2.3
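For context on the bat_addrs[] array the last patch grows from 4 to 8 entries: the lookup referred to by the comment above it ("Return PA for this VA if it is mapped by a BAT, or 0") is a simple linear scan over those recorded ranges. A sketch of the idea (simplified; the exact body in ppc_mmu_32.c differs):

/* Sketch: translate va to a physical address if a BAT covers it, else 0. */
unsigned long v_mapped_by_bats(unsigned long va)
{
	int b;

	for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
		if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
			return bat_addrs[b].phys + (va - bat_addrs[b].start);
	return 0;
}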