From c5cf0e30bf3d8cb56758abb612827647c0a821cf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 30 May 2006 14:14:19 +1000 Subject: [PATCH] powerpc: Fix buglet with MMU hash management Our MMU hash management code would not set the "C" bit (changed bit) in the hardware PTE when updating a RO PTE into a RW PTE. That would cause the hardware to possibly to a write back to the hash table to set it on the first store access, which in addition to being a performance issue, might also hit a bug when running with native hash management (non-HV) as our code is specifically optimized for the case where no write back happens. Thus there is a very small therocial window were a hash PTE can become corrupted if that HPTE has just been upgraded to read write, a store access happens on it, and that races with another processor evicting that same slot. Since eviction (caused by an almost full hash) is extremely rare, the bug is very unlikely to happen fortunately. This fixes by allowing the updating of the protection bits in the native hash handling to also set (but not clear) the "C" bit, and, in order to also improve performances in the general case, by always setting that bit on newly inserted hash PTE so that writeback really never happens. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_64.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/mm/hash_low_64.S') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index e0d02c4a2615..106fba391987 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -136,6 +136,7 @@ _GLOBAL(__hash_page_4K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -400,6 +401,7 @@ _GLOBAL(__hash_page_4K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -671,6 +673,7 @@ _GLOBAL(__hash_page_64K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) -- cgit v1.2.3 From bf72aeba2ffef599d1d386425c9e46b82be657cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 15 Jun 2006 10:45:18 +1000 Subject: powerpc: Use 64k pages without needing cache-inhibited large pages Some POWER5+ machines can do 64k hardware pages for normal memory but not for cache-inhibited pages. This patch lets us use 64k hardware pages for most user processes on such machines (assuming the kernel has been configured with CONFIG_PPC_64K_PAGES=y). User processes start out using 64k pages and get switched to 4k pages if they use any non-cacheable mappings. With this, we use 64k pages for the vmalloc region and 4k pages for the imalloc region. If anything creates a non-cacheable mapping in the vmalloc region, the vmalloc region will get switched to 4k pages. I don't know of any driver other than the DRM that would do this, though, and these machines don't have AGP. When a region gets switched from 64k pages to 4k pages, we do not have to clear out all the 64k HPTEs from the hash table immediately. We use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page was hashed in as a 64k page or a set of 4k pages. If hash_page is trying to insert a 4k page for a Linux PTE and it sees that it has already been inserted as a 64k page, it first invalidates the 64k HPTE before inserting the 4k HPTE. The hash invalidation routines also use the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a set of 4k HPTEs to remove. With those two changes, we can tolerate a mix of 4k and 64k HPTEs in the hash table, and they will all get removed when the address space is torn down. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_64.S | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch/powerpc/mm/hash_low_64.S') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 106fba391987..52e914238959 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -369,6 +369,7 @@ _GLOBAL(__hash_page_4K) rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ or r30,r30,r31 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + oris r30,r30,_PAGE_COMBO@h /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 bne- 1b @@ -428,6 +429,14 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) andi. r0,r31,_PAGE_HASHPTE li r26,0 /* Default hidx */ beq htab_insert_pte + + /* + * Check if the pte was already inserted into the hash table + * as a 64k HW page, and invalidate the 64k HPTE if so. + */ + andis. r0,r31,_PAGE_COMBO@h + beq htab_inval_old_hpte + ld r6,STK_PARM(r6)(r1) ori r26,r6,0x8000 /* Load the hidx mask */ ld r26,0(r26) @@ -498,6 +507,19 @@ _GLOBAL(htab_call_hpte_remove) /* Try all again */ b htab_insert_pte + /* + * Call out to C code to invalidate an 64k HW HPTE that is + * useless now that the segment has been switched to 4k pages. + */ +htab_inval_old_hpte: + mr r3,r29 /* virtual addr */ + mr r4,r31 /* PTE.pte */ + li r5,0 /* PTE.hidx */ + li r6,MMU_PAGE_64K /* psize */ + ld r7,STK_PARM(r8)(r1) /* local */ + bl .flush_hash_page + b htab_insert_pte + htab_bail_ok: li r3,0 b htab_bail @@ -638,6 +660,12 @@ _GLOBAL(__hash_page_64K) * is changing this PTE anyway and might hash it. */ bne- ht64_bail_ok +BEGIN_FTR_SECTION + /* Check if PTE has the cache-inhibit bit set */ + andi. r0,r31,_PAGE_NO_CACHE + /* If so, bail out and refault as a 4k page */ + bne- ht64_bail_ok +END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) /* Prepare new PTE value (turn access RW into DIRTY, then * add BUSY,HASHPTE and ACCESSED) */ -- cgit v1.2.3