diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-02 10:16:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-02 10:16:16 -0700 |
commit | 5a148af66932c31814e263366094b5812210b501 (patch) | |
tree | c5155ae89d7109533b8b073631bd65a7dd394b9d /arch/powerpc/mm/hugetlbpage.c | |
parent | 99c6bcf46d2233d33e441834e958ed0bc22b190a (diff) | |
parent | 54d5999d98f2ab36ad71b9ef4d82cf5f399205f5 (diff) |
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc update from Benjamin Herrenschmidt:
"The main highlights this time around are:
- A pile of additional POWER8 bits and nits, such as updated
performance counter support (Michael Ellerman), new branch history
buffer support (Anshuman Khandual), base support for the new PCI
host bridge when not using the hypervisor (Gavin Shan) and other
random related bits and fixes from various contributors.
- Some rework of our page table format by Aneesh Kumar which fixes a
thing or two and paves the way for THP support. THP itself will
not make it this time around however.
- More Freescale updates, including Altivec support on the new e6500
cores, new PCI controller support, and a pile of new board support
and updates.
- The usual batch of trivial cleanups & fixes"
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits)
powerpc: Fix build error for book3e
powerpc: Context switch the new EBB SPRs
powerpc: Turn on the EBB H/FSCR bits
powerpc: Replace CPU_FTR_BCTAR with CPU_FTR_ARCH_207S
powerpc: Setup BHRB instructions facility in HFSCR for POWER8
powerpc: Fix interrupt range check on debug exception
powerpc: Update tlbie/tlbiel as per ISA doc
powerpc: Print page size info during boot
powerpc: print both base and actual page size on hash failure
powerpc: Fix hpte_decode to use the correct decoding for page sizes
powerpc: Decode the pte-lp-encoding bits correctly.
powerpc: Use encode avpn where we need only avpn values
powerpc: Reduce PTE table memory wastage
powerpc: Move the pte free routines from common header
powerpc: Reduce the PTE_INDEX_SIZE
powerpc: Switch 16GB and 16MB explicit hugepages to a different page table format
powerpc: New hugepage directory format
powerpc: Don't truncate pgd_index wrongly
powerpc: Don't hard code the size of pte page
powerpc: Save DAR and DSISR in pt_regs on MCE
...
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 192 |
1 files changed, 152 insertions, 40 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1a6de0a7d8eb..237c8e5f2640 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -48,30 +48,71 @@ static u64 gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; #endif -static inline int shift_to_mmu_psize(unsigned int shift) +#define hugepd_none(hpd) ((hpd).pd == 0) + +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * At this point we do the placement change only for BOOK3S 64. This would + * possibly work on other subarchs. + */ + +/* + * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have + * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + */ +int pmd_huge(pmd_t pmd) { - int psize; + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pmd_val(pmd) & 0x3) != 0x0); +} - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) - if (mmu_psize_defs[psize].shift == shift) - return psize; - return -1; +int pud_huge(pud_t pud) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pud_val(pud) & 0x3) != 0x0); } -static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +int pgd_huge(pgd_t pgd) { - if (mmu_psize_defs[mmu_psize].shift) - return mmu_psize_defs[mmu_psize].shift; - BUG(); + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pgd_val(pgd) & 0x3) != 0x0); +} +#else +int pmd_huge(pmd_t pmd) +{ + return 0; } -#define hugepd_none(hpd) ((hpd).pd == 0) +int pud_huge(pud_t pud) +{ + return 0; +} + +int pgd_huge(pgd_t pgd) +{ + return 0; +} +#endif +/* + * We have 4 cases for pgds and pmds: + * (1) invalid (all zeroes) + * (2) pointer to next table, as normal; bottom 6 bits == 0 + * (3) leaf pte for huge page, bottom two bits != 00 + * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table + */ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) { pgd_t *pg; pud_t *pu; pmd_t *pm; + pte_t *ret_pte; hugepd_t *hpdp = NULL; 
unsigned pdshift = PGDIR_SHIFT; @@ -79,30 +120,43 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift *shift = 0; pg = pgdir + pgd_index(ea); - if (is_hugepd(pg)) { + + if (pgd_huge(*pg)) { + ret_pte = (pte_t *) pg; + goto out; + } else if (is_hugepd(pg)) hpdp = (hugepd_t *)pg; - } else if (!pgd_none(*pg)) { + else if (!pgd_none(*pg)) { pdshift = PUD_SHIFT; pu = pud_offset(pg, ea); - if (is_hugepd(pu)) + + if (pud_huge(*pu)) { + ret_pte = (pte_t *) pu; + goto out; + } else if (is_hugepd(pu)) hpdp = (hugepd_t *)pu; else if (!pud_none(*pu)) { pdshift = PMD_SHIFT; pm = pmd_offset(pu, ea); - if (is_hugepd(pm)) + + if (pmd_huge(*pm)) { + ret_pte = (pte_t *) pm; + goto out; + } else if (is_hugepd(pm)) hpdp = (hugepd_t *)pm; - else if (!pmd_none(*pm)) { + else if (!pmd_none(*pm)) return pte_offset_kernel(pm, ea); - } } } - if (!hpdp) return NULL; + ret_pte = hugepte_offset(hpdp, ea, pdshift); + pdshift = hugepd_shift(*hpdp); +out: if (shift) - *shift = hugepd_shift(*hpdp); - return hugepte_offset(hpdp, ea, pdshift); + *shift = pdshift; + return ret_pte; } EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); @@ -145,6 +199,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (unlikely(!hugepd_none(*hpdp))) break; else + /* We use the old format for PPC_FSL_BOOK3E */ hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; } /* If we bailed from the for loop early, an error occurred, clean up */ @@ -156,9 +211,15 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #else if (!hugepd_none(*hpdp)) kmem_cache_free(cachep, new); - else + else { +#ifdef CONFIG_PPC_BOOK3S_64 + hpdp->pd = (unsigned long)new | + (shift_to_mmu_psize(pshift) << 2); +#else hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; #endif + } +#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -175,6 +236,61 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #define HUGEPD_PUD_SHIFT PMD_SHIFT #endif +#ifdef CONFIG_PPC_BOOK3S_64 
+/* + * At this point we do the placement change only for BOOK3S 64. This would + * possibly work on other subarchs. + */ +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +{ + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + hugepd_t *hpdp = NULL; + unsigned pshift = __ffs(sz); + unsigned pdshift = PGDIR_SHIFT; + + addr &= ~(sz-1); + pg = pgd_offset(mm, addr); + + if (pshift == PGDIR_SHIFT) + /* 16GB huge page */ + return (pte_t *) pg; + else if (pshift > PUD_SHIFT) + /* + * We need to use hugepd table + */ + hpdp = (hugepd_t *)pg; + else { + pdshift = PUD_SHIFT; + pu = pud_alloc(mm, pg, addr); + if (pshift == PUD_SHIFT) + return (pte_t *)pu; + else if (pshift > PMD_SHIFT) + hpdp = (hugepd_t *)pu; + else { + pdshift = PMD_SHIFT; + pm = pmd_alloc(mm, pu, addr); + if (pshift == PMD_SHIFT) + /* 16MB hugepage */ + return (pte_t *)pm; + else + hpdp = (hugepd_t *)pm; + } + } + if (!hpdp) + return NULL; + + BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); + + if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) + return NULL; + + return hugepte_offset(hpdp, addr, pdshift); +} + +#else + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pg; @@ -212,6 +328,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(hpdp, addr, pdshift); } +#endif #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. 
This function is used in early @@ -475,7 +592,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (pmd_none(*pmd)) + if (pmd_none_or_clear_bad(pmd)) continue; #ifdef CONFIG_PPC_FSL_BOOK3E /* @@ -628,16 +745,6 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) return page; } -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -int pud_huge(pud_t pud) -{ - return 0; -} - struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) @@ -646,8 +753,8 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) +int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask; unsigned long pte_end; @@ -742,7 +849,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); } #endif @@ -883,11 +990,16 @@ static int __init hugetlbpage_init(void) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; - - pgtable_cache_add(pdshift - shift, NULL); - if (!PGT_CACHE(pdshift - shift)) - panic("hugetlbpage_init(): could not create " - "pgtable cache for %d bit pagesize\n", shift); + /* + * if we have pdshift and shift value same, we don't + * use pgt cache for hugepd. + */ + if (pdshift != shift) { + pgtable_cache_add(pdshift - shift, NULL); + if (!PGT_CACHE(pdshift - shift)) + panic("hugetlbpage_init(): could not create " + "pgtable cache for %d bit pagesize\n", shift); + } } /* Set default large page size. 
Currently, we pick 16M or 1M |