| author | Vinod Rex <vrex@nvidia.com> | 2011-08-26 11:42:40 -0700 |
|---|---|---|
| committer | Varun Colbert <vcolbert@nvidia.com> | 2011-09-19 17:57:58 -0700 |
| commit | b95306beb700ea1dddb228d3f1acfe4efc36f2f6 (patch) | |
| tree | 95a1e6bb98d972c87e3aaffd67900f70aabc5134 /arch/arm/mm | |
| parent | 5a26af405f149dd0fe84fc6f7b09603035b4de7c (diff) | |
arm: mm: change_page_attr support
bug 865816
Adapted from x86 change_page_attr() implementation
Change-Id: I8edaeb99d9c7f0d09465f3426bfe08f3231c857b
Reviewed-on: http://git-master/r/49501
Tested-by: Krishna Reddy <vdumpa@nvidia.com>
Tested-by: Vinod Rex <vrex@nvidia.com>
Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
Diffstat (limited to 'arch/arm/mm')
| -rw-r--r-- | arch/arm/mm/Makefile | 2 |
| -rw-r--r-- | arch/arm/mm/mmu.c | 9 |
| -rw-r--r-- | arch/arm/mm/pageattr.c | 993 |
| -rw-r--r-- | arch/arm/mm/pgd.c | 31 |
4 files changed, 1034 insertions, 1 deletion
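For orientation before the patch body: the new pageattr.c brings the x86-style change_page_attr() machinery to ARM, and drivers are meant to consume it through the set_memory_*() helpers it exports. The sketch below shows that intended call pattern. It is not part of the commit; the module framing and the buffer are hypothetical, and the prototypes are declared locally because the header that will carry them is not in this hunk.

```c
/*
 * Usage sketch only -- not part of this commit. Exercises the
 * set_memory_uc()/set_memory_wb() helpers that the new pageattr.c
 * below exports. "buf" and the module framing are hypothetical.
 */
#include <linux/module.h>
#include <linux/gfp.h>

/* Exported by arch/arm/mm/pageattr.c in the diff below. */
extern int set_memory_uc(unsigned long addr, int numpages);
extern int set_memory_wb(unsigned long addr, int numpages);

static unsigned long buf;

static int __init cpa_demo_init(void)
{
	int ret;

	/* One lowmem page; CPA expects page-aligned addresses. */
	buf = __get_free_page(GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* Retype the kernel direct mapping of this page to uncached. */
	ret = set_memory_uc(buf, 1);
	if (ret) {
		free_page(buf);
		return ret;
	}

	/* ... use the buffer for uncached device communication ... */

	/* Restore write-back before returning the page to the allocator. */
	ret = set_memory_wb(buf, 1);
	free_page(buf);
	return ret;
}

static void __exit cpa_demo_exit(void)
{
}

module_init(cpa_demo_init);
module_exit(cpa_demo_exit);
MODULE_LICENSE("GPL");
```

Note that set_memory_uc() changes only the attributes of the kernel's direct mapping; if a large page covers the address, the patch below splits it first, which is exactly the path this commit adds.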
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d63b6c413758..fb7b513bee27 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -6,7 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
 	iomap.o
 
 obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \
-	pgd.o mmu.o vmregion.o
+	pgd.o mmu.o vmregion.o pageattr.o
 
 ifneq ($(CONFIG_MMU),y)
 obj-y += nommu.o
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index fa2637b5479a..7df956a67267 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -561,6 +561,7 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
 				      const struct mem_type *type)
 {
 	pmd_t *pmd = pmd_offset(pgd, addr);
+	unsigned long pages_2m = 0, pages_4k = 0;
 
 	/*
 	 * Try a section mapping - end, addr and phys must all be aligned
@@ -579,6 +580,8 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
 			phys += SECTION_SIZE;
 		} while (pmd++, addr += SECTION_SIZE, addr != end);
 
+		pages_2m += (end-addr) >> SECTION_SHIFT;
+
 		flush_pmd_entry(p);
 	} else {
 		/*
@@ -586,6 +589,12 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
 		 * individual L1 entries.
 		 */
 		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
+		pages_4k += (end-addr) >> PAGE_SHIFT;
+	}
+
+	if ((addr < lowmem_end_addr) && (end < lowmem_end_addr)) {
+		update_page_count(PG_LEVEL_2M, pages_2m);
+		update_page_count(PG_LEVEL_4K, pages_4k);
 	}
 }
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
new file mode 100644
index 000000000000..ba57e5d9ca50
--- /dev/null
+++ b/arch/arm/mm/pageattr.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Thanks to Ben LaHaise for precious feedback.
+ */
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/pfn.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+
+#ifdef CPA_DEBUG
+#define cpa_debug(x, ...)	printk(x, __VA_ARGS__)
+#else
+#define cpa_debug(x, ...)
+#endif
+
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+	unsigned long *vaddr;
+	pgprot_t mask_set;
+	pgprot_t mask_clr;
+	int numpages;
+	int flags;
+	unsigned long pfn;
+	unsigned force_split:1;
+	int curpage;
+	struct page **pages;
+};
+
+/*
+ * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
+ * using cpa_lock. So that we don't allow any other cpu, with stale large tlb
+ * entries change the page attribute in parallel to some other cpu
+ * splitting a large page entry along with changing the attribute.
+ */
+static DEFINE_SPINLOCK(cpa_lock);
+
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+#define CPA_PAGES_ARRAY 4
+
+#ifdef CONFIG_PROC_FS
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
+
+void update_page_count(int level, unsigned long pages)
+{
+	unsigned long flags;
+
+	/* Protect against CPA */
+	spin_lock_irqsave(&pgd_lock, flags);
+	direct_pages_count[level] += pages;
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void split_page_count(int level)
+{
+	direct_pages_count[level]--;
+	direct_pages_count[level - 1] += PTRS_PER_PTE;
+}
+
+void arch_report_meminfo(struct seq_file *m)
+{
+	seq_printf(m, "DirectMap4k: %8lu kB\n",
+		   direct_pages_count[PG_LEVEL_4K] << 2);
+	seq_printf(m, "DirectMap2M: %8lu kB\n",
+		   direct_pages_count[PG_LEVEL_2M] << 11);
+}
+#else
+static inline void split_page_count(int level) { }
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr < end;
+}
+
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
+{
+	unsigned int i, level;
+	unsigned long addr;
+
+	BUG_ON(irqs_disabled());
+	WARN_ON(PAGE_ALIGN(start) != start);
+
+	flush_tlb_kernel_range(start, start + (numpages << PAGE_SHIFT));
+
+	if (!cache)
+		return;
+
+	for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+		pte_t *pte = lookup_address(addr, &level);
+
+		/*
+		 * Only flush present addresses:
+		 */
+		if (pte && pte_present(*pte)) {
+			__cpuc_flush_dcache_area((void *) addr, PAGE_SIZE);
+			outer_flush_range(__pa((void *)addr),
+					  __pa((void *)addr) + PAGE_SIZE);
+		}
+	}
+}
+
+static void cpa_flush_array(unsigned long *start, int numpages, int cache,
+			    int in_flags, struct page **pages)
+{
+	unsigned int i, level;
+
+	BUG_ON(irqs_disabled());
+
+	for (i = 0; i < numpages; i++) {
+		unsigned long addr;
+		pte_t *pte;
+
+		if (in_flags & CPA_PAGES_ARRAY)
+			addr = (unsigned long)page_address(pages[i]);
+		else
+			addr = start[i];
+
+		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+		if (cache) {
+
+			pte = lookup_address(addr, &level);
+
+			/*
+			 * Only flush present addresses:
+			 */
+			if (pte && pte_present(*pte)) {
+				__cpuc_flush_dcache_area((void *)addr,
+							 PAGE_SIZE);
+				outer_flush_range(__pa((void *)addr),
+						  __pa((void *)addr) + PAGE_SIZE);
+			}
+		}
+	}
+}
+
+/*
+ * Certain areas of memory require very specific protection flags,
+ * for example the kernel text. Callers don't always get this
+ * right so this function checks and fixes these known static
+ * required protection bits.
+ */
+static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
+					  unsigned long pfn)
+{
+	pgprot_t forbidden = __pgprot(0);
+
+	/*
+	 * The kernel text needs to be executable for obvious reasons.
+	 * Does not cover __inittext since that is gone later on.
+	 */
+	if (within(address, (unsigned long)_text, (unsigned long)_etext))
+		prot |= L_PTE_EXEC;
+
+	/*
+	 * The .rodata section needs to be read-only. Using the pfn
+	 * catches all aliases.
+	 */
+	if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
+		   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
+		pgprot_val(forbidden) |= L_PTE_WRITE;
+
+	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+
+	return prot;
+}
+
+static inline pgprot_t pte_to_pmd_pgprot(unsigned long pte,
+					 unsigned long ext_prot)
+{
+	pgprot_t ref_prot = 0;
+
+	ref_prot |= PMD_TYPE_SECT | PMD_SECT_XN;
+
+	if (pte & L_PTE_MT_BUFFERABLE)
+		ref_prot |= PMD_SECT_BUFFERABLE;
+
+	if (pte & L_PTE_MT_WRITETHROUGH)
+		ref_prot |= PMD_SECT_CACHEABLE;
+
+	if (pte & L_PTE_SHARED)
+		ref_prot |= PMD_SECT_S;
+
+	if (pte & L_PTE_EXEC)
+		ref_prot &= ~PMD_SECT_XN;
+
+	if (pte & L_PTE_WRITE)
+		ref_prot |= PMD_SECT_AP_WRITE;
+
+	ref_prot |= (ext_prot & (PTE_EXT_AP0 | PTE_EXT_AP1 | PTE_EXT_APX |
+				 PTE_EXT_NG | (7 << 6))) << 6;
+
+	return ref_prot;
+}
+
+static inline pgprot_t pmd_to_pte_pgprot(unsigned long pmd,
+					 unsigned long *ext_prot)
+{
+	pgprot_t ref_prot = 0;
+
+	ref_prot |= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_EXEC;
+
+	if (pmd & PMD_SECT_BUFFERABLE)
+		ref_prot |= L_PTE_MT_BUFFERABLE;
+
+	if (pmd & PMD_SECT_CACHEABLE)
+		ref_prot |= L_PTE_MT_WRITETHROUGH;
+
+	if (pmd & PMD_SECT_S)
+		ref_prot |= L_PTE_SHARED;
+
+	if (pmd & PMD_SECT_XN)
+		ref_prot &= ~L_PTE_EXEC;
+
+	if (pmd & PMD_SECT_AP_WRITE)
+		ref_prot |= L_PTE_WRITE;
+
+	/* AP/APX/TEX bits */
+	*ext_prot = (pmd & (PMD_SECT_AP_WRITE | PMD_SECT_AP_READ |
+			    PMD_SECT_APX | PMD_SECT_nG | (7 << 12))) >> 6;
+
+	return ref_prot;
+}
+
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
+pte_t *lookup_address(unsigned long address, unsigned int *level)
+{
+	pgd_t *pgd = pgd_offset_k(address);
+	pte_t *pte;
+	pmd_t *pmd;
+
+	/* pmds are folded into pgds on ARM */
+	*level = PG_LEVEL_NONE;
+
+	if (pgd == NULL || pgd_none(*pgd))
+		return NULL;
+
+	pmd = pmd_offset(pgd, address);
+
+	if (pmd == NULL || pmd_none(*pmd) || !pmd_present(*pmd))
+		return NULL;
+
+	if (((pmd_val(*pmd) & (PMD_TYPE_SECT | PMD_SECT_SUPER))
+	     == (PMD_TYPE_SECT | PMD_SECT_SUPER)) || !pmd_present(*pmd)) {
+
+		return NULL;
+	} else if (pmd_val(*pmd) & PMD_TYPE_SECT) {
+
+		*level = PG_LEVEL_2M;
+		return (pte_t *)pmd;
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+
+	if ((pte == NULL) || pte_none(*pte))
+		return NULL;
+
+	*level = PG_LEVEL_4K;
+
+	return pte;
+}
+EXPORT_SYMBOL_GPL(lookup_address);
+
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
+static void __set_pmd_pte(pmd_t *pmd, unsigned long address, pte_t *pte)
+{
+	struct page *page;
+
+	cpa_debug("__set_pmd_pte %x %x %x\n", pmd, pte, *pte);
+
+	/* change init_mm */
+	pmd_populate_kernel(&init_mm, pmd, pte);
+
+	/* change entry in all the pgd's */
+	list_for_each_entry(page, &pgd_list, lru) {
+		cpa_debug("list %x %x %x\n", (unsigned long)page,
+			  (unsigned long)pgd_index(address), address);
+		pmd = pmd_offset(((pgd_t *)page_address(page)) +
+				 pgd_index(address), address);
+		pmd_populate_kernel(NULL, pmd, pte);
+	}
+
+}
+
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+			struct cpa_data *cpa)
+{
+	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+	pte_t old_pte, *tmp;
+	pgprot_t old_prot, new_prot, ext_prot, req_prot;
+	int i, do_split = 1;
+	unsigned int level;
+
+	if (cpa->force_split)
+		return 1;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	switch (level) {
+
+	case PG_LEVEL_2M:
+		psize = PMD_SIZE;
+		pmask = PMD_MASK;
+		break;
+
+	default:
+		do_split = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Calculate the number of pages, which fit into this large
+	 * page starting at address:
+	 */
+	nextpage_addr = (address + psize) & pmask;
+	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
+
+	old_prot = new_prot = req_prot = pmd_to_pte_pgprot(pmd_val(*kpte),
+							   &ext_prot);
+
+	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
+
+	/*
+	 * old_pte points to the large page base address. So we need
+	 * to add the offset of the virtual address:
+	 */
+	pfn = pmd_pfn(*kpte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+	cpa->pfn = pfn;
+
+	new_prot = static_protections(req_prot, address, pfn);
+
+	/*
+	 * We need to check the full range, whether
+	 * static_protection() requires a different pgprot for one of
+	 * the pages in the range we try to preserve:
+	 */
+	addr = address & pmask;
+	pfn = pmd_pfn(old_pte);
+	for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+		pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
+
+		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+			goto out_unlock;
+	}
+
+	/*
+	 * If there are no changes, return.
+	 * maxpages has been updated above:
+	 */
+	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+		do_split = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * convert prot to pmd format
+	 */
+	new_prot = pte_to_pmd_pgprot(new_prot, ext_prot);
+
+	/*
+	 * We need to change the attributes. Check, whether we can
+	 * change the large page in one go. We request a split, when
+	 * the address is not aligned and the number of pages is
+	 * smaller than the number of pages in the large page. Note
+	 * that we limited the number of possible pages already to
+	 * the number of pages in the large page.
+	 */
+	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+		/*
+		 * The address is aligned and the number of pages
+		 * covers the full page.
+		 */
+		phys_addr_t phys = __pfn_to_phys(pmd_pfn(*kpte));
+		pmd_t *p = (pmd_t *)kpte;
+
+		*kpte++ = __pmd(phys | new_prot);
+		*kpte = __pmd((phys + SECTION_SIZE) | new_prot);
+		flush_pmd_entry(p);
+		cpa->flags |= CPA_FLUSHTLB;
+		do_split = 0;
+		cpa_debug("preserving page at phys %x pmd %x\n", phys, p);
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	return do_split;
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+	unsigned long flags, pfn, pfninc = 1;
+	unsigned int i, level;
+	pte_t *pbase, *tmp;
+	pgprot_t ref_prot = 0, ext_prot = 0;
+	int ret = 0;
+
+	pbase = pte_alloc_one_kernel(&init_mm, address);
+	if (!pbase)
+		return -ENOMEM;
+
+	cpa_debug("split_large_page %x PMD %x new pte @ %x\n", address,
+		  *kpte, pbase);
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up for us already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	/*
+	 * we only split 2MB entries for now
+	 */
+	if (level != PG_LEVEL_2M) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ref_prot = pmd_to_pte_pgprot(pmd_val(*kpte), &ext_prot);
+
+	/*
+	 * Get the target pfn from the original entry:
+	 */
+	pfn = pmd_pfn(*kpte);
+	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+		set_pte_ext(&pbase[i], pfn_pte(pfn, ref_prot), ext_prot);
+
+	if (address >= (unsigned long)__va(0) &&
+	    address < (unsigned long)__va(lowmem_end_addr))
+		split_page_count(level);
+
+	/*
+	 * Install the new, split up pagetable.
+	 */
+	__set_pmd_pte((pmd_t *)kpte, address, pbase);
+
+	pbase = NULL;
+
+out_unlock:
+	/*
+	 * If we dropped out via the lookup_address check under
+	 * pgd_lock then stick the page back into the pool:
+	 */
+	if (pbase)
+		pte_free_kernel(&init_mm, pbase);
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	return ret;
+}
+
+static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
+			       int primary)
+{
+	/*
+	 * Ignore all non primary paths.
+	 */
+	if (!primary)
+		return 0;
+
+	/*
+	 * Ignore the NULL PTE for kernel identity mapping, as it is expected
+	 * to have holes.
+	 * Also set numpages to '1' indicating that we processed cpa req for
+	 * one virtual address page and its pfn. TBD: numpages can be set based
+	 * on the initial value and the level returned by lookup_address().
+	 */
+	if (within(vaddr, PAGE_OFFSET,
+		   PAGE_OFFSET + lowmem_end_addr)) {
+		cpa->numpages = 1;
+		cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
+		return 0;
+	} else {
+		WARN(1, KERN_WARNING "CPA: called for zero pte. "
+		     "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
+		     *cpa->vaddr);
+
+		return -EFAULT;
+	}
+}
+
+static int __change_page_attr(struct cpa_data *cpa, int primary)
+{
+	unsigned long address;
+	int do_split, err;
+	unsigned int level;
+	pte_t *kpte, old_pte;
+
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+
+		if (unlikely(PageHighMem(page)))
+			return 0;
+
+		address = (unsigned long)page_address(page);
+
+	} else if (cpa->flags & CPA_ARRAY)
+		address = cpa->vaddr[cpa->curpage];
+	else
+		address = *cpa->vaddr;
+
+repeat:
+	kpte = lookup_address(address, &level);
+	if (!kpte)
+		return __cpa_process_fault(cpa, address, primary);
+
+	old_pte = *kpte;
+	if (!pte_val(old_pte))
+		return __cpa_process_fault(cpa, address, primary);
+
+	if (level == PG_LEVEL_4K) {
+		pte_t new_pte;
+		pgprot_t new_prot = pte_pgprot(old_pte);
+		unsigned long pfn = pte_pfn(old_pte);
+
+		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+
+		new_prot = static_protections(new_prot, address, pfn);
+
+		/*
+		 * We need to keep the pfn from the existing PTE,
+		 * after all we're only going to change its attributes,
+		 * not the memory it points to.
+		 */
+		new_pte = pfn_pte(pfn, new_prot);
+		cpa->pfn = pfn;
+
+		/*
+		 * Do we really change anything ?
+		 */
+		if (pte_val(old_pte) != pte_val(new_pte)) {
+			set_pte_ext(kpte, new_pte, 0);
+			/*
+			 * FIXME : is this needed on arm?
+			 * set_pte_ext already does a flush
+			 */
+			cpa->flags |= CPA_FLUSHTLB;
+		}
+		cpa->numpages = 1;
+		return 0;
+	}
+
+	/*
+	 * Check, whether we can keep the large page intact
+	 * and just change the pte:
+	 */
+	do_split = try_preserve_large_page(kpte, address, cpa);
+
+	/*
+	 * When the range fits into the existing large page,
+	 * return. cpa->numpages and cpa->tlbflush have been updated in
+	 * try_preserve_large_page():
+	 */
+	if (do_split <= 0)
+		return do_split;
+
+	/*
+	 * We have to split the large page:
+	 */
+	err = split_large_page(kpte, address);
+
+	if (!err) {
+		/*
+		 * Do a global flush tlb after splitting the large page
+		 * and before we do the actual change page attribute in the PTE.
+		 *
+		 * Without this, we violate the TLB application note, which says
+		 * "The TLBs may contain both ordinary and large-page
+		 *  translations for a 4-KByte range of linear addresses. This
+		 *  may occur if software modifies the paging structures so that
+		 *  the page size used for the address range changes. If the two
+		 *  translations differ with respect to page frame or attributes
+		 *  (e.g., permissions), processor behavior is undefined and may
+		 *  be implementation-specific."
+		 *
+		 * We do this global tlb flush inside the cpa_lock, so that we
+		 * don't allow any other cpu, with stale tlb entries change the
+		 * page attribute in parallel, that also falls into the
+		 * just split large page entry.
+		 */
+		flush_tlb_all();
+		goto repeat;
+	}
+
+	return err;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+
+static int cpa_process_alias(struct cpa_data *cpa)
+{
+	struct cpa_data alias_cpa;
+	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+	unsigned long vaddr;
+	int ret;
+
+	if (cpa->pfn >= (lowmem_end_addr >> PAGE_SHIFT))
+		return 0;
+
+	/*
+	 * No need to redo, when the primary call touched the direct
+	 * mapping already:
+	 */
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+		if (unlikely(PageHighMem(page)))
+			return 0;
+		vaddr = (unsigned long)page_address(page);
+	} else if (cpa->flags & CPA_ARRAY)
+		vaddr = cpa->vaddr[cpa->curpage];
+	else
+		vaddr = *cpa->vaddr;
+
+	if (!(within(vaddr, PAGE_OFFSET,
+		     PAGE_OFFSET + lowmem_end_addr))) {
+
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &laddr;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+
+		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+{
+	int ret, numpages = cpa->numpages;
+
+	while (numpages) {
+		/*
+		 * Store the remaining nr of pages for the large page
+		 * preservation check.
+		 */
+		cpa->numpages = numpages;
+		/* for array changes, we can't use large page */
+		if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
+			cpa->numpages = 1;
+
+		if (!debug_pagealloc)
+			spin_lock(&cpa_lock);
+		ret = __change_page_attr(cpa, checkalias);
+		if (!debug_pagealloc)
+			spin_unlock(&cpa_lock);
+		if (ret)
+			return ret;
+
+		if (checkalias) {
+			ret = cpa_process_alias(cpa);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * Adjust the number of pages with the result of the
+		 * CPA operation. Either a large page has been
+		 * preserved or a single page update happened.
+		 */
+		BUG_ON(cpa->numpages > numpages);
+		numpages -= cpa->numpages;
+		if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
+			cpa->curpage++;
+		else
+			*cpa->vaddr += cpa->numpages * PAGE_SIZE;
+	}
+	return 0;
+}
+
+static inline int cache_attr(pgprot_t attr)
+{
+	/*
+	 * We need to flush the cache for all memory type changes
+	 * except when a page is being marked write back cacheable
+	 */
+	return !((pgprot_val(attr) & L_PTE_MT_MASK) == L_PTE_MT_WRITEBACK);
+}
+
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
+				    pgprot_t mask_set, pgprot_t mask_clr,
+				    int force_split, int in_flag,
+				    struct page **pages)
+{
+	struct cpa_data cpa;
+	int ret, cache, checkalias;
+	unsigned long baddr = 0;
+
+	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
+		return 0;
+
+	/* Ensure we are PAGE_SIZE aligned */
+	if (in_flag & CPA_ARRAY) {
+		int i;
+		for (i = 0; i < numpages; i++) {
+			if (addr[i] & ~PAGE_MASK) {
+				addr[i] &= PAGE_MASK;
+				WARN_ON_ONCE(1);
+			}
+		}
+	} else if (!(in_flag & CPA_PAGES_ARRAY)) {
+		/*
+		 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
+		 * No need to check in that case.
+		 */
+		if (*addr & ~PAGE_MASK) {
+			*addr &= PAGE_MASK;
+			/*
+			 * People should not be passing in unaligned addresses:
+			 */
+			WARN_ON_ONCE(1);
+		}
+		/*
+		 * Save address for cache flush. *addr is modified in the call
+		 * to __change_page_attr_set_clr() below.
+		 */
+		baddr = *addr;
+	}
+
+	/* Must avoid aliasing mappings in the highmem code */
+	kmap_flush_unused();
+
+	vm_unmap_aliases();
+
+	cpa.vaddr = addr;
+	cpa.pages = pages;
+	cpa.numpages = numpages;
+	cpa.mask_set = mask_set;
+	cpa.mask_clr = mask_clr;
+	cpa.flags = 0;
+	cpa.curpage = 0;
+	cpa.force_split = force_split;
+
+	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
+		cpa.flags |= in_flag;
+
+	/* No alias checking for XN bit modifications */
+	checkalias = (pgprot_val(mask_set) |
+		      pgprot_val(mask_clr)) != L_PTE_EXEC;
+
+	ret = __change_page_attr_set_clr(&cpa, checkalias);
+
+	/*
+	 * Check whether we really changed something:
+	 */
+	if (!(cpa.flags & CPA_FLUSHTLB))
+		goto out;
+
+	cache = cache_attr(mask_set);
+
+	if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+		cpa_flush_array(addr, numpages, cache,
+				cpa.flags, pages);
+	} else
+		cpa_flush_range(baddr, numpages, cache);
+
+out:
+	return ret;
+}
+
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+				       pgprot_t mask, int array)
+{
+	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+					(array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+					 pgprot_t mask, int array)
+{
+	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+					(array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int cpa_set_pages_array(struct page **pages, int numpages,
+				      pgprot_t mask)
+{
+	return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
+					CPA_PAGES_ARRAY, pages);
+}
+
+static inline int cpa_clear_pages_array(struct page **pages, int numpages,
+					pgprot_t mask)
+{
+	return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
+					CPA_PAGES_ARRAY, pages);
+}
+
+int set_memory_uc(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages,
+					__pgprot(L_PTE_MT_UNCACHED),
+					__pgprot(L_PTE_MT_MASK), 0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_uc);
+
+int _set_memory_array(unsigned long *addr, int addrinarray,
+		      unsigned long set, unsigned long clr)
+{
+	return change_page_attr_set_clr(addr, addrinarray, __pgprot(set),
+					__pgprot(clr), 0, CPA_ARRAY, NULL);
+}
+
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+	return _set_memory_array(addr, addrinarray,
+				 L_PTE_MT_UNCACHED, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
+int set_memory_array_wc(unsigned long *addr, int addrinarray)
+{
+	return _set_memory_array(addr, addrinarray,
+				 L_PTE_MT_BUFFERABLE, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_memory_array_wc);
+
+int set_memory_wc(unsigned long addr, int numpages)
+{
+	int ret;
+
+	ret = change_page_attr_set_clr(&addr, numpages,
+				       __pgprot(L_PTE_MT_BUFFERABLE),
+				       __pgprot(L_PTE_MT_MASK),
+				       0, 0, NULL);
+	return ret;
+}
+EXPORT_SYMBOL(set_memory_wc);
+
+int set_memory_wb(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages,
+					__pgprot(L_PTE_MT_WRITEBACK),
+					__pgprot(L_PTE_MT_MASK),
+					0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_wb);
+
+int set_memory_iwb(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages,
+					__pgprot(L_PTE_MT_INNER_WB),
+					__pgprot(L_PTE_MT_MASK),
+					0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_iwb);
+
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+	return change_page_attr_set_clr(addr, addrinarray,
+					__pgprot(L_PTE_MT_WRITEBACK),
+					__pgprot(L_PTE_MT_MASK),
+					0, CPA_ARRAY, NULL);
+
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
+int set_memory_array_iwb(unsigned long *addr, int addrinarray)
+{
+	return change_page_attr_set_clr(addr, addrinarray,
+					__pgprot(L_PTE_MT_INNER_WB),
+					__pgprot(L_PTE_MT_MASK),
+					0, CPA_ARRAY, NULL);
+
+}
+EXPORT_SYMBOL(set_memory_array_iwb);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages, __pgprot(L_PTE_EXEC), 0);
+}
+EXPORT_SYMBOL(set_memory_x);
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(L_PTE_EXEC), 0);
+}
+EXPORT_SYMBOL(set_memory_nx);
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(L_PTE_WRITE), 0);
+}
+EXPORT_SYMBOL_GPL(set_memory_ro);
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages,
+				    __pgprot(L_PTE_WRITE), 0);
+}
+EXPORT_SYMBOL_GPL(set_memory_rw);
+
+int set_memory_np(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(L_PTE_PRESENT), 0);
+}
+
+int set_memory_4k(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+					__pgprot(0), 1, 0, NULL);
+}
+
+static int _set_pages_array(struct page **pages, int addrinarray,
+			    unsigned long set, unsigned long clr)
+{
+	return change_page_attr_set_clr(NULL, addrinarray,
+					__pgprot(set),
+					__pgprot(clr),
+					0, CPA_PAGES_ARRAY, pages);
+}
+
+int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray,
+				L_PTE_MT_UNCACHED, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_uc);
+
+int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray, L_PTE_MT_BUFFERABLE,
+				L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_wc);
+
+int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray,
+				L_PTE_MT_WRITEBACK, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_wb);
+
+int set_pages_array_iwb(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray,
+				L_PTE_MT_INNER_WB, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_iwb);
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index ea92009b91df..536bcf393d15 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -19,6 +19,23 @@
 
 #define FIRST_KERNEL_PGD_NR	(FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
 
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_add(&page->lru, &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
+}
+
 /*
  * need to get a 16k page for level 1
  */
@@ -27,6 +44,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	pgd_t *new_pgd, *init_pgd;
 	pmd_t *new_pmd, *init_pmd;
 	pte_t *new_pte, *init_pte;
+	unsigned long flags;
 
 	new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2);
 	if (!new_pgd)
@@ -34,6 +52,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 
 	memset(new_pgd, 0, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
 
+	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Copy over the kernel and IO PGD entries
 	 */
@@ -44,6 +63,10 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 #if !defined(CONFIG_CPU_CACHE_V7) || !defined(CONFIG_SMP)
 	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 #endif
+
+	pgd_list_add(new_pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
 	if (!vectors_high()) {
 		/*
 		 * On ARM, first page must always be allocated since it
@@ -69,6 +92,9 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 no_pte:
 	pmd_free(mm, new_pmd);
 no_pmd:
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(new_pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 	free_pages((unsigned long)new_pgd, 2);
 no_pgd:
 	return NULL;
@@ -78,10 +104,15 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
 {
 	pmd_t *pmd;
 	pgtable_t pte;
+	unsigned long flags;
 
 	if (!pgd)
 		return;
 
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
 	/* pgd is always present and good */
 	pmd = pmd_off(pgd, 0);
 	if (pmd_none(*pmd))
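To close, a hedged sketch of the other entry point this patch exports: lookup_address() walks the kernel tables and reports whether an address is still covered by an intact 2M section or has already been split down to 4K PTEs. The prototype and the PG_LEVEL_* values are taken from pageattr.c above; the probe_mapping() wrapper and its pr_info() reporting are illustrative only, and the header carrying the declarations is assumed.

```c
/*
 * Illustrative sketch -- not part of this commit. Queries the mapping
 * granularity of a kernel virtual address via lookup_address(), which
 * the new pageattr.c above EXPORT_SYMBOL_GPL()s.
 */
#include <linux/kernel.h>
#include <linux/mm.h>

/* Defined and exported by arch/arm/mm/pageattr.c in this patch. */
extern pte_t *lookup_address(unsigned long address, unsigned int *level);

static void probe_mapping(unsigned long vaddr)
{
	unsigned int level;
	pte_t *pte = lookup_address(vaddr, &level);

	if (!pte) {
		pr_info("%#lx: not mapped\n", vaddr);
		return;
	}

	/*
	 * PG_LEVEL_2M means the address still sits under a section
	 * entry; PG_LEVEL_4K means it has been split to PTEs, e.g.
	 * by an earlier set_memory_*() call on part of the section.
	 */
	pr_info("%#lx: %s mapping\n", vaddr,
		level == PG_LEVEL_2M ? "2M section" : "4K page");
}
```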