| author | Vinod Rex <vrex@nvidia.com> | 2011-08-26 11:42:40 -0700 |
|---|---|---|
| committer | Varun Colbert <vcolbert@nvidia.com> | 2011-09-19 17:57:58 -0700 |
| commit | b95306beb700ea1dddb228d3f1acfe4efc36f2f6 (patch) | |
| tree | 95a1e6bb98d972c87e3aaffd67900f70aabc5134 /arch/arm/mm | |
| parent | 5a26af405f149dd0fe84fc6f7b09603035b4de7c (diff) | |
arm: mm: change_page_attr support
bug 865816
Adapted from x86 change_page_attr() implementation
Change-Id: I8edaeb99d9c7f0d09465f3426bfe08f3231c857b
Reviewed-on: http://git-master/r/49501
Tested-by: Krishna Reddy <vdumpa@nvidia.com>
Tested-by: Vinod Rex <vrex@nvidia.com>
Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
Diffstat (limited to 'arch/arm/mm')
| -rw-r--r-- | arch/arm/mm/Makefile | 2 |
| -rw-r--r-- | arch/arm/mm/mmu.c | 9 |
| -rw-r--r-- | arch/arm/mm/pageattr.c | 993 |
| -rw-r--r-- | arch/arm/mm/pgd.c | 31 |
4 files changed, 1034 insertions, 1 deletion
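For orientation before the patch body: the new pageattr.c brings the x86-style change_page_attr() machinery to ARM, and drivers are meant to consume it through the set_memory_*() helpers it exports. The sketch below shows that intended call pattern. It is not part of the commit; the module framing and the buffer are hypothetical, and the prototypes are declared locally because the header that will carry them is not in this hunk.

```c
/*
 * Usage sketch only -- not part of this commit. Exercises the
 * set_memory_uc()/set_memory_wb() helpers that the new pageattr.c
 * below exports. "buf" and the module framing are hypothetical.
 */
#include <linux/module.h>
#include <linux/gfp.h>

/* Exported by arch/arm/mm/pageattr.c in the diff below. */
extern int set_memory_uc(unsigned long addr, int numpages);
extern int set_memory_wb(unsigned long addr, int numpages);

static unsigned long buf;

static int __init cpa_demo_init(void)
{
	int ret;

	/* One lowmem page; CPA expects page-aligned addresses. */
	buf = __get_free_page(GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* Retype the kernel direct mapping of this page to uncached. */
	ret = set_memory_uc(buf, 1);
	if (ret) {
		free_page(buf);
		return ret;
	}

	/* ... use the buffer for uncached device communication ... */

	/* Restore write-back before returning the page to the allocator. */
	ret = set_memory_wb(buf, 1);
	free_page(buf);
	return ret;
}

static void __exit cpa_demo_exit(void)
{
}

module_init(cpa_demo_init);
module_exit(cpa_demo_exit);
MODULE_LICENSE("GPL");
```

Note that set_memory_uc() changes only the attributes of the kernel's direct mapping; if a large page covers the address, the patch below splits it first, which is exactly the path this commit adds.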
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d63b6c413758..fb7b513bee27 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -6,7 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
 	iomap.o
 
 obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \
-	pgd.o mmu.o vmregion.o
+	pgd.o mmu.o vmregion.o pageattr.o
 
 ifneq ($(CONFIG_MMU),y)
 obj-y += nommu.o
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index fa2637b5479a..7df956a67267 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -561,6 +561,7 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
 				      const struct mem_type *type)
 {
 	pmd_t *pmd = pmd_offset(pgd, addr);
+	unsigned long pages_2m = 0, pages_4k = 0;
 
 	/*
 	 * Try a section mapping - end, addr and phys must all be aligned
@@ -579,6 +580,8 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
 			phys += SECTION_SIZE;
 		} while (pmd++, addr += SECTION_SIZE, addr != end);
 
+		pages_2m += (end-addr) >> SECTION_SHIFT;
+
 		flush_pmd_entry(p);
 	} else {
 		/*
@@ -586,6 +589,12 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
 		 * individual L1 entries.
 		 */
 		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
+		pages_4k += (end-addr) >> PAGE_SHIFT;
+	}
+
+	if ((addr < lowmem_end_addr) && (end < lowmem_end_addr)) {
+		update_page_count(PG_LEVEL_2M, pages_2m);
+		update_page_count(PG_LEVEL_4K, pages_4k);
 	}
 }
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
new file mode 100644
index 000000000000..ba57e5d9ca50
--- /dev/null
+++ b/arch/arm/mm/pageattr.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Thanks to Ben LaHaise for precious feedback.
+ */
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/pfn.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+
+#ifdef CPA_DEBUG
+#define cpa_debug(x, ...)	printk(x, __VA_ARGS__)
+#else
+#define cpa_debug(x, ...)
+#endif
+
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+	unsigned long *vaddr;
+	pgprot_t mask_set;
+	pgprot_t mask_clr;
+	int numpages;
+	int flags;
+	unsigned long pfn;
+	unsigned force_split:1;
+	int curpage;
+	struct page **pages;
+};
+
+/*
+ * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
+ * using cpa_lock. So that we don't allow any other cpu, with stale large tlb
+ * entries change the page attribute in parallel to some other cpu
+ * splitting a large page entry along with changing the attribute.
+ */
+static DEFINE_SPINLOCK(cpa_lock);
+
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+#define CPA_PAGES_ARRAY 4
+
+#ifdef CONFIG_PROC_FS
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
+
+void update_page_count(int level, unsigned long pages)
+{
+	unsigned long flags;
+
+	/* Protect against CPA */
+	spin_lock_irqsave(&pgd_lock, flags);
+	direct_pages_count[level] += pages;
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void split_page_count(int level)
+{
+	direct_pages_count[level]--;
+	direct_pages_count[level - 1] += PTRS_PER_PTE;
+}
+
+void arch_report_meminfo(struct seq_file *m)
+{
+	seq_printf(m, "DirectMap4k: %8lu kB\n",
+		   direct_pages_count[PG_LEVEL_4K] << 2);
+	seq_printf(m, "DirectMap2M: %8lu kB\n",
+		   direct_pages_count[PG_LEVEL_2M] << 11);
+}
+#else
+static inline void split_page_count(int level) { }
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr < end;
+}
+
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
+{
+	unsigned int i, level;
+	unsigned long addr;
+
+	BUG_ON(irqs_disabled());
+	WARN_ON(PAGE_ALIGN(start) != start);
+
+	flush_tlb_kernel_range(start, start + (numpages << PAGE_SHIFT));
+
+	if (!cache)
+		return;
+
+	for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+		pte_t *pte = lookup_address(addr, &level);
+
+		/*
+		 * Only flush present addresses:
+		 */
+		if (pte && pte_present(*pte)) {
+			__cpuc_flush_dcache_area((void *) addr, PAGE_SIZE);
+			outer_flush_range(__pa((void *)addr),
+					  __pa((void *)addr) + PAGE_SIZE);
+		}
+	}
+}
+
+static void cpa_flush_array(unsigned long *start, int numpages, int cache,
+			    int in_flags, struct page **pages)
+{
+	unsigned int i, level;
+
+	BUG_ON(irqs_disabled());
+
+	for (i = 0; i < numpages; i++) {
+		unsigned long addr;
+		pte_t *pte;
+
+		if (in_flags & CPA_PAGES_ARRAY)
+			addr = (unsigned long)page_address(pages[i]);
+		else
+			addr = start[i];
+
+		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+		if (cache) {
+
+			pte = lookup_address(addr, &level);
+
+			/*
+			 * Only flush present addresses:
+			 */
+			if (pte && pte_present(*pte)) {
+				__cpuc_flush_dcache_area((void *)addr,
+							 PAGE_SIZE);
+				outer_flush_range(__pa((void *)addr),
+						  __pa((void *)addr) + PAGE_SIZE);
+			}
+		}
+	}
+}
+
+/*
+ * Certain areas of memory require very specific protection flags,
+ * for example the kernel text. Callers don't always get this
+ * right so this function checks and fixes these known static
+ * required protection bits.
+ */
+static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
+					  unsigned long pfn)
+{
+	pgprot_t forbidden = __pgprot(0);
+
+	/*
+	 * The kernel text needs to be executable for obvious reasons.
+	 * Does not cover __inittext since that is gone later on.
+	 */
+	if (within(address, (unsigned long)_text, (unsigned long)_etext))
+		prot |= L_PTE_EXEC;
+
+	/*
+	 * The .rodata section needs to be read-only. Using the pfn
+	 * catches all aliases.
+	 */
+	if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
+		   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
+		pgprot_val(forbidden) |= L_PTE_WRITE;
+
+	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+
+	return prot;
+}
+
+static inline pgprot_t pte_to_pmd_pgprot(unsigned long pte,
+					 unsigned long ext_prot)
+{
+	pgprot_t ref_prot = 0;
+
+	ref_prot |= PMD_TYPE_SECT | PMD_SECT_XN;
+
+	if (pte & L_PTE_MT_BUFFERABLE)
+		ref_prot |= PMD_SECT_BUFFERABLE;
+
+	if (pte & L_PTE_MT_WRITETHROUGH)
+		ref_prot |= PMD_SECT_CACHEABLE;
+
+	if (pte & L_PTE_SHARED)
+		ref_prot |= PMD_SECT_S;
+
+	if (pte & L_PTE_EXEC)
+		ref_prot &= ~PMD_SECT_XN;
+
+	if (pte & L_PTE_WRITE)
+		ref_prot |= PMD_SECT_AP_WRITE;
+
+	ref_prot |= (ext_prot & (PTE_EXT_AP0 | PTE_EXT_AP1 | PTE_EXT_APX |
+				 PTE_EXT_NG | (7 << 6))) << 6;
+
+	return ref_prot;
+}
+
+static inline pgprot_t pmd_to_pte_pgprot(unsigned long pmd,
+					 unsigned long *ext_prot)
+{
+	pgprot_t ref_prot = 0;
+
+	ref_prot |= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_EXEC;
+
+	if (pmd & PMD_SECT_BUFFERABLE)
+		ref_prot |= L_PTE_MT_BUFFERABLE;
+
+	if (pmd & PMD_SECT_CACHEABLE)
+		ref_prot |= L_PTE_MT_WRITETHROUGH;
+
+	if (pmd & PMD_SECT_S)
+		ref_prot |= L_PTE_SHARED;
+
+	if (pmd & PMD_SECT_XN)
+		ref_prot &= ~L_PTE_EXEC;
+
+	if (pmd & PMD_SECT_AP_WRITE)
+		ref_prot |= L_PTE_WRITE;
+
+	/* AP/APX/TEX bits */
+	*ext_prot = (pmd & (PMD_SECT_AP_WRITE | PMD_SECT_AP_READ |
+			    PMD_SECT_APX | PMD_SECT_nG | (7 << 12))) >> 6;
+
+	return ref_prot;
+}
+
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
+pte_t *lookup_address(unsigned long address, unsigned int *level)
+{
+	pgd_t *pgd = pgd_offset_k(address);
+	pte_t *pte;
+	pmd_t *pmd;
+
+	/* pmds are folded into pgds on ARM */
+	*level = PG_LEVEL_NONE;
+
+	if (pgd == NULL || pgd_none(*pgd))
+		return NULL;
+
+	pmd = pmd_offset(pgd, address);
+
+	if (pmd == NULL || pmd_none(*pmd) || !pmd_present(*pmd))
+		return NULL;
+
+	if (((pmd_val(*pmd) & (PMD_TYPE_SECT | PMD_SECT_SUPER))
+	     == (PMD_TYPE_SECT | PMD_SECT_SUPER)) || !pmd_present(*pmd)) {
+
+		return NULL;
+	} else if (pmd_val(*pmd) & PMD_TYPE_SECT) {
+
+		*level = PG_LEVEL_2M;
+		return (pte_t *)pmd;
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+
+	if ((pte == NULL) || pte_none(*pte))
+		return NULL;
+
+	*level = PG_LEVEL_4K;
+
+	return pte;
+}
+EXPORT_SYMBOL_GPL(lookup_address);
+
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
+static void __set_pmd_pte(pmd_t *pmd, unsigned long address, pte_t *pte)
+{
+	struct page *page;
+
+	cpa_debug("__set_pmd_pte %x %x %x\n", pmd, pte, *pte);
+
+	/* change init_mm */
+	pmd_populate_kernel(&init_mm, pmd, pte);
+
+	/* change entry in all the pgd's */
+	list_for_each_entry(page, &pgd_list, lru) {
+		cpa_debug("list %x %x %x\n", (unsigned long)page,
+			  (unsigned long)pgd_index(address), address);
+		pmd = pmd_offset(((pgd_t *)page_address(page)) +
+				 pgd_index(address), address);
+		pmd_populate_kernel(NULL, pmd, pte);
+	}
+
+}
+
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+			struct cpa_data *cpa)
+{
+	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+	pte_t old_pte, *tmp;
+	pgprot_t old_prot, new_prot, ext_prot, req_prot;
+	int i, do_split = 1;
+	unsigned int level;
+
+	if (cpa->force_split)
+		return 1;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	switch (level) {
+
+	case PG_LEVEL_2M:
+		psize = PMD_SIZE;
+		pmask = PMD_MASK;
+		break;
+
+	default:
+		do_split = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Calculate the number of pages, which fit into this large
+	 * page starting at address:
+	 */
+	nextpage_addr = (address + psize) & pmask;
+	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
+
+	old_prot = new_prot = req_prot = pmd_to_pte_pgprot(pmd_val(*kpte),
+							   &ext_prot);
+
+	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
+
+	/*
+	 * old_pte points to the large page base address. So we need
+	 * to add the offset of the virtual address:
+	 */
+	pfn = pmd_pfn(*kpte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+	cpa->pfn = pfn;
+
+	new_prot = static_protections(req_prot, address, pfn);
+
+	/*
+	 * We need to check the full range, whether
+	 * static_protection() requires a different pgprot for one of
+	 * the pages in the range we try to preserve:
+	 */
+	addr = address & pmask;
+	pfn = pmd_pfn(old_pte);
+	for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+		pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
+
+		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+			goto out_unlock;
+	}
+
+	/*
+	 * If there are no changes, return.
+	 * maxpages has been updated above:
+	 */
+	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+		do_split = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * convert prot to pmd format
+	 */
+	new_prot = pte_to_pmd_pgprot(new_prot, ext_prot);
+
+	/*
+	 * We need to change the attributes. Check, whether we can
+	 * change the large page in one go. We request a split, when
+	 * the address is not aligned and the number of pages is
+	 * smaller than the number of pages in the large page. Note
+	 * that we limited the number of possible pages already to
+	 * the number of pages in the large page.
+	 */
+	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+		/*
+		 * The address is aligned and the number of pages
+		 * covers the full page.
+		 */
+		phys_addr_t phys = __pfn_to_phys(pmd_pfn(*kpte));
+		pmd_t *p = (pmd_t *)kpte;
+
+		*kpte++ = __pmd(phys | new_prot);
+		*kpte = __pmd((phys + SECTION_SIZE) | new_prot);
+		flush_pmd_entry(p);
+		cpa->flags |= CPA_FLUSHTLB;
+		do_split = 0;
+		cpa_debug("preserving page at phys %x pmd %x\n", phys, p);
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	return do_split;
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+	unsigned long flags, pfn, pfninc = 1;
+	unsigned int i, level;
+	pte_t *pbase, *tmp;
+	pgprot_t ref_prot = 0, ext_prot = 0;
+	int ret = 0;
+
+	pbase = pte_alloc_one_kernel(&init_mm, address);
+	if (!pbase)
+		return -ENOMEM;
+
+	cpa_debug("split_large_page %x PMD %x new pte @ %x\n", address,
+		  *kpte, pbase);
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up for us already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	/*
+	 * we only split 2MB entries for now
+	 */
+	if (level != PG_LEVEL_2M) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ref_prot = pmd_to_pte_pgprot(pmd_val(*kpte), &ext_prot);
+
+	/*
+	 * Get the target pfn from the original entry:
+	 */
+	pfn = pmd_pfn(*kpte);
+	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+		set_pte_ext(&pbase[i], pfn_pte(pfn, ref_prot), ext_prot);
+
+	if (address >= (unsigned long)__va(0) &&
+	    address < (unsigned long)__va(lowmem_end_addr))
+		split_page_count(level);
+
+	/*
+	 * Install the new, split up pagetable.
+	 */
+	__set_pmd_pte((pmd_t *)kpte, address, pbase);
+
+	pbase = NULL;
+
+out_unlock:
+	/*
+	 * If we dropped out via the lookup_address check under
+	 * pgd_lock then stick the page back into the pool:
+	 */
+	if (pbase)
+		pte_free_kernel(&init_mm, pbase);
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	return ret;
+}
+
+static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
+			       int primary)
+{
+	/*
+	 * Ignore all non primary paths.
+	 */
+	if (!primary)
+		return 0;
+
+	/*
+	 * Ignore the NULL PTE for kernel identity mapping, as it is expected
+	 * to have holes.
+	 * Also set numpages to '1' indicating that we processed cpa req for
+	 * one virtual address page and its pfn. TBD: numpages can be set based
+	 * on the initial value and the level returned by lookup_address().
+	 */
+	if (within(vaddr, PAGE_OFFSET,
+		   PAGE_OFFSET + lowmem_end_addr)) {
+		cpa->numpages = 1;
+		cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
+		return 0;
+	} else {
+		WARN(1, KERN_WARNING "CPA: called for zero pte. "
+		     "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
+		     *cpa->vaddr);
+
+		return -EFAULT;
+	}
+}
+
+static int __change_page_attr(struct cpa_data *cpa, int primary)
+{
+	unsigned long address;
+	int do_split, err;
+	unsigned int level;
+	pte_t *kpte, old_pte;
+
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+
+		if (unlikely(PageHighMem(page)))
+			return 0;
+
+		address = (unsigned long)page_address(page);
+
+	} else if (cpa->flags & CPA_ARRAY)
+		address = cpa->vaddr[cpa->curpage];
+	else
+		address = *cpa->vaddr;
+
+repeat:
+	kpte = lookup_address(address, &level);
+	if (!kpte)
+		return __cpa_process_fault(cpa, address, primary);
+
+	old_pte = *kpte;
+	if (!pte_val(old_pte))
+		return __cpa_process_fault(cpa, address, primary);
+
+	if (level == PG_LEVEL_4K) {
+		pte_t new_pte;
+		pgprot_t new_prot = pte_pgprot(old_pte);
+		unsigned long pfn = pte_pfn(old_pte);
+
+		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+
+		new_prot = static_protections(new_prot, address, pfn);
+
+		/*
+		 * We need to keep the pfn from the existing PTE,
+		 * after all we're only going to change its attributes,
+		 * not the memory it points to.
+		 */
+		new_pte = pfn_pte(pfn, new_prot);
+		cpa->pfn = pfn;
+
+		/*
+		 * Do we really change anything ?
+		 */
+		if (pte_val(old_pte) != pte_val(new_pte)) {
+			set_pte_ext(kpte, new_pte, 0);
+			/*
+			 * FIXME : is this needed on arm?
+			 * set_pte_ext already does a flush
+			 */
+			cpa->flags |= CPA_FLUSHTLB;
+		}
+		cpa->numpages = 1;
+		return 0;
+	}
+
+	/*
+	 * Check, whether we can keep the large page intact
+	 * and just change the pte:
+	 */
+	do_split = try_preserve_large_page(kpte, address, cpa);
+
+	/*
+	 * When the range fits into the existing large page,
+	 * return. cpa->numpages and cpa->tlbflush have been updated in
+	 * try_preserve_large_page():
+	 */
+	if (do_split <= 0)
+		return do_split;
+
+	/*
+	 * We have to split the large page:
+	 */
+	err = split_large_page(kpte, address);
+
+	if (!err) {
+		/*
+		 * Do a global flush tlb after splitting the large page
+		 * and before we do the actual change page attribute in the PTE.
+		 *
+		 * Without this, we violate the TLB application note, which says
+		 * "The TLBs may contain both ordinary and large-page
+		 *  translations for a 4-KByte range of linear addresses. This
+		 *  may occur if software modifies the paging structures so that
+		 *  the page size used for the address range changes. If the two
+		 *  translations differ with respect to page frame or attributes
+		 *  (e.g., permissions), processor behavior is undefined and may
+		 *  be implementation-specific."
+		 *
+		 * We do this global tlb flush inside the cpa_lock, so that we
+		 * don't allow any other cpu, with stale tlb entries change the
+		 * page attribute in parallel, that also falls into the
+		 * just split large page entry.
+		 */
+		flush_tlb_all();
+		goto repeat;
+	}
+
+	return err;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+
+static int cpa_process_alias(struct cpa_data *cpa)
+{
+	struct cpa_data alias_cpa;
+	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+	unsigned long vaddr;
+	int ret;
+
+	if (cpa->pfn >= (lowmem_end_addr >> PAGE_SHIFT))
+		return 0;
+
+	/*
+	 * No need to redo, when the primary call touched the direct
+	 * mapping already:
+	 */
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+		if (unlikely(PageHighMem(page)))
+			return 0;
+		vaddr = (unsigned long)page_address(page);
+	} else if (cpa->flags & CPA_ARRAY)
+		vaddr = cpa->vaddr[cpa->curpage];
+	else
+		vaddr = *cpa->vaddr;
+
+	if (!(within(vaddr, PAGE_OFFSET,
+		     PAGE_OFFSET + lowmem_end_addr))) {
+
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &laddr;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+
+		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+{
+	int ret, numpages = cpa->numpages;
+
+	while (numpages) {
+		/*
+		 * Store the remaining nr of pages for the large page
+		 * preservation check.
+		 */
+		cpa->numpages = numpages;
+		/* for array changes, we can't use large page */
+		if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
+			cpa->numpages = 1;
+
+		if (!debug_pagealloc)
+			spin_lock(&cpa_lock);
+		ret = __change_page_attr(cpa, checkalias);
+		if (!debug_pagealloc)
+			spin_unlock(&cpa_lock);
+		if (ret)
+			return ret;
+
+		if (checkalias) {
+			ret = cpa_process_alias(cpa);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * Adjust the number of pages with the result of the
+		 * CPA operation. Either a large page has been
+		 * preserved or a single page update happened.
+		 */
+		BUG_ON(cpa->numpages > numpages);
+		numpages -= cpa->numpages;
+		if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
+			cpa->curpage++;
+		else
+			*cpa->vaddr += cpa->numpages * PAGE_SIZE;
+	}
+	return 0;
+}
+
+static inline int cache_attr(pgprot_t attr)
+{
+	/*
+	 * We need to flush the cache for all memory type changes
+	 * except when a page is being marked write back cacheable
+	 */
+	return !((pgprot_val(attr) & L_PTE_MT_MASK) == L_PTE_MT_WRITEBACK);
+}
+
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
+				    pgprot_t mask_set, pgprot_t mask_clr,
+				    int force_split, int in_flag,
+				    struct page **pages)
+{
+	struct cpa_data cpa;
+	int ret, cache, checkalias;
+	unsigned long baddr = 0;
+
+	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
+		return 0;
+
+	/* Ensure we are PAGE_SIZE aligned */
+	if (in_flag & CPA_ARRAY) {
+		int i;
+		for (i = 0; i < numpages; i++) {
+			if (addr[i] & ~PAGE_MASK) {
+				addr[i] &= PAGE_MASK;
+				WARN_ON_ONCE(1);
+			}
+		}
+	} else if (!(in_flag & CPA_PAGES_ARRAY)) {
+		/*
+		 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
+		 * No need to check in that case.
+		 */
+		if (*addr & ~PAGE_MASK) {
+			*addr &= PAGE_MASK;
+			/*
+			 * People should not be passing in unaligned addresses:
+			 */
+			WARN_ON_ONCE(1);
+		}
+		/*
+		 * Save address for cache flush. *addr is modified in the call
+		 * to __change_page_attr_set_clr() below.
+		 */
+		baddr = *addr;
+	}
+
+	/* Must avoid aliasing mappings in the highmem code */
+	kmap_flush_unused();
+
+	vm_unmap_aliases();
+
+	cpa.vaddr = addr;
+	cpa.pages = pages;
+	cpa.numpages = numpages;
+	cpa.mask_set = mask_set;
+	cpa.mask_clr = mask_clr;
+	cpa.flags = 0;
+	cpa.curpage = 0;
+	cpa.force_split = force_split;
+
+	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
+		cpa.flags |= in_flag;
+
+	/* No alias checking for XN bit modifications */
+	checkalias = (pgprot_val(mask_set) |
+		      pgprot_val(mask_clr)) != L_PTE_EXEC;
+
+	ret = __change_page_attr_set_clr(&cpa, checkalias);
+
+	/*
+	 * Check whether we really changed something:
+	 */
+	if (!(cpa.flags & CPA_FLUSHTLB))
+		goto out;
+
+	cache = cache_attr(mask_set);
+
+	if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+		cpa_flush_array(addr, numpages, cache,
+				cpa.flags, pages);
+	} else
+		cpa_flush_range(baddr, numpages, cache);
+
+out:
+	return ret;
+}
+
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+				       pgprot_t mask, int array)
+{
+	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+					(array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+					 pgprot_t mask, int array)
+{
+	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+					(array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int cpa_set_pages_array(struct page **pages, int numpages,
+				      pgprot_t mask)
+{
+	return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
+					CPA_PAGES_ARRAY, pages);
+}
+
+static inline int cpa_clear_pages_array(struct page **pages, int numpages,
+					pgprot_t mask)
+{
+	return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
+					CPA_PAGES_ARRAY, pages);
+}
+
+int set_memory_uc(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages,
+					__pgprot(L_PTE_MT_UNCACHED),
+					__pgprot(L_PTE_MT_MASK), 0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_uc);
+
+int _set_memory_array(unsigned long *addr, int addrinarray,
+		      unsigned long set, unsigned long clr)
+{
+	return change_page_attr_set_clr(addr, addrinarray, __pgprot(set),
+					__pgprot(clr), 0, CPA_ARRAY, NULL);
+}
+
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+	return _set_memory_array(addr, addrinarray,
+				 L_PTE_MT_UNCACHED, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
+int set_memory_array_wc(unsigned long *addr, int addrinarray)
+{
+	return _set_memory_array(addr, addrinarray,
+				 L_PTE_MT_BUFFERABLE, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_memory_array_wc);
+
+int set_memory_wc(unsigned long addr, int numpages)
+{
+	int ret;
+
+	ret = change_page_attr_set_clr(&addr, numpages,
+				       __pgprot(L_PTE_MT_BUFFERABLE),
+				       __pgprot(L_PTE_MT_MASK),
+				       0, 0, NULL);
+	return ret;
+}
+EXPORT_SYMBOL(set_memory_wc);
+
+int set_memory_wb(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages,
+					__pgprot(L_PTE_MT_WRITEBACK),
+					__pgprot(L_PTE_MT_MASK),
+					0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_wb);
+
+int set_memory_iwb(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages,
+					__pgprot(L_PTE_MT_INNER_WB),
+					__pgprot(L_PTE_MT_MASK),
+					0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_iwb);
+
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+	return change_page_attr_set_clr(addr, addrinarray,
+					__pgprot(L_PTE_MT_WRITEBACK),
+					__pgprot(L_PTE_MT_MASK),
+					0, CPA_ARRAY, NULL);
+
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
+int set_memory_array_iwb(unsigned long *addr, int addrinarray)
+{
+	return change_page_attr_set_clr(addr, addrinarray,
+					__pgprot(L_PTE_MT_INNER_WB),
+					__pgprot(L_PTE_MT_MASK),
+					0, CPA_ARRAY, NULL);
+
+}
+EXPORT_SYMBOL(set_memory_array_iwb);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages, __pgprot(L_PTE_EXEC), 0);
+}
+EXPORT_SYMBOL(set_memory_x);
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(L_PTE_EXEC), 0);
+}
+EXPORT_SYMBOL(set_memory_nx);
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(L_PTE_WRITE), 0);
+}
+EXPORT_SYMBOL_GPL(set_memory_ro);
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages,
+				    __pgprot(L_PTE_WRITE), 0);
+}
+EXPORT_SYMBOL_GPL(set_memory_rw);
+
+int set_memory_np(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(L_PTE_PRESENT), 0);
+}
+
+int set_memory_4k(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+					__pgprot(0), 1, 0, NULL);
+}
+
+static int _set_pages_array(struct page **pages, int addrinarray,
+			    unsigned long set, unsigned long clr)
+{
+	return change_page_attr_set_clr(NULL, addrinarray,
+					__pgprot(set),
+					__pgprot(clr),
+					0, CPA_PAGES_ARRAY, pages);
+}
+
+int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray,
+				L_PTE_MT_UNCACHED, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_uc);
+
+int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray, L_PTE_MT_BUFFERABLE,
+				L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_wc);
+
+int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray,
+				L_PTE_MT_WRITEBACK, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_wb);
+
+int set_pages_array_iwb(struct page **pages, int addrinarray)
+{
+	return _set_pages_array(pages, addrinarray,
+				L_PTE_MT_INNER_WB, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_iwb);
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index ea92009b91df..536bcf393d15 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -19,6 +19,23 @@
 
 #define FIRST_KERNEL_PGD_NR	(FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
 
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_add(&page->lru, &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
+}
+
 /*
  * need to get a 16k page for level 1
  */
@@ -27,6 +44,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	pgd_t *new_pgd, *init_pgd;
 	pmd_t *new_pmd, *init_pmd;
 	pte_t *new_pte, *init_pte;
+	unsigned long flags;
 
 	new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2);
 	if (!new_pgd)
@@ -34,6 +52,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 
 	memset(new_pgd, 0, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
 
+	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Copy over the kernel and IO PGD entries
 	 */
@@ -44,6 +63,10 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 #if !defined(CONFIG_CPU_CACHE_V7) || !defined(CONFIG_SMP)
 	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 #endif
+
+	pgd_list_add(new_pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
 	if (!vectors_high()) {
 		/*
 		 * On ARM, first page must always be allocated since it
@@ -69,6 +92,9 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 no_pte:
 	pmd_free(mm, new_pmd);
 no_pmd:
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(new_pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 	free_pages((unsigned long)new_pgd, 2);
 no_pgd:
 	return NULL;
@@ -78,10 +104,15 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
 {
 	pmd_t *pmd;
 	pgtable_t pte;
+	unsigned long flags;
 
 	if (!pgd)
 		return;
 
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
 	/* pgd is always present and good */
 	pmd = pmd_off(pgd, 0);
 	if (pmd_none(*pmd))
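To close, a hedged sketch of the other entry point this patch exports: lookup_address() walks the kernel tables and reports whether an address is still covered by an intact 2M section or has already been split down to 4K PTEs. The prototype and the PG_LEVEL_* values are taken from pageattr.c above; the probe_mapping() wrapper and its pr_info() reporting are illustrative only, and the header carrying the declarations is assumed.

```c
/*
 * Illustrative sketch -- not part of this commit. Queries the mapping
 * granularity of a kernel virtual address via lookup_address(), which
 * the new pageattr.c above EXPORT_SYMBOL_GPL()s.
 */
#include <linux/kernel.h>
#include <linux/mm.h>

/* Defined and exported by arch/arm/mm/pageattr.c in this patch. */
extern pte_t *lookup_address(unsigned long address, unsigned int *level);

static void probe_mapping(unsigned long vaddr)
{
	unsigned int level;
	pte_t *pte = lookup_address(vaddr, &level);

	if (!pte) {
		pr_info("%#lx: not mapped\n", vaddr);
		return;
	}

	/*
	 * PG_LEVEL_2M means the address still sits under a section
	 * entry; PG_LEVEL_4K means it has been split to PTEs, e.g.
	 * by an earlier set_memory_*() call on part of the section.
	 */
	pr_info("%#lx: %s mapping\n", vaddr,
		level == PG_LEVEL_2M ? "2M section" : "4K page");
}
```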