summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/discontig_32.c3
-rw-r--r--arch/x86/mm/dump_pagetables.c10
-rw-r--r--arch/x86/mm/gup.c298
-rw-r--r--arch/x86/mm/hugetlbpage.c78
-rw-r--r--arch/x86/mm/init_32.c9
-rw-r--r--arch/x86/mm/init_64.c197
-rw-r--r--arch/x86/mm/ioremap.c18
-rw-r--r--arch/x86/mm/memtest.c123
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/numa_64.c8
-rw-r--r--arch/x86/mm/pageattr-test.c3
-rw-r--r--arch/x86/mm/pageattr.c27
-rw-r--r--arch/x86/mm/pat.c144
-rw-r--r--arch/x86/mm/pgtable.c3
-rw-r--r--arch/x86/mm/pgtable_32.c47
-rw-r--r--arch/x86/mm/srat_32.c12
17 files changed, 698 insertions, 289 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9873716e9f76..dfb932dcf136 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o
+ pat.o pgtable.o gup.o
obj-$(CONFIG_X86_32) += pgtable_32.o
@@ -21,3 +21,4 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.o
endif
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
+obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 5dfef9fa061a..62fa440678d8 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -42,7 +42,6 @@
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-static bootmem_data_t node0_bdata;
/*
* numa interface - we expect the numa architecture specific code to have
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn,
for_each_online_node(nid)
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
- NODE_DATA(0)->bdata = &node0_bdata;
+ NODE_DATA(0)->bdata = &bootmem_node_data[0];
setup_bootmem_allocator();
}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0bb0caed8971..a20d1fa64b4e 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot) & ~(PTE_MASK);
- cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
+ prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
+ cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
if (!st->level) {
/* First entry */
@@ -221,7 +221,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
- pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;
+ pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
if (pmd_large(*start) || !pmd_present(*start))
note_page(m, st, __pgprot(prot), 3);
@@ -253,7 +253,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
- pgprotval_t prot = pud_val(*start) & ~PTE_MASK;
+ pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
if (pud_large(*start) || !pud_present(*start))
note_page(m, st, __pgprot(prot), 2);
@@ -288,7 +288,7 @@ static void walk_pgd_level(struct seq_file *m)
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start)) {
- pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;
+ pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
if (pgd_large(*start) || !pgd_present(*start))
note_page(m, &st, __pgprot(prot), 1);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
new file mode 100644
index 000000000000..007bb06c7504
--- /dev/null
+++ b/arch/x86/mm/gup.c
@@ -0,0 +1,298 @@
+/*
+ * Lockless get_user_pages_fast for x86
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/highmem.h>
+
+#include <asm/pgtable.h>
+
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+#ifndef CONFIG_X86_PAE
+ return *ptep;
+#else
+ /*
+ * With get_user_pages_fast, we walk down the pagetables without taking
+ * any locks. For this we would like to load the pointers atoimcally,
+ * but that is not possible (without expensive cmpxchg8b) on PAE. What
+ * we do have is the guarantee that a pte will only either go from not
+ * present to present, or present to not present or both -- it will not
+ * switch to a completely different present page without a TLB flush in
+ * between; something that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ * ptep->pte_high = h;
+ * smp_wmb();
+ * ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ * ptep->pte_low = 0;
+ * smp_wmb();
+ * ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees l iff pte_high
+ * sees h. We load pte_high *after* loading pte_low, which ensures we
+ * don't see an older value of pte_high. *Then* we recheck pte_low,
+ * which ensures that we haven't picked up a changed pte high. We might
+ * have got rubbish values from pte_low and pte_high, but we are
+ * guaranteed that pte_low will not have the present bit set *unless*
+ * it is 'l'. And get_user_pages_fast only operates on present ptes, so
+ * we're safe.
+ *
+ * gup_get_pte should not be used or copied outside gup.c without being
+ * very careful -- it does not atomically load the pte or anything that
+ * is likely to be useful for you.
+ */
+ pte_t pte;
+
+retry:
+ pte.pte_low = ptep->pte_low;
+ smp_rmb();
+ pte.pte_high = ptep->pte_high;
+ smp_rmb();
+ if (unlikely(pte.pte_low != ptep->pte_low))
+ goto retry;
+
+ return pte;
+#endif
+}
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long mask;
+ pte_t *ptep;
+
+ mask = _PAGE_PRESENT|_PAGE_USER;
+ if (write)
+ mask |= _PAGE_RW;
+
+ ptep = pte_offset_map(&pmd, addr);
+ do {
+ pte_t pte = gup_get_pte(ptep);
+ struct page *page;
+
+ if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+ pte_unmap(ptep);
+ return 0;
+ }
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+ get_page(page);
+ pages[*nr] = page;
+ (*nr)++;
+
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(ptep - 1);
+
+ return 1;
+}
+
+static inline void get_head_page_multiple(struct page *page, int nr)
+{
+ VM_BUG_ON(page != compound_head(page));
+ VM_BUG_ON(page_count(page) == 0);
+ atomic_add(nr, &page->_count);
+}
+
+static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long mask;
+ pte_t pte = *(pte_t *)&pmd;
+ struct page *head, *page;
+ int refs;
+
+ mask = _PAGE_PRESENT|_PAGE_USER;
+ if (write)
+ mask |= _PAGE_RW;
+ if ((pte_val(pte) & mask) != mask)
+ return 0;
+ /* hugepages are never "special" */
+ VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ refs = 0;
+ head = pte_page(pte);
+ page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+ get_head_page_multiple(head, refs);
+
+ return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pmd_t *pmdp;
+
+ pmdp = pmd_offset(&pud, addr);
+ do {
+ pmd_t pmd = *pmdp;
+
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(pmd))
+ return 0;
+ if (unlikely(pmd_large(pmd))) {
+ if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
+ return 0;
+ } else {
+ if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+ return 0;
+ }
+ } while (pmdp++, addr = next, addr != end);
+
+ return 1;
+}
+
+static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long mask;
+ pte_t pte = *(pte_t *)&pud;
+ struct page *head, *page;
+ int refs;
+
+ mask = _PAGE_PRESENT|_PAGE_USER;
+ if (write)
+ mask |= _PAGE_RW;
+ if ((pte_val(pte) & mask) != mask)
+ return 0;
+ /* hugepages are never "special" */
+ VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ refs = 0;
+ head = pte_page(pte);
+ page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+ get_head_page_multiple(head, refs);
+
+ return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pud_t *pudp;
+
+ pudp = pud_offset(&pgd, addr);
+ do {
+ pud_t pud = *pudp;
+
+ next = pud_addr_end(addr, end);
+ if (pud_none(pud))
+ return 0;
+ if (unlikely(pud_large(pud))) {
+ if (!gup_huge_pud(pud, addr, next, write, pages, nr))
+ return 0;
+ } else {
+ if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+ return 0;
+ }
+ } while (pudp++, addr = next, addr != end);
+
+ return 1;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next;
+ pgd_t *pgdp;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ start, len)))
+ goto slow_irqon;
+
+ /*
+ * XXX: batch / limit 'nr', to avoid large irq off latency
+ * needs some instrumenting to determine the common sizes used by
+ * important workloads (eg. DB2), and whether limiting the batch size
+ * will decrease performance.
+ *
+ * It seems like we're in the clear for the moment. Direct-IO is
+ * the main guy that batches up lots of get_user_pages, and even
+ * they are limited to 64-at-a-time which is not so many.
+ */
+ /*
+ * This doesn't prevent pagetable teardown, but does prevent
+ * the pagetables and pages from being freed on x86.
+ *
+ * So long as we atomically load page table pointers versus teardown
+ * (which we do on x86, with the above PAE exception), we can follow the
+ * address down to the the page and take a ref on it.
+ */
+ local_irq_disable();
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = *pgdp;
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ goto slow;
+ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ goto slow;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_enable();
+
+ VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+ return nr;
+
+ {
+ int ret;
+
+slow:
+ local_irq_enable();
+slow_irqon:
+ /* Try to get the remaining pages with get_user_pages */
+ start += nr << PAGE_SHIFT;
+ pages += nr;
+
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, start,
+ (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+ up_read(&mm->mmap_sem);
+
+ /* Have to be a bit careful with return values */
+ if (nr > 0) {
+ if (ret < 0)
+ ret = nr;
+ else
+ ret += nr;
+ }
+
+ return ret;
+ }
+}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 0b3d567e686d..8f307d914c2e 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
return 1;
}
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
@@ -133,9 +134,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (pud) {
- if (pud_none(*pud))
- huge_pmd_share(mm, addr, pud);
- pte = (pte_t *) pmd_alloc(mm, pud, addr);
+ if (sz == PUD_SIZE) {
+ pte = (pte_t *)pud;
+ } else {
+ BUG_ON(sz != PMD_SIZE);
+ if (pud_none(*pud))
+ huge_pmd_share(mm, addr, pud);
+ pte = (pte_t *) pmd_alloc(mm, pud, addr);
+ }
}
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
@@ -151,8 +157,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pgd = pgd_offset(mm, addr);
if (pgd_present(*pgd)) {
pud = pud_offset(pgd, addr);
- if (pud_present(*pud))
+ if (pud_present(*pud)) {
+ if (pud_large(*pud))
+ return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
+ }
}
return (pte_t *) pmd;
}
@@ -188,6 +197,11 @@ int pmd_huge(pmd_t pmd)
return 0;
}
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
@@ -208,6 +222,11 @@ int pmd_huge(pmd_t pmd)
return !!(pmd_val(pmd) & _PAGE_PSE);
}
+int pud_huge(pud_t pud)
+{
+ return !!(pud_val(pud) & _PAGE_PSE);
+}
+
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
@@ -216,9 +235,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
page = pte_page(*(pte_t *)pmd);
if (page)
- page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+ page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+ return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+ pud_t *pud, int write)
+{
+ struct page *page;
+
+ page = pte_page(*(pte_t *)pud);
+ if (page)
+ page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
return page;
}
+
#endif
/* x86_64 also uses this file */
@@ -228,6 +260,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
+ struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long start_addr;
@@ -240,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
}
full_search:
- addr = ALIGN(start_addr, HPAGE_SIZE);
+ addr = ALIGN(start_addr, huge_page_size(h));
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
@@ -262,7 +295,7 @@ full_search:
}
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
- addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+ addr = ALIGN(vma->vm_end, huge_page_size(h));
}
}
@@ -270,6 +303,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
unsigned long addr0, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
+ struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev_vma;
unsigned long base = mm->mmap_base, addr = addr0;
@@ -290,7 +324,7 @@ try_again:
goto fail;
/* either no address requested or cant fit in requested address hole */
- addr = (mm->free_area_cache - len) & HPAGE_MASK;
+ addr = (mm->free_area_cache - len) & huge_page_mask(h);
do {
/*
* Lookup failure means no vma is above this address,
@@ -321,7 +355,7 @@ try_again:
largest_hole = vma->vm_start - addr;
/* try just below the current vma->vm_start */
- addr = (vma->vm_start - len) & HPAGE_MASK;
+ addr = (vma->vm_start - len) & huge_page_mask(h);
} while (len <= vma->vm_start);
fail:
@@ -359,22 +393,23 @@ unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
+ struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- if (len & ~HPAGE_MASK)
+ if (len & ~huge_page_mask(h))
return -EINVAL;
if (len > TASK_SIZE)
return -ENOMEM;
if (flags & MAP_FIXED) {
- if (prepare_hugepage_range(addr, len))
+ if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
}
if (addr) {
- addr = ALIGN(addr, HPAGE_SIZE);
+ addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
@@ -390,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+#ifdef CONFIG_X86_64
+static __init int setup_hugepagesz(char *opt)
+{
+ unsigned long ps = memparse(opt, &opt);
+ if (ps == PMD_SIZE) {
+ hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+ } else if (ps == PUD_SIZE && cpu_has_gbpages) {
+ hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+ } else {
+ printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
+ ps >> 20);
+ return 0;
+ }
+ return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9689a5138e64..60ec1d08ff24 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -458,11 +458,7 @@ static void __init pagetable_init(void)
{
pgd_t *pgd_base = swapper_pg_dir;
- paravirt_pagetable_setup_start(pgd_base);
-
permanent_kmaps_init(pgd_base);
-
- paravirt_pagetable_setup_done(pgd_base);
}
#ifdef CONFIG_ACPI_SLEEP
@@ -844,6 +840,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
reserve_early(table_start << PAGE_SHIFT,
table_end << PAGE_SHIFT, "PGTABLE");
+ if (!after_init_bootmem)
+ early_memtest(start, end);
+
return end >> PAGE_SHIFT;
}
@@ -868,8 +867,6 @@ void __init paging_init(void)
*/
sparse_init();
zone_sizes_init();
-
- paravirt_post_allocator_init();
}
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 306049edd553..d3746efb060d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-int direct_gbpages __meminitdata
+int direct_gbpages
#ifdef CONFIG_DIRECT_GBPAGES
= 1
#endif
@@ -86,46 +86,13 @@ early_param("gbpages", parse_direct_gbpages_on);
* around without checking the pgd every time.
*/
-void show_mem(void)
-{
- long i, total = 0, reserved = 0;
- long shared = 0, cached = 0;
- struct page *page;
- pg_data_t *pgdat;
-
- printk(KERN_INFO "Mem-info:\n");
- show_free_areas();
- for_each_online_pgdat(pgdat) {
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
- /*
- * This loop can take a while with 256 GB and
- * 4k pages so defer the NMI watchdog:
- */
- if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
- touch_nmi_watchdog();
-
- if (!pfn_valid(pgdat->node_start_pfn + i))
- continue;
-
- page = pfn_to_page(pgdat->node_start_pfn + i);
- total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
- }
- }
- printk(KERN_INFO "%lu pages of RAM\n", total);
- printk(KERN_INFO "%lu reserved pages\n", reserved);
- printk(KERN_INFO "%lu pages shared\n", shared);
- printk(KERN_INFO "%lu pages swap cached\n", cached);
-}
-
int after_bootmem;
-static __init void *spp_getpage(void)
+/*
+ * NOTE: This function is marked __ref because it calls __init function
+ * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
+ */
+static __ref void *spp_getpage(void)
{
void *ptr;
@@ -274,7 +241,7 @@ static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;
static unsigned long __meminitdata table_top;
-static __meminit void *alloc_low_page(unsigned long *phys)
+static __ref void *alloc_low_page(unsigned long *phys)
{
unsigned long pfn = table_end++;
void *adr;
@@ -295,7 +262,7 @@ static __meminit void *alloc_low_page(unsigned long *phys)
return adr;
}
-static __meminit void unmap_low_page(void *adr)
+static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
@@ -351,6 +318,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
{
unsigned long pages = 0;
unsigned long last_map_addr = end;
+ unsigned long start = address;
int i = pmd_index(address);
@@ -368,16 +336,24 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
}
if (pmd_val(*pmd)) {
- if (!pmd_large(*pmd))
+ if (!pmd_large(*pmd)) {
+ spin_lock(&init_mm.page_table_lock);
last_map_addr = phys_pte_update(pmd, address,
- end);
+ end);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ /* Count entries we're using from level2_ident_pgt */
+ if (start == 0)
+ pages++;
continue;
}
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
+ spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pmd,
pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+ spin_unlock(&init_mm.page_table_lock);
last_map_addr = (address & PMD_MASK) + PMD_SIZE;
continue;
}
@@ -386,7 +362,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
last_map_addr = phys_pte_init(pte, address, end);
unmap_low_page(pte);
+ spin_lock(&init_mm.page_table_lock);
pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
+ spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
return last_map_addr;
@@ -399,9 +377,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
pmd_t *pmd = pmd_offset(pud, 0);
unsigned long last_map_addr;
- spin_lock(&init_mm.page_table_lock);
last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
- spin_unlock(&init_mm.page_table_lock);
__flush_tlb_all();
return last_map_addr;
}
@@ -437,20 +413,21 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
+ spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pud,
pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+ spin_unlock(&init_mm.page_table_lock);
last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
continue;
}
pmd = alloc_low_page(&pmd_phys);
-
- spin_lock(&init_mm.page_table_lock);
last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
unmap_low_page(pmd);
+
+ spin_lock(&init_mm.page_table_lock);
pud_populate(&init_mm, pud, __va(pmd_phys));
spin_unlock(&init_mm.page_table_lock);
-
}
__flush_tlb_all();
update_page_count(PG_LEVEL_1G, pages);
@@ -517,118 +494,6 @@ static void __init init_gbpages(void)
direct_gbpages = 0;
}
-#ifdef CONFIG_MEMTEST
-
-static void __init memtest(unsigned long start_phys, unsigned long size,
- unsigned pattern)
-{
- unsigned long i;
- unsigned long *start;
- unsigned long start_bad;
- unsigned long last_bad;
- unsigned long val;
- unsigned long start_phys_aligned;
- unsigned long count;
- unsigned long incr;
-
- switch (pattern) {
- case 0:
- val = 0UL;
- break;
- case 1:
- val = -1UL;
- break;
- case 2:
- val = 0x5555555555555555UL;
- break;
- case 3:
- val = 0xaaaaaaaaaaaaaaaaUL;
- break;
- default:
- return;
- }
-
- incr = sizeof(unsigned long);
- start_phys_aligned = ALIGN(start_phys, incr);
- count = (size - (start_phys_aligned - start_phys))/incr;
- start = __va(start_phys_aligned);
- start_bad = 0;
- last_bad = 0;
-
- for (i = 0; i < count; i++)
- start[i] = val;
- for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
- if (*start != val) {
- if (start_phys_aligned == last_bad + incr) {
- last_bad += incr;
- } else {
- if (start_bad) {
- printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
- val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
- }
- start_bad = last_bad = start_phys_aligned;
- }
- }
- }
- if (start_bad) {
- printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
- val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
- }
-
-}
-
-/* default is disabled */
-static int memtest_pattern __initdata;
-
-static int __init parse_memtest(char *arg)
-{
- if (arg)
- memtest_pattern = simple_strtoul(arg, NULL, 0);
- return 0;
-}
-
-early_param("memtest", parse_memtest);
-
-static void __init early_memtest(unsigned long start, unsigned long end)
-{
- u64 t_start, t_size;
- unsigned pattern;
-
- if (!memtest_pattern)
- return;
-
- printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
- for (pattern = 0; pattern < memtest_pattern; pattern++) {
- t_start = start;
- t_size = 0;
- while (t_start < end) {
- t_start = find_e820_area_size(t_start, &t_size, 1);
-
- /* done ? */
- if (t_start >= end)
- break;
- if (t_start + t_size > end)
- t_size = end - t_start;
-
- printk(KERN_CONT "\n %016llx - %016llx pattern %d",
- (unsigned long long)t_start,
- (unsigned long long)t_start + t_size, pattern);
-
- memtest(t_start, t_size, pattern);
-
- t_start += t_size;
- }
- }
- printk(KERN_CONT "\n");
-}
-#else
-static void __init early_memtest(unsigned long start, unsigned long end)
-{
-}
-#endif
-
static unsigned long __init kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask)
@@ -654,16 +519,14 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
continue;
}
- if (after_bootmem)
- pud = pud_offset(pgd, start & PGDIR_MASK);
- else
- pud = alloc_low_page(&pud_phys);
-
+ pud = alloc_low_page(&pud_phys);
last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
page_size_mask);
unmap_low_page(pud);
- pgd_populate(&init_mm, pgd_offset_k(start),
- __va(pud_phys));
+
+ spin_lock(&init_mm.page_table_lock);
+ pgd_populate(&init_mm, pgd, __va(pud_phys));
+ spin_unlock(&init_mm.page_table_lock);
}
return last_map_addr;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 24c1d3c30186..d4b6e6a29ae3 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -170,7 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
- retval = reserve_memtype(phys_addr, phys_addr + size,
+ retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
prot_val, &new_prot_val);
if (retval) {
pr_debug("Warning: reserve_memtype returned %d\n", retval);
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
return (void __iomem *)ret;
}
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
+ unsigned long prot_val)
+{
+ return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
/**
* iounmap - Free a IO remapping
* @addr: virtual address from ioremap_*
@@ -545,13 +553,11 @@ static int __init check_early_ioremap_leak(void)
{
if (!early_ioremap_nested)
return 0;
-
- printk(KERN_WARNING
+ WARN(1, KERN_WARNING
"Debug warning: early ioremap leak of %d areas detected.\n",
- early_ioremap_nested);
+ early_ioremap_nested);
printk(KERN_WARNING
- "please boot with early_ioremap_debug and report the dmesg.\n");
- WARN_ON(1);
+ "please boot with early_ioremap_debug and report the dmesg.\n");
return 1;
}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
new file mode 100644
index 000000000000..672e17f8262a
--- /dev/null
+++ b/arch/x86/mm/memtest.c
@@ -0,0 +1,123 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/pfn.h>
+
+#include <asm/e820.h>
+
+static void __init memtest(unsigned long start_phys, unsigned long size,
+ unsigned pattern)
+{
+ unsigned long i;
+ unsigned long *start;
+ unsigned long start_bad;
+ unsigned long last_bad;
+ unsigned long val;
+ unsigned long start_phys_aligned;
+ unsigned long count;
+ unsigned long incr;
+
+ switch (pattern) {
+ case 0:
+ val = 0UL;
+ break;
+ case 1:
+ val = -1UL;
+ break;
+ case 2:
+#ifdef CONFIG_X86_64
+ val = 0x5555555555555555UL;
+#else
+ val = 0x55555555UL;
+#endif
+ break;
+ case 3:
+#ifdef CONFIG_X86_64
+ val = 0xaaaaaaaaaaaaaaaaUL;
+#else
+ val = 0xaaaaaaaaUL;
+#endif
+ break;
+ default:
+ return;
+ }
+
+ incr = sizeof(unsigned long);
+ start_phys_aligned = ALIGN(start_phys, incr);
+ count = (size - (start_phys_aligned - start_phys))/incr;
+ start = __va(start_phys_aligned);
+ start_bad = 0;
+ last_bad = 0;
+
+ for (i = 0; i < count; i++)
+ start[i] = val;
+ for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
+ if (*start != val) {
+ if (start_phys_aligned == last_bad + incr) {
+ last_bad += incr;
+ } else {
+ if (start_bad) {
+ printk(KERN_CONT "\n %010lx bad mem addr %010lx - %010lx reserved",
+ val, start_bad, last_bad + incr);
+ reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+ }
+ start_bad = last_bad = start_phys_aligned;
+ }
+ }
+ }
+ if (start_bad) {
+ printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
+ val, start_bad, last_bad + incr);
+ reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+ }
+
+}
+
+/* default is disabled */
+static int memtest_pattern __initdata;
+
+static int __init parse_memtest(char *arg)
+{
+ if (arg)
+ memtest_pattern = simple_strtoul(arg, NULL, 0);
+ return 0;
+}
+
+early_param("memtest", parse_memtest);
+
+void __init early_memtest(unsigned long start, unsigned long end)
+{
+ u64 t_start, t_size;
+ unsigned pattern;
+
+ if (!memtest_pattern)
+ return;
+
+ printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
+ for (pattern = 0; pattern < memtest_pattern; pattern++) {
+ t_start = start;
+ t_size = 0;
+ while (t_start < end) {
+ t_start = find_e820_area_size(t_start, &t_size, 1);
+
+ /* done ? */
+ if (t_start >= end)
+ break;
+ if (t_start + t_size > end)
+ t_size = end - t_start;
+
+ printk(KERN_CONT "\n %010llx - %010llx pattern %d",
+ (unsigned long long)t_start,
+ (unsigned long long)t_start + t_size, pattern);
+
+ memtest(t_start, t_size, pattern);
+
+ t_start += t_size;
+ }
+ }
+ printk(KERN_CONT "\n");
+}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index e7397e108beb..635b50e85581 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -430,7 +430,9 @@ static void enter_uniprocessor(void)
"may miss events.\n");
}
-static void leave_uniprocessor(void)
+/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
+ but this whole function is ifdefed CONFIG_HOTPLUG_CPU */
+static void __ref leave_uniprocessor(void)
{
int cpu;
int err;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index b432d5781773..a4dd793d6003 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,15 +20,9 @@
#include <asm/acpi.h>
#include <asm/k8.h>
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-
struct memnode memnode;
s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
@@ -202,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
nodedata_phys + pgdat_size - 1);
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
- NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
+ NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 0dcd42eb94e6..d4aa503caaa2 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -221,8 +221,7 @@ static int pageattr_test(void)
failed += print_split(&sc);
if (failed) {
- printk(KERN_ERR "NOT PASSED. Please report.\n");
- WARN_ON(1);
+ WARN(1, KERN_ERR "NOT PASSED. Please report.\n");
return -EINVAL;
} else {
if (print)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 65c6e46bf059..43e2f8483e4f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -55,13 +55,19 @@ static void split_page_count(int level)
int arch_report_meminfo(char *page)
{
- int n = sprintf(page, "DirectMap4k: %8lu\n"
- "DirectMap2M: %8lu\n",
- direct_pages_count[PG_LEVEL_4K],
- direct_pages_count[PG_LEVEL_2M]);
+ int n = sprintf(page, "DirectMap4k: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_4K] << 2);
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_2M] << 11);
+#else
+ n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_2M] << 12);
+#endif
#ifdef CONFIG_X86_64
- n += sprintf(page + n, "DirectMap1G: %8lu\n",
- direct_pages_count[PG_LEVEL_1G]);
+ if (direct_gbpages)
+ n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_1G] << 20);
#endif
return n;
}
@@ -592,10 +598,9 @@ repeat:
if (!pte_val(old_pte)) {
if (!primary)
return 0;
- printk(KERN_WARNING "CPA: called for zero pte. "
+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
"vaddr = %lx cpa->vaddr = %lx\n", address,
cpa->vaddr);
- WARN_ON(1);
return -EINVAL;
}
@@ -844,7 +849,7 @@ int set_memory_uc(unsigned long addr, int numpages)
/*
* for now UC MINUS. see comments in ioremap_nocache()
*/
- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_UC_MINUS, NULL))
return -EINVAL;
@@ -863,7 +868,7 @@ int set_memory_wc(unsigned long addr, int numpages)
if (!pat_enabled)
return set_memory_uc(addr, numpages);
- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_WC, NULL))
return -EINVAL;
@@ -879,7 +884,7 @@ int _set_memory_wb(unsigned long addr, int numpages)
int set_memory_wb(unsigned long addr, int numpages)
{
- free_memtype(addr, addr + numpages * PAGE_SIZE);
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return _set_memory_wb(addr, numpages);
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d4585077977a..2a50e0fa64a5 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -12,6 +12,8 @@
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
@@ -205,6 +207,9 @@ static int chk_conflict(struct memtype *new, struct memtype *entry,
return -EBUSY;
}
+static struct memtype *cached_entry;
+static u64 cached_start;
+
/*
* req_type typically has one of the:
* - _PAGE_CACHE_WB
@@ -278,11 +283,17 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
spin_lock(&memtype_lock);
+ if (cached_entry && start >= cached_start)
+ entry = cached_entry;
+ else
+ entry = list_entry(&memtype_list, struct memtype, nd);
+
/* Search for existing mapping that overlaps the current range */
where = NULL;
- list_for_each_entry(entry, &memtype_list, nd) {
+ list_for_each_entry_continue(entry, &memtype_list, nd) {
if (end <= entry->start) {
where = entry->nd.prev;
+ cached_entry = list_entry(where, struct memtype, nd);
break;
} else if (start <= entry->start) { /* end > entry->start */
err = chk_conflict(new, entry, new_type);
@@ -290,6 +301,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
where = entry->nd.prev;
+ cached_entry = list_entry(where,
+ struct memtype, nd);
}
break;
} else if (start < entry->end) { /* start > entry->start */
@@ -297,7 +310,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (!err) {
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
- where = &entry->nd;
+ cached_entry = list_entry(entry->nd.prev,
+ struct memtype, nd);
+
+ /*
+ * Move to right position in the linked
+ * list to add this new entry
+ */
+ list_for_each_entry_continue(entry,
+ &memtype_list, nd) {
+ if (start <= entry->start) {
+ where = entry->nd.prev;
+ break;
+ }
+ }
}
break;
}
@@ -312,6 +338,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
return err;
}
+ cached_start = start;
+
if (where)
list_add(&new->nd, where);
else
@@ -341,6 +369,9 @@ int free_memtype(u64 start, u64 end)
spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) {
if (entry->start == start && entry->end == end) {
+ if (cached_entry == entry || cached_start == start)
+ cached_entry = NULL;
+
list_del(&entry->nd);
kfree(entry);
err = 0;
@@ -359,22 +390,14 @@ int free_memtype(u64 start, u64 end)
}
-/*
- * /dev/mem mmap interface. The memtype used for mapping varies:
- * - Use UC for mappings with O_SYNC flag
- * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
- * inherit the memtype from existing mapping.
- * - Else use UC_MINUS memtype (for backward compatibility with existing
- * X drivers.
- */
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
return vma_prot;
}
-#ifdef CONFIG_NONPROMISC_DEVMEM
-/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
+#ifdef CONFIG_STRICT_DEVMEM
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
return 1;
@@ -398,20 +421,20 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
}
return 1;
}
-#endif /* CONFIG_NONPROMISC_DEVMEM */
+#endif /* CONFIG_STRICT_DEVMEM */
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
u64 offset = ((u64) pfn) << PAGE_SHIFT;
- unsigned long flags = _PAGE_CACHE_UC_MINUS;
+ unsigned long flags = -1;
int retval;
if (!range_is_allowed(pfn, size))
return 0;
if (file->f_flags & O_SYNC) {
- flags = _PAGE_CACHE_UC;
+ flags = _PAGE_CACHE_UC_MINUS;
}
#ifdef CONFIG_X86_32
@@ -434,13 +457,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
#endif
/*
- * With O_SYNC, we can only take UC mapping. Fail if we cannot.
+ * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
+ *
* Without O_SYNC, we want to get
* - WB for WB-able memory and no other conflicting mappings
* - UC_MINUS for non-WB-able memory with no other conflicting mappings
* - Inherit from confliting mappings otherwise
*/
- if (flags != _PAGE_CACHE_UC_MINUS) {
+ if (flags != -1) {
retval = reserve_memtype(offset, offset + size, flags, NULL);
} else {
retval = reserve_memtype(offset, offset + size, -1, &flags);
@@ -489,3 +513,89 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
free_memtype(addr, addr + size);
}
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* get Nth element of the linked list */
+static struct memtype *memtype_get_idx(loff_t pos)
+{
+ struct memtype *list_node, *print_entry;
+ int i = 1;
+
+ print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!print_entry)
+ return NULL;
+
+ spin_lock(&memtype_lock);
+ list_for_each_entry(list_node, &memtype_list, nd) {
+ if (pos == i) {
+ *print_entry = *list_node;
+ spin_unlock(&memtype_lock);
+ return print_entry;
+ }
+ ++i;
+ }
+ spin_unlock(&memtype_lock);
+ kfree(print_entry);
+ return NULL;
+}
+
+static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ if (*pos == 0) {
+ ++*pos;
+ seq_printf(seq, "PAT memtype list:\n");
+ }
+
+ return memtype_get_idx(*pos);
+}
+
+static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ return memtype_get_idx(*pos);
+}
+
+static void memtype_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int memtype_seq_show(struct seq_file *seq, void *v)
+{
+ struct memtype *print_entry = (struct memtype *)v;
+
+ seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
+ print_entry->start, print_entry->end);
+ kfree(print_entry);
+ return 0;
+}
+
+static struct seq_operations memtype_seq_ops = {
+ .start = memtype_seq_start,
+ .next = memtype_seq_next,
+ .stop = memtype_seq_stop,
+ .show = memtype_seq_show,
+};
+
+static int memtype_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &memtype_seq_ops);
+}
+
+static const struct file_operations memtype_fops = {
+ .open = memtype_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init pat_memtype_list_init(void)
+{
+ debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
+ NULL, &memtype_fops);
+ return 0;
+}
+
+late_initcall(pat_memtype_list_init);
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 557b2abceef8..d50302774fe2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
unsigned long addr;
int i;
+ if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
+ return;
+
pud = pud_offset(pgd, 0);
for (addr = i = 0; i < PREALLOCATED_PMDS;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index b4becbf8c570..cab0abbd1ebe 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -20,53 +20,6 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
-void show_mem(void)
-{
- int total = 0, reserved = 0;
- int shared = 0, cached = 0;
- int highmem = 0;
- struct page *page;
- pg_data_t *pgdat;
- unsigned long i;
- unsigned long flags;
-
- printk(KERN_INFO "Mem-info:\n");
- show_free_areas();
- for_each_online_pgdat(pgdat) {
- pgdat_resize_lock(pgdat, &flags);
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
- if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
- touch_nmi_watchdog();
- page = pgdat_page_nr(pgdat, i);
- total++;
- if (PageHighMem(page))
- highmem++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
- }
- pgdat_resize_unlock(pgdat, &flags);
- }
- printk(KERN_INFO "%d pages of RAM\n", total);
- printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
- printk(KERN_INFO "%d reserved pages\n", reserved);
- printk(KERN_INFO "%d pages shared\n", shared);
- printk(KERN_INFO "%d pages swap cached\n", cached);
-
- printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
- printk(KERN_INFO "%lu pages writeback\n",
- global_page_state(NR_WRITEBACK));
- printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
- printk(KERN_INFO "%lu pages slab\n",
- global_page_state(NR_SLAB_RECLAIMABLE) +
- global_page_state(NR_SLAB_UNRECLAIMABLE));
- printk(KERN_INFO "%lu pages pagetables\n",
- global_page_state(NR_PAGETABLE));
-}
-
/*
* Associate a virtual page frame with a given physical page frame
* and protection flags for that frame.
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 1eb2973a301c..16ae70fc57e7 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -178,7 +178,7 @@ void acpi_numa_arch_fixup(void)
* start of the node, and that the current "end" address is after
* the previous one.
*/
-static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
+static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
{
/*
* Only add present memory as told by the e820.
@@ -189,10 +189,10 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
if (memory_chunk->start_pfn >= max_pfn) {
printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
memory_chunk->start_pfn, memory_chunk->end_pfn);
- return;
+ return -1;
}
if (memory_chunk->nid != nid)
- return;
+ return -1;
if (!node_has_online_mem(nid))
node_start_pfn[nid] = memory_chunk->start_pfn;
@@ -202,6 +202,8 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
if (node_end_pfn[nid] < memory_chunk->end_pfn)
node_end_pfn[nid] = memory_chunk->end_pfn;
+
+ return 0;
}
int __init get_memcfg_from_srat(void)
@@ -259,7 +261,9 @@ int __init get_memcfg_from_srat(void)
printk(KERN_DEBUG
"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
- node_read_chunk(chunk->nid, chunk);
+ if (node_read_chunk(chunk->nid, chunk))
+ continue;
+
e820_register_active_regions(chunk->nid, chunk->start_pfn,
min(chunk->end_pfn, max_pfn));
}