summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-11 18:23:28 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-11 18:23:28 -0800
commit: 59d53737a8640482995fea13c6e2c0fd016115d6 (patch)
tree: 3423eb92315865d76cb8d488513bfef6ab9251d0 /fs
parent: d3f180ea1a44aecba1b0dab2a253428e77f906bf (diff)
parent: 8138a67a5557ffea3a21dfd6f037842d4e748513 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge second set of updates from Andrew Morton:
 "More of MM"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (83 commits)
  mm/nommu.c: fix arithmetic overflow in __vm_enough_memory()
  mm/mmap.c: fix arithmetic overflow in __vm_enough_memory()
  vmstat: Reduce time interval to stat update on idle cpu
  mm/page_owner.c: remove unnecessary stack_trace field
  Documentation/filesystems/proc.txt: describe /proc/<pid>/map_files
  mm: incorporate read-only pages into transparent huge pages
  vmstat: do not use deferrable delayed work for vmstat_update
  mm: more aggressive page stealing for UNMOVABLE allocations
  mm: always steal split buddies in fallback allocations
  mm: when stealing freepages, also take pages created by splitting buddy page
  mincore: apply page table walker on do_mincore()
  mm: /proc/pid/clear_refs: avoid split_huge_page()
  mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)
  mempolicy: apply page table walker on queue_pages_range()
  arch/powerpc/mm/subpage-prot.c: use walk->vma and walk_page_vma()
  memcg: cleanup preparation for page table walk
  numa_maps: remove numa_maps->vma
  numa_maps: fix typo in gather_hugetbl_stats
  pagemap: use walk->vma instead of calling find_vma()
  clear_refs: remove clear_refs_private->vma and introduce clear_refs_test_walk()
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/extent_io.c | 2
-rw-r--r-- fs/proc/page.c | 16
-rw-r--r-- fs/proc/task_mmu.c | 218
3 files changed, 124 insertions, 112 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 790dbae3343c..c73df6a7c9b6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1407,8 +1407,8 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
while (index <= end_index) {
page = find_get_page(inode->i_mapping, index);
BUG_ON(!page); /* Pages should be in the extent_io_tree */
- account_page_redirty(page);
__set_page_dirty_nobuffers(page);
+ account_page_redirty(page);
page_cache_release(page);
index++;
}
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 1e3187da1fed..7eee2d8b97d9 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -5,6 +5,7 @@
#include <linux/ksm.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
+#include <linux/huge_mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
@@ -121,9 +122,18 @@ u64 stable_page_flags(struct page *page)
* just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
* to make sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page) && (PageLRU(compound_head(page)) ||
- PageAnon(compound_head(page))))
- u |= 1 << KPF_THP;
+ else if (PageTransCompound(page)) {
+ struct page *head = compound_head(page);
+
+ if (PageLRU(head) || PageAnon(head))
+ u |= 1 << KPF_THP;
+ else if (is_huge_zero_page(head)) {
+ u |= 1 << KPF_ZERO_PAGE;
+ u |= 1 << KPF_THP;
+ }
+ } else if (is_zero_pfn(page_to_pfn(page)))
+ u |= 1 << KPF_ZERO_PAGE;
+
/*
* Caveats on high order pages: page->_count will only be set
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6396f88c6687..0e36c1e49fe3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -21,7 +21,7 @@
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
- unsigned long data, text, lib, swap;
+ unsigned long data, text, lib, swap, ptes, pmds;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
/*
@@ -42,6 +42,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
swap = get_mm_counter(mm, MM_SWAPENTS);
+ ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
+ pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
seq_printf(m,
"VmPeak:\t%8lu kB\n"
"VmSize:\t%8lu kB\n"
@@ -54,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
"VmExe:\t%8lu kB\n"
"VmLib:\t%8lu kB\n"
"VmPTE:\t%8lu kB\n"
+ "VmPMD:\t%8lu kB\n"
"VmSwap:\t%8lu kB\n",
hiwater_vm << (PAGE_SHIFT-10),
total_vm << (PAGE_SHIFT-10),
@@ -63,8 +66,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
total_rss << (PAGE_SHIFT-10),
data << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
- (PTRS_PER_PTE * sizeof(pte_t) *
- atomic_long_read(&mm->nr_ptes)) >> 10,
+ ptes >> 10,
+ pmds >> 10,
swap << (PAGE_SHIFT-10));
}
@@ -433,7 +436,6 @@ const struct file_operations proc_tid_maps_operations = {
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
- struct vm_area_struct *vma;
unsigned long resident;
unsigned long shared_clean;
unsigned long shared_dirty;
@@ -482,7 +484,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
- struct vm_area_struct *vma = mss->vma;
+ struct vm_area_struct *vma = walk->vma;
struct page *page = NULL;
if (pte_present(*pte)) {
@@ -506,7 +508,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
- struct vm_area_struct *vma = mss->vma;
+ struct vm_area_struct *vma = walk->vma;
struct page *page;
/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -527,8 +529,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct mem_size_stats *mss = walk->private;
- struct vm_area_struct *vma = mss->vma;
+ struct vm_area_struct *vma = walk->vma;
pte_t *pte;
spinlock_t *ptl;
@@ -620,10 +621,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
};
memset(&mss, 0, sizeof mss);
- mss.vma = vma;
/* mmap_sem is held in m_start */
- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
+ walk_page_vma(vma, &smaps_walk);
show_map_vma(m, vma, is_pid);
@@ -737,14 +736,13 @@ enum clear_refs_types {
};
struct clear_refs_private {
- struct vm_area_struct *vma;
enum clear_refs_types type;
};
+#ifdef CONFIG_MEM_SOFT_DIRTY
static inline void clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
-#ifdef CONFIG_MEM_SOFT_DIRTY
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
@@ -761,19 +759,60 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
}
set_pte_at(vma->vm_mm, addr, pte, ptent);
-#endif
}
+static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+
+ pmd = pmd_wrprotect(pmd);
+ pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
+
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ vma->vm_flags &= ~VM_SOFTDIRTY;
+
+ set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+}
+
+#else
+
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte)
+{
+}
+
+static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+}
+#endif
+
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct clear_refs_private *cp = walk->private;
- struct vm_area_struct *vma = cp->vma;
+ struct vm_area_struct *vma = walk->vma;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
- split_huge_page_pmd(vma, addr, pmd);
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+ clear_soft_dirty_pmd(vma, addr, pmd);
+ goto out;
+ }
+
+ page = pmd_page(*pmd);
+
+ /* Clear accessed and referenced bits. */
+ pmdp_test_and_clear_young(vma, addr, pmd);
+ ClearPageReferenced(page);
+out:
+ spin_unlock(ptl);
+ return 0;
+ }
+
if (pmd_trans_unstable(pmd))
return 0;
@@ -802,6 +841,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
}
+static int clear_refs_test_walk(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct clear_refs_private *cp = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+
+ if (vma->vm_flags & VM_PFNMAP)
+ return 1;
+
+ /*
+ * Writing 1 to /proc/pid/clear_refs affects all pages.
+ * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
+ * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
+ * Writing 4 to /proc/pid/clear_refs affects all pages.
+ */
+ if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
+ return 1;
+ if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
+ return 1;
+ return 0;
+}
+
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -842,6 +903,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
};
struct mm_walk clear_refs_walk = {
.pmd_entry = clear_refs_pte_range,
+ .test_walk = clear_refs_test_walk,
.mm = mm,
.private = &cp,
};
@@ -861,28 +923,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
}
mmu_notifier_invalidate_range_start(mm, 0, -1);
}
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- cp.vma = vma;
- if (is_vm_hugetlb_page(vma))
- continue;
- /*
- * Writing 1 to /proc/pid/clear_refs affects all pages.
- *
- * Writing 2 to /proc/pid/clear_refs only affects
- * Anonymous pages.
- *
- * Writing 3 to /proc/pid/clear_refs only affects file
- * mapped pages.
- *
- * Writing 4 to /proc/pid/clear_refs affects all pages.
- */
- if (type == CLEAR_REFS_ANON && vma->vm_file)
- continue;
- if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
- continue;
- walk_page_range(vma->vm_start, vma->vm_end,
- &clear_refs_walk);
- }
+ walk_page_range(0, ~0UL, &clear_refs_walk);
if (type == CLEAR_REFS_SOFT_DIRTY)
mmu_notifier_invalidate_range_end(mm, 0, -1);
flush_tlb_mm(mm);
@@ -1050,15 +1091,13 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = walk->vma;
struct pagemapread *pm = walk->private;
spinlock_t *ptl;
- pte_t *pte;
+ pte_t *pte, *orig_pte;
int err = 0;
- /* find the first VMA at or above 'addr' */
- vma = find_vma(walk->mm, addr);
- if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
int pmd_flags2;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1084,51 +1123,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (pmd_trans_unstable(pmd))
return 0;
- while (1) {
- /* End of address space hole, which we mark as non-present. */
- unsigned long hole_end;
-
- if (vma)
- hole_end = min(end, vma->vm_start);
- else
- hole_end = end;
-
- for (; addr < hole_end; addr += PAGE_SIZE) {
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
-
- err = add_to_pagemap(addr, &pme, pm);
- if (err)
- return err;
- }
-
- if (!vma || vma->vm_start >= end)
- break;
- /*
- * We can't possibly be in a hugetlb VMA. In general,
- * for a mm_walk with a pmd_entry and a hugetlb_entry,
- * the pmd_entry can only be called on addresses in a
- * hugetlb if the walk starts in a non-hugetlb VMA and
- * spans a hugepage VMA. Since pagemap_read walks are
- * PMD-sized and PMD-aligned, this will never be true.
- */
- BUG_ON(is_vm_hugetlb_page(vma));
-
- /* Addresses in the VMA. */
- for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
- pagemap_entry_t pme;
- pte = pte_offset_map(pmd, addr);
- pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
- pte_unmap(pte);
- err = add_to_pagemap(addr, &pme, pm);
- if (err)
- return err;
- }
+ /*
+ * We can assume that @vma always points to a valid one and @end never
+ * goes beyond vma->vm_end.
+ */
+ orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ for (; addr < end; pte++, addr += PAGE_SIZE) {
+ pagemap_entry_t pme;
- if (addr == end)
+ pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+ err = add_to_pagemap(addr, &pme, pm);
+ if (err)
break;
-
- vma = find_vma(walk->mm, addr);
}
+ pte_unmap_unlock(orig_pte, ptl);
cond_resched();
@@ -1154,15 +1162,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = walk->vma;
int err = 0;
int flags2;
pagemap_entry_t pme;
- vma = find_vma(walk->mm, addr);
- WARN_ON_ONCE(!vma);
-
- if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+ if (vma->vm_flags & VM_SOFTDIRTY)
flags2 = __PM_SOFT_DIRTY;
else
flags2 = 0;
@@ -1322,7 +1327,6 @@ const struct file_operations proc_pagemap_operations = {
#ifdef CONFIG_NUMA
struct numa_maps {
- struct vm_area_struct *vma;
unsigned long pages;
unsigned long anon;
unsigned long active;
@@ -1391,18 +1395,17 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct numa_maps *md;
+ struct numa_maps *md = walk->private;
+ struct vm_area_struct *vma = walk->vma;
spinlock_t *ptl;
pte_t *orig_pte;
pte_t *pte;
- md = walk->private;
-
- if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
- page = can_gather_numa_stats(huge_pte, md->vma, addr);
+ page = can_gather_numa_stats(huge_pte, vma, addr);
if (page)
gather_stats(page, md, pte_dirty(huge_pte),
HPAGE_PMD_SIZE/PAGE_SIZE);
@@ -1414,7 +1417,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
return 0;
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
do {
- struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
+ struct page *page = can_gather_numa_stats(*pte, vma, addr);
if (!page)
continue;
gather_stats(page, md, pte_dirty(*pte), 1);
@@ -1424,7 +1427,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
struct numa_maps *md;
@@ -1443,7 +1446,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
}
#else
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
return 0;
@@ -1461,7 +1464,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
struct numa_maps *md = &numa_priv->md;
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
- struct mm_walk walk = {};
+ struct mm_walk walk = {
+ .hugetlb_entry = gather_hugetlb_stats,
+ .pmd_entry = gather_pte_stats,
+ .private = md,
+ .mm = mm,
+ };
struct mempolicy *pol;
char buffer[64];
int nid;
@@ -1472,13 +1480,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
/* Ensure we start with an empty set of numa_maps statistics. */
memset(md, 0, sizeof(*md));
- md->vma = vma;
-
- walk.hugetlb_entry = gather_hugetbl_stats;
- walk.pmd_entry = gather_pte_stats;
- walk.private = md;
- walk.mm = mm;
-
pol = __get_vma_policy(vma, vma->vm_start);
if (pol) {
mpol_to_str(buffer, sizeof(buffer), pol);
@@ -1512,7 +1513,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
if (is_vm_hugetlb_page(vma))
seq_puts(m, " huge");
- walk_page_range(vma->vm_start, vma->vm_end, &walk);
+ /* mmap_sem is held by m_start */
+ walk_page_vma(vma, &walk);
if (!md->pages)
goto out;