From aae8679b0ebcaa92f99c1c3cb0cd651594a43915 Mon Sep 17 00:00:00 2001
From: Thomas Tuttle
Date: Thu, 5 Jun 2008 22:46:31 -0700
Subject: pagemap: fix bug in add_to_pagemap, require aligned-length reads of
 /proc/pid/pagemap

Fix a bug in add_to_pagemap.  Previously, since pm->out was a char *,
put_user was only copying 1 byte of every PFN, resulting in the top 7
bytes of each PFN not being copied.  By requiring that reads be a
multiple of 8 bytes, I can make pm->out and pm->end u64*s instead of
char*s, which makes put_user work properly, and also simplifies the
logic in add_to_pagemap a bit.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Thomas Tuttle
Cc: Matt Mackall
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 88717c0f941b..17403629e330 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -496,7 +496,7 @@ const struct file_operations proc_clear_refs_operations = {
 };
 
 struct pagemapread {
-        char __user *out, *end;
+        u64 __user *out, *end;
 };
 
 #define PM_ENTRY_BYTES sizeof(u64)
@@ -519,21 +519,11 @@ struct pagemapread {
 static int add_to_pagemap(unsigned long addr, u64 pfn,
                           struct pagemapread *pm)
 {
-        /*
-         * Make sure there's room in the buffer for an
-         * entire entry.  Otherwise, only copy part of
-         * the pfn.
-         */
-        if (pm->out + PM_ENTRY_BYTES >= pm->end) {
-                if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
-                        return -EFAULT;
-                pm->out = pm->end;
-                return PM_END_OF_BUFFER;
-        }
-
         if (put_user(pfn, pm->out))
                 return -EFAULT;
-        pm->out += PM_ENTRY_BYTES;
+        pm->out++;
+        if (pm->out >= pm->end)
+                return PM_END_OF_BUFFER;
         return 0;
 }
 
@@ -634,7 +624,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 
         ret = -EINVAL;
         /* file position must be aligned */
-        if (*ppos % PM_ENTRY_BYTES)
+        if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
                 goto out_task;
 
         ret = 0;
@@ -664,8 +654,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                 goto out_pages;
         }
 
-        pm.out = buf;
-        pm.end = buf + count;
+        pm.out = (u64 *)buf;
+        pm.end = (u64 *)(buf + count);
 
         if (!ptrace_may_attach(task)) {
                 ret = -EIO;
@@ -690,9 +680,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                 if (ret == PM_END_OF_BUFFER)
                         ret = 0;
                 /* don't need mmap_sem for these, but this looks cleaner */
-                *ppos += pm.out - buf;
+                *ppos += (char *)pm.out - buf;
                 if (!ret)
-                        ret = pm.out - buf;
+                        ret = (char *)pm.out - buf;
         }
 
 out_pages:
-- 
cgit v1.2.3
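A note on the interface change above: after this patch, userspace must seek and
read /proc/pid/pagemap in whole 8-byte entries.  Below is a minimal userspace
sketch (not from the patch) of a conforming reader; the decoding at the end
assumes the pagemap bit layout documented for this kernel era (bit 63 = page
present, bits 0-54 = PFN) and is illustrative only.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        char path[64];
        uint64_t entry;
        unsigned long vaddr;
        long page_size = sysconf(_SC_PAGESIZE);
        int fd;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <pid> <hex-vaddr>\n", argv[0]);
                return 1;
        }
        vaddr = strtoul(argv[2], NULL, 16);
        snprintf(path, sizeof(path), "/proc/%s/pagemap", argv[1]);
        fd = open(path, O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* one u64 per page: both the offset and the length are
         * multiples of 8, as pagemap_read() now requires */
        if (lseek(fd, (off_t)(vaddr / page_size) * 8, SEEK_SET) < 0 ||
            read(fd, &entry, 8) != 8) {
                perror("pagemap");
                close(fd);
                return 1;
        }
        /* assumed layout: bit 63 = present, bits 0-54 = PFN */
        printf("entry 0x%016llx present=%d pfn=0x%llx\n",
               (unsigned long long)entry, (int)(entry >> 63),
               (unsigned long long)(entry & ((1ULL << 55) - 1)));
        close(fd);
        return 0;
}

Before the fix, a read like this got back entries in which the kernel had
written only 1 of every 8 bytes, leaving the rest stale; after it, an
unaligned length simply fails with -EINVAL.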
From 2165009bdf63f79716a36ad545df14c3cdf958b7 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Thu, 12 Jun 2008 15:21:47 -0700
Subject: pagemap: pass mm into pagewalkers

We need this at least for huge page detection for now, because powerpc
needs the vm_area_struct to be able to determine whether a virtual
address is referring to a huge page (its pmd_huge() doesn't work).

It might also come in handy for some of the other users.

Signed-off-by: Dave Hansen
Acked-by: Matt Mackall
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 17403629e330..f0df3109343d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -315,9 +315,9 @@ struct mem_size_stats {
 };
 
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                           void *private)
+                           struct mm_walk *walk)
 {
-        struct mem_size_stats *mss = private;
+        struct mem_size_stats *mss = walk->private;
         struct vm_area_struct *vma = mss->vma;
         pte_t *pte, ptent;
         spinlock_t *ptl;
@@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
         return 0;
 }
 
-static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
-
 static int show_smap(struct seq_file *m, void *v)
 {
         struct vm_area_struct *vma = v;
         struct mem_size_stats mss;
         int ret;
+        struct mm_walk smaps_walk = {
+                .pmd_entry = smaps_pte_range,
+                .mm = vma->vm_mm,
+                .private = &mss,
+        };
 
         memset(&mss, 0, sizeof mss);
         mss.vma = vma;
         if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-                walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
-                                &smaps_walk, &mss);
+                walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
 
         ret = show_map(m, v);
         if (ret)
@@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = {
 };
 
 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
-                                unsigned long end, void *private)
+                                unsigned long end, struct mm_walk *walk)
 {
-        struct vm_area_struct *vma = private;
+        struct vm_area_struct *vma = walk->private;
         pte_t *pte, ptent;
         spinlock_t *ptl;
         struct page *page;
@@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
         return 0;
 }
 
-static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
-
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                 size_t count, loff_t *ppos)
 {
@@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                 return -ESRCH;
         mm = get_task_mm(task);
         if (mm) {
+                static struct mm_walk clear_refs_walk;
+                memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
+                clear_refs_walk.pmd_entry = clear_refs_pte_range;
+                clear_refs_walk.mm = mm;
                 down_read(&mm->mmap_sem);
-                for (vma = mm->mmap; vma; vma = vma->vm_next)
+                for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                        clear_refs_walk.private = vma;
                         if (!is_vm_hugetlb_page(vma))
-                                walk_page_range(mm, vma->vm_start, vma->vm_end,
-                                                &clear_refs_walk, vma);
+                                walk_page_range(vma->vm_start, vma->vm_end,
+                                                &clear_refs_walk);
+                }
                 flush_tlb_mm(mm);
                 up_read(&mm->mmap_sem);
                 mmput(mm);
@@ -528,9 +534,9 @@ static int add_to_pagemap(unsigned long addr, u64 pfn,
 }
 
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
-                                void *private)
+                                struct mm_walk *walk)
 {
-        struct pagemapread *pm = private;
+        struct pagemapread *pm = walk->private;
         unsigned long addr;
         int err = 0;
         for (addr = start; addr < end; addr += PAGE_SIZE) {
@@ -548,9 +554,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
 }
 
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                             void *private)
+                             struct mm_walk *walk)
 {
-        struct pagemapread *pm = private;
+        struct pagemapread *pm = walk->private;
         pte_t *pte;
         int err = 0;
 
@@ -675,8 +681,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
          * user buffer is tracked in "pm", and the walk
          * will stop when we hit the end of the buffer.
          */
-        ret = walk_page_range(mm, start_vaddr, end_vaddr,
-                              &pagemap_walk, &pm);
+        ret = walk_page_range(start_vaddr, end_vaddr,
+                              &pagemap_walk);
         if (ret == PM_END_OF_BUFFER)
                 ret = 0;
         /* don't need mmap_sem for these, but this looks cleaner */
-- 
cgit v1.2.3
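The API change in this patch, condensed: walk_page_range() no longer takes the
mm and a private pointer as separate arguments; both are carried in struct
mm_walk, and every callback receives the walk itself, so walk->mm and
walk->private are always in reach.  A minimal hypothetical walker under the
new convention -- count_pages() and count_range() are illustrative names, not
code from this patch:

static int count_pages(pmd_t *pmd, unsigned long addr, unsigned long end,
                       struct mm_walk *walk)
{
        unsigned long *count = walk->private;   /* cookie via the walk */

        *count += (end - addr) >> PAGE_SHIFT;
        return 0;
}

static unsigned long count_range(struct mm_struct *mm,
                                 unsigned long start, unsigned long end)
{
        unsigned long count = 0;
        struct mm_walk walk = {
                .pmd_entry = count_pages,
                .mm        = mm,        /* the mm now rides in the walk */
                .private   = &count,
        };

        /* old convention: walk_page_range(mm, start, end, &walk, &count); */
        walk_page_range(start, end, &walk);
        return count;
}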
From bcf8039ed45f56013c4afea5520bca7d909e5e61 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Thu, 12 Jun 2008 15:21:48 -0700
Subject: pagemap: fix large pages in pagemap

We were walking right into huge page areas in the pagemap walker, and
calling the pmds pmd_bad() and clearing them.  That leaked huge pages.
Bad.

This patch at least works around that for now.  It ignores huge pages
in the pagemap walker for the time being, and won't leak those pages.

Signed-off-by: Dave Hansen
Acked-by: Matt Mackall
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0df3109343d..ab8ccc9d14ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -553,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
         return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
 }
 
+static unsigned long pte_to_pagemap_entry(pte_t pte)
+{
+        unsigned long pme = 0;
+        if (is_swap_pte(pte))
+                pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
+                        | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
+        else if (pte_present(pte))
+                pme = PM_PFRAME(pte_pfn(pte))
+                        | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+        return pme;
+}
+
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                              struct mm_walk *walk)
 {
+        struct vm_area_struct *vma;
         struct pagemapread *pm = walk->private;
         pte_t *pte;
         int err = 0;
 
+        /* find the first VMA at or above 'addr' */
+        vma = find_vma(walk->mm, addr);
         for (; addr != end; addr += PAGE_SIZE) {
                 u64 pfn = PM_NOT_PRESENT;
-                pte = pte_offset_map(pmd, addr);
-                if (is_swap_pte(*pte))
-                        pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte))
-                                | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
-                else if (pte_present(*pte))
-                        pfn = PM_PFRAME(pte_pfn(*pte))
-                                | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
-                /* unmap so we're not in atomic when we copy to userspace */
-                pte_unmap(pte);
+
+                /* check to see if we've left 'vma' behind
+                 * and need a new, higher one */
+                if (vma && (addr >= vma->vm_end))
+                        vma = find_vma(walk->mm, addr);
+
+                /* check that 'vma' actually covers this address,
+                 * and that it isn't a huge page vma */
+                if (vma && (vma->vm_start <= addr) &&
+                    !is_vm_hugetlb_page(vma)) {
+                        pte = pte_offset_map(pmd, addr);
+                        pfn = pte_to_pagemap_entry(*pte);
+                        /* unmap before userspace copy */
+                        pte_unmap(pte);
+                }
                 err = add_to_pagemap(addr, pfn, pm);
                 if (err)
                         return err;
-- 
cgit v1.2.3
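Why walking into a huge page area was destructive, sketched for illustration.
This is a simplified pmd-level loop in the style of the generic page walker of
this era, not code from the patch; walk_pmd_range_sketch() is a hypothetical
name:

static void walk_pmd_range_sketch(pud_t *pud, unsigned long addr,
                                  unsigned long end)
{
        pmd_t *pmd;
        unsigned long next;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                /*
                 * A hugetlb pmd is a huge-page entry, not a pointer
                 * to a pte page, so pmd_bad() can be true for it.
                 * pmd_none_or_clear_bad() then zeroes the entry: the
                 * mapping is lost with no accounting update, and the
                 * huge page is never returned to the pool.
                 */
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                /* ... descend and walk the ptes from addr to next ... */
        } while (pmd++, addr = next, addr != end);
}

The hunks above are the fs/proc side of the workaround (the cgit view is
limited to task_mmu.c): the pagemap callback now looks up the covering VMA and
simply reports PM_NOT_PRESENT for hugetlb ranges instead of touching their
page tables.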