From eb4866d0066ffd5446751c102d64feb3318d8bd1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 20 Sep 2011 15:19:38 -0700 Subject: make /proc/$pid/numa_maps gather_stats() take variable page size We need to teach the numa_maps code about transparent huge pages. The first step is to teach gather_stats() that the pte it is dealing with might represent more than one page. Note that will we use this in a moment for transparent huge pages since they have use a single pmd_t which _acts_ as a "surrogate" for a bunch of smaller pte_t's. I'm a _bit_ unhappy that this interface counts in hugetlbfs page sizes for hugetlbfs pages and PAGE_SIZE for normal pages. That means that to figure out how many _bytes_ "dirty=1" means, you must first know the hugetlbfs page size. That's easier said than done especially if you don't have visibility in to the mount. But, that's probably a discussion for another day especially since it would change behavior to fix it. But, just in case anyone wonders why this patch only passes a '1' in the hugetlb case... Signed-off-by: Dave Hansen Acked-by: Hugh Dickins Acked-by: David Rientjes Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/proc/task_mmu.c') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 25b6a887adb9..61342a454bd9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -877,30 +877,31 @@ struct numa_maps_private { struct numa_maps md; }; -static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) +static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, + unsigned long nr_pages) { int count = page_mapcount(page); - md->pages++; + md->pages += nr_pages; if (pte_dirty || PageDirty(page)) - md->dirty++; + md->dirty += nr_pages; if (PageSwapCache(page)) - md->swapcache++; + md->swapcache += nr_pages; if (PageActive(page) || PageUnevictable(page)) - md->active++; + md->active += nr_pages; if (PageWriteback(page)) - md->writeback++; + md->writeback += nr_pages; if (PageAnon(page)) - md->anon++; + md->anon += nr_pages; if (count > md->mapcount_max) md->mapcount_max = count; - md->node[page_to_nid(page)]++; + md->node[page_to_nid(page)] += nr_pages; } static int gather_pte_stats(pmd_t *pmd, unsigned long addr, @@ -931,7 +932,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (!node_isset(nid, node_states[N_HIGH_MEMORY])) continue; - gather_stats(page, md, pte_dirty(*pte)); + gather_stats(page, md, pte_dirty(*pte), 1); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); @@ -952,7 +953,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, return 0; md = walk->private; - gather_stats(page, md, pte_dirty(*pte)); + gather_stats(page, md, pte_dirty(*pte), 1); return 0; } -- cgit v1.2.3 From 3200a8aaab0c9ccdc0f59b0dac2d4a47029137fa Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 20 Sep 2011 15:19:39 -0700 Subject: break out numa_maps gather_pte_stats() checks gather_pte_stats() does a number of checks on a target page to see whether it should even be considered for statistics. This breaks that code out in to a separate function so that we can use it in the transparent hugepage case in the next patch. Signed-off-by: Dave Hansen Acked-by: Hugh Dickins Reviewed-by: Christoph Lameter Acked-by: David Rientjes Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'fs/proc/task_mmu.c') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 61342a454bd9..9dca07e0758d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -904,6 +904,29 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, md->node[page_to_nid(page)] += nr_pages; } +static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + int nid; + + if (!pte_present(pte)) + return NULL; + + page = vm_normal_page(vma, addr, pte); + if (!page) + return NULL; + + if (PageReserved(page)) + return NULL; + + nid = page_to_nid(page); + if (!node_isset(nid, node_states[N_HIGH_MEMORY])) + return NULL; + + return page; +} + static int gather_pte_stats(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -915,23 +938,9 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, md = walk->private; orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { - struct page *page; - int nid; - - if (!pte_present(*pte)) - continue; - - page = vm_normal_page(md->vma, addr, *pte); + struct page *page = can_gather_numa_stats(*pte, md->vma, addr); if (!page) continue; - - if (PageReserved(page)) - continue; - - nid = page_to_nid(page); - if (!node_isset(nid, node_states[N_HIGH_MEMORY])) - continue; - gather_stats(page, md, pte_dirty(*pte), 1); } while (pte++, addr += PAGE_SIZE, addr != end); -- cgit v1.2.3 From 32ef43848f283e0ef945d3c67e851c143fea3970 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 20 Sep 2011 15:19:41 -0700 Subject: teach /proc/$pid/numa_maps about transparent hugepages This is modeled after the smaps code. It detects transparent hugepages and then does a single gather_stats() for the page as a whole. This has two benifits: 1. It is more efficient since it does many pages in a single shot. 2. It does not have to break down the huge page. Signed-off-by: Dave Hansen Acked-by: Hugh Dickins Acked-by: David Rientjes Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/proc/task_mmu.c') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9dca07e0758d..5afaa58a8630 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -936,6 +936,26 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *pte; md = walk->private; + spin_lock(&walk->mm->page_table_lock); + if (pmd_trans_huge(*pmd)) { + if (pmd_trans_splitting(*pmd)) { + spin_unlock(&walk->mm->page_table_lock); + wait_split_huge_page(md->vma->anon_vma, pmd); + } else { + pte_t huge_pte = *(pte_t *)pmd; + struct page *page; + + page = can_gather_numa_stats(huge_pte, md->vma, addr); + if (page) + gather_stats(page, md, pte_dirty(huge_pte), + HPAGE_PMD_SIZE/PAGE_SIZE); + spin_unlock(&walk->mm->page_table_lock); + return 0; + } + } else { + spin_unlock(&walk->mm->page_table_lock); + } + orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { struct page *page = can_gather_numa_stats(*pte, md->vma, addr); -- cgit v1.2.3