From e8a03feb54ca7f1768bbdc2b491f9ef654e6d01d Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 14 Apr 2010 17:59:28 -0400 Subject: rmap: add exclusively owned pages to the newest anon_vma The recent anon_vma fixes cause many anonymous pages to end up in the parent process anon_vma, even when the page is exclusively owned by the current process. Adding exclusively owned anonymous pages to the top anon_vma reduces rmap scanning overhead, especially in workloads with forking servers. This patch adds a parameter to __page_set_anon_rmap that can be used to indicate whether or not the added page is exclusively owned by the current process. Pages added through page_add_new_anon_rmap are exclusively owned by the current process, and can be added to the top anon_vma. Pages added through page_add_anon_rmap can be either shared or exclusively owned, so we do the conservative thing and add it to the oldest anon_vma. A next step would be to add the exclusive parameter to page_add_anon_rmap, to be used from functions where we do know for sure whether a page is exclusively owned. Signed-off-by: Rik van Riel Reviewed-by: Johannes Weiner Lightly-tested-by: Borislav Petkov Reviewed-by: Minchan Kim [ Edited to look nicer - Linus ] Signed-off-by: Linus Torvalds --- mm/rmap.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 4bad3267537a..526704e8215d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -730,23 +730,28 @@ void page_move_anon_rmap(struct page *page, * @page: the page to add the mapping to * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped + * @exclusive: the page is exclusively owned by the current process */ static void __page_set_anon_rmap(struct page *page, - struct vm_area_struct *vma, unsigned long address) + struct vm_area_struct *vma, unsigned long address, int exclusive) { - struct anon_vma_chain *avc; - struct anon_vma *anon_vma; + struct anon_vma *anon_vma = vma->anon_vma; - BUG_ON(!vma->anon_vma); + BUG_ON(!anon_vma); /* - * We must use the _oldest_ possible anon_vma for the page mapping! + * If the page isn't exclusively mapped into this vma, + * we must use the _oldest_ possible anon_vma for the + * page mapping! * - * So take the last AVC chain entry in the vma, which is the deepest - * ancestor, and use the anon_vma from that. + * So take the last AVC chain entry in the vma, which is + * the deepest ancestor, and use the anon_vma from that. */ - avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); - anon_vma = avc->anon_vma; + if (!exclusive) { + struct anon_vma_chain *avc; + avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); + anon_vma = avc->anon_vma; + } anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; @@ -802,7 +807,7 @@ void page_add_anon_rmap(struct page *page, VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); if (first) - __page_set_anon_rmap(page, vma, address); + __page_set_anon_rmap(page, vma, address, 0); else __page_check_anon_rmap(page, vma, address); } @@ -824,7 +829,7 @@ void page_add_new_anon_rmap(struct page *page, SetPageSwapBacked(page); atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ __inc_zone_page_state(page, NR_ANON_PAGES); - __page_set_anon_rmap(page, vma, address); + __page_set_anon_rmap(page, vma, address, 1); if (page_evictable(page, vma)) lru_cache_add_lru(page, LRU_ACTIVE_ANON); else -- cgit v1.2.3 From 31f2b0ebc01fd332cb0997f7ce9f9cde29af9e20 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Apr 2010 13:18:01 -0400 Subject: rmap: anon_vma_prepare() can leak anon_vma_chain If find_mergeable_anon_vma() succeeds but another thread installs ->anon_vma before we take ptl, then allocated == NULL but avc should be freed. Change the code to check avc != NULL to detect this case. Also, a couple of whitespace changes to make the critical section more visible. Signed-off-by: Oleg Nesterov Reviewed-by: Rik van Riel Cc: Hugh Dickins Cc: Pete Zaitcev Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 526704e8215d..07fc94758799 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -133,8 +133,8 @@ int anon_vma_prepare(struct vm_area_struct *vma) goto out_enomem_free_avc; allocated = anon_vma; } - spin_lock(&anon_vma->lock); + spin_lock(&anon_vma->lock); /* page_table_lock to protect against threads */ spin_lock(&mm->page_table_lock); if (likely(!vma->anon_vma)) { @@ -144,14 +144,15 @@ int anon_vma_prepare(struct vm_area_struct *vma) list_add(&avc->same_vma, &vma->anon_vma_chain); list_add(&avc->same_anon_vma, &anon_vma->head); allocated = NULL; + avc = NULL; } spin_unlock(&mm->page_table_lock); - spin_unlock(&anon_vma->lock); - if (unlikely(allocated)) { + + if (unlikely(allocated)) anon_vma_free(allocated); + if (unlikely(avc)) anon_vma_chain_free(avc); - } } return 0; -- cgit v1.2.3