diff options
author | Rik van Riel <riel@redhat.com> | 2010-08-09 17:18:41 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-09 20:44:55 -0700 |
commit | 76545066c8521f3e32c849744744842b4df25b79 (patch) | |
tree | 978b6b003f63e1e22618586b7d9c2dd8ef363614 /mm/rmap.c | |
parent | 012f18004da33ba672e3c60838cc4898126174d3 (diff) |
mm: extend KSM refcounts to the anon_vma root
KSM reference counts can cause an anon_vma to exist after the processes it
belongs to have already exited. Because the anon_vma lock now lives in
the root anon_vma, we need to ensure that the root anon_vma stays around
until after all the "child" anon_vmas have been freed.
The obvious way to do this is to have a "child" anon_vma take a reference
to the root in anon_vma_fork. When the anon_vma is freed at munmap or
process exit, we drop the refcount in anon_vma_unlink and possibly free
the root anon_vma.
The KSM anon_vma reference count function also needs to be modified to
deal with the possibility of freeing 2 levels of anon_vma. The easiest
way to do this is to break out the KSM magic and make it generic.
When compiling without CONFIG_KSM, this code is compiled out.
Signed-off-by: Rik van Riel <riel@redhat.com>
Tested-by: Larry Woodman <lwoodman@redhat.com>
Acked-by: Larry Woodman <lwoodman@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Tested-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 46 |
1 files changed, 45 insertions, 1 deletions
diff --git a/mm/rmap.c b/mm/rmap.c index caa48b27371b..07e9814c7a41 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -235,6 +235,12 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) * lock any of the anon_vmas in this anon_vma tree. */ anon_vma->root = pvma->anon_vma->root; + /* + * With KSM refcounts, an anon_vma can stay around longer than the + * process it belongs to. The root anon_vma needs to be pinned + * until this anon_vma is freed, because the lock lives in the root. + */ + get_anon_vma(anon_vma->root); /* Mark this anon_vma as the one where our new (COWed) pages go. */ vma->anon_vma = anon_vma; anon_vma_chain_link(vma, avc, anon_vma); @@ -264,8 +270,12 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain) empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma); anon_vma_unlock(anon_vma); - if (empty) + if (empty) { + /* We no longer need the root anon_vma */ + if (anon_vma->root != anon_vma) + drop_anon_vma(anon_vma->root); anon_vma_free(anon_vma); + } } void unlink_anon_vmas(struct vm_area_struct *vma) @@ -1382,6 +1392,40 @@ int try_to_munlock(struct page *page) return try_to_unmap_file(page, TTU_MUNLOCK); } +#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION) +/* + * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root + * if necessary. Be careful to do all the tests under the lock. Once + * we know we are the last user, nobody else can get a reference and we + * can do the freeing without the lock. + */ +void drop_anon_vma(struct anon_vma *anon_vma) +{ + if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) { + struct anon_vma *root = anon_vma->root; + int empty = list_empty(&anon_vma->head); + int last_root_user = 0; + int root_empty = 0; + + /* + * The refcount on a non-root anon_vma got dropped. Drop + * the refcount on the root and check if we need to free it. 
+ */ + if (empty && anon_vma != root) { + last_root_user = atomic_dec_and_test(&root->external_refcount); + root_empty = list_empty(&root->head); + } + anon_vma_unlock(anon_vma); + + if (empty) { + anon_vma_free(anon_vma); + if (root_empty && last_root_user) + anon_vma_free(root); + } + } +} +#endif + #ifdef CONFIG_MIGRATION /* * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file(): |