memcg/sl[au]b: track all the memcg children of a kmem_cache

This enables us to remove all the children of a kmem_cache being destroyed, if for example the kernel module it's being used in gets unloaded. Otherwise, the children will still point to the destroyed parent.

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Glauber Costa <glommer@parallels.com> 2012-12-18 14:22:55 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-18 15:02:14 -0800
commit: 7cf2798240a2a2230cb16a391beef98d8a7ad362 (patch)
tree: 989f28b74d08bf91938cc5a7fe632faa32e7542f
parent: 1f458cbf122288b23620ee822e19bcbb76c8d6ec (diff)
3 files changed, 55 insertions, 2 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 79fcf0cd7186..e119f3ef793c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -454,6 +454,7 @@ struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
@@ -601,6 +602,10 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	return cachep;
 }
+
+static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 270a36789859..4b68ec2c8df6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2772,6 +2772,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+static DEFINE_MUTEX(set_limit_mutex);
+
 #ifdef CONFIG_MEMCG_KMEM
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
@@ -3176,6 +3178,51 @@ out:
 	return new_cachep;
 }
 
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+	struct kmem_cache *c;
+	int i;
+
+	if (!s->memcg_params)
+		return;
+	if (!s->memcg_params->is_root_cache)
+		return;
+
+	/*
+	 * If the cache is being destroyed, we trust that there is no one else
+	 * requesting objects from it. Even if there are, the sanity checks in
+	 * kmem_cache_destroy should catch this ill-case.
+	 *
+	 * Still, we don't want anyone else freeing memcg_caches under our
+	 * noses, which can happen if a new memcg comes to life. As usual,
+	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 */
+	mutex_lock(&set_limit_mutex);
+	for (i = 0; i < memcg_limited_groups_array_size; i++) {
+		c = s->memcg_params->memcg_caches[i];
+		if (!c)
+			continue;
+
+		/*
+		 * We will now manually delete the caches, so to avoid races
+		 * we need to cancel all pending destruction workers and
+		 * proceed with destruction ourselves.
+		 *
+		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
+		 * and that could spawn the workers again: it is likely that
+		 * the cache still has active pages until this very moment.
+		 * This would lead us back to mem_cgroup_destroy_cache.
+		 *
+		 * But that will not execute at all if the "dead" flag is not
+		 * set, so flip it down to guarantee we are in control.
+		 */
+		c->memcg_params->dead = false;
+		cancel_delayed_work_sync(&c->memcg_params->destroy);
+		kmem_cache_destroy(c);
+	}
+	mutex_unlock(&set_limit_mutex);
+}
+
 struct create_work {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *cachep;
@@ -4284,8 +4331,6 @@ void mem_cgroup_print_bad_page(struct page *page)
 }
 #endif
 
-static DEFINE_MUTEX(set_limit_mutex);
-
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1c424b6511bf..080a43804bf1 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -249,6 +249,9 @@ EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+	/* Destroy all the children caches if we aren't a memcg cache */
+	kmem_cache_destroy_memcg_children(s);
+
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 	s->refcount--;
author	Glauber Costa <glommer@parallels.com>	2012-12-18 14:22:55 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-18 15:02:14 -0800
commit	7cf2798240a2a2230cb16a391beef98d8a7ad362 (patch)
tree	989f28b74d08bf91938cc5a7fe632faa32e7542f
parent	1f458cbf122288b23620ee822e19bcbb76c8d6ec (diff)