summaryrefslogtreecommitdiff
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorYing Han <yinghan@google.com>2011-05-26 16:25:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-26 17:12:35 -0700
commit0ae5e89c60c9eb87da36a2614836bc434b0ec2ad (patch)
tree0d509fd83ac7e7d2f52dfcbba769c43aeeb68b5f /mm/memcontrol.c
parentf042e707ee671e4beb5389abeb9a1819a2cf5532 (diff)
memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans per-zone LRU and reclaims pages regardless of the cgroup. It breaks memory isolation since one cgroup can end up reclaiming pages from another cgroup. Instead we should rely on memcg-aware target reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under memory pressure. In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. This patch is the first step to skip shrink_zone() if soft_limit reclaim does enough work. This is part of the effort which tries to reduce reclaiming pages in global LRU in memcg. The per-memcg background reclaim patchset further enhances the per-cgroup targetting reclaim, which I should have V4 posted shortly. Try running multiple memory intensive workloads within seperate memcgs. Watch the counters of soft_steal in memory.stat. $ cat /dev/cgroup/A/memory.stat | grep 'soft' soft_steal 240000 soft_scan 240000 total_soft_steal 240000 total_soft_scan 240000 This patch: In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. We would like to skip shrink_zone() if soft_limit reclaim does enough work. Also, we need to make the memory pressure balanced across per-memcg zones, like the logic vm-core. This patch is the first step where we start with counting the nr_scanned and nr_reclaimed from soft_limit reclaim into the global scan_control. Signed-off-by: Ying Han <yinghan@google.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Rik van Riel <riel@redhat.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c29
1 files changed, 20 insertions, 9 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc259926c170..e41a6c26f1e7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
struct zone *zone,
gfp_t gfp_mask,
- unsigned long reclaim_options)
+ unsigned long reclaim_options,
+ unsigned long *total_scanned)
{
struct mem_cgroup *victim;
int ret, total = 0;
@@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
unsigned long excess;
+ unsigned long nr_scanned;
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
@@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
continue;
}
/* we use swappiness of local cgroup */
- if (check_soft)
+ if (check_soft) {
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
- noswap, get_swappiness(victim), zone);
- else
+ noswap, get_swappiness(victim), zone,
+ &nr_scanned);
+ *total_scanned += nr_scanned;
+ } else
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
noswap, get_swappiness(victim));
css_put(&victim->css);
@@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
return CHARGE_WOULDBLOCK;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
- gfp_mask, flags);
+ gfp_mask, flags, NULL);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
return CHARGE_RETRY;
/*
@@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
break;
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
- MEM_CGROUP_RECLAIM_SHRINK);
+ MEM_CGROUP_RECLAIM_SHRINK,
+ NULL);
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
@@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
MEM_CGROUP_RECLAIM_NOSWAP |
- MEM_CGROUP_RECLAIM_SHRINK);
+ MEM_CGROUP_RECLAIM_SHRINK,
+ NULL);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
@@ -3285,7 +3291,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
}
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
- gfp_t gfp_mask)
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
{
unsigned long nr_reclaimed = 0;
struct mem_cgroup_per_zone *mz, *next_mz = NULL;
@@ -3293,6 +3300,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
int loop = 0;
struct mem_cgroup_tree_per_zone *mctz;
unsigned long long excess;
+ unsigned long nr_scanned;
if (order > 0)
return 0;
@@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
if (!mz)
break;
+ nr_scanned = 0;
reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
gfp_mask,
- MEM_CGROUP_RECLAIM_SOFT);
+ MEM_CGROUP_RECLAIM_SOFT,
+ &nr_scanned);
nr_reclaimed += reclaimed;
+ *total_scanned += nr_scanned;
spin_lock(&mctz->lock);
/*