From 20ba27f52eded4d14e309ac57971c9c83dec46e4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 24 Sep 2013 15:27:33 -0700 Subject: revert "memcg, vmscan: do not fall into reclaim-all pass too quickly" Revert commit e975de998b96 ("memcg, vmscan: do not fall into reclaim-all pass too quickly") I merged this prematurely - Michal and Johannes still disagree about the overall design direction and the future remains unclear. Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 8ed1b775bdc9..97b0ed16749f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2176,11 +2176,10 @@ static inline bool should_continue_reclaim(struct zone *zone, } } -static int +static void __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) { unsigned long nr_reclaimed, nr_scanned; - int groups_scanned = 0; do { struct mem_cgroup *root = sc->target_mem_cgroup; @@ -2198,7 +2197,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) { struct lruvec *lruvec; - groups_scanned++; lruvec = mem_cgroup_zone_lruvec(zone, memcg); shrink_lruvec(lruvec, sc); @@ -2226,8 +2224,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); - - return groups_scanned; } @@ -2235,19 +2231,8 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) { bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc); unsigned long nr_scanned = sc->nr_scanned; - int scanned_groups; - scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim); - /* - * memcg iterator might race with other reclaimer or start from - * a incomplete tree walk so the tree walk in __shrink_zone - * might have missed groups that are above the soft limit. Try - * another loop to catch up with others. Do it just once to - * prevent from reclaim latencies when other reclaimers always - * preempt this one. - */ - if (do_soft_reclaim && !scanned_groups) - __shrink_zone(zone, sc, do_soft_reclaim); + __shrink_zone(zone, sc, do_soft_reclaim); /* * No group is over the soft limit or those that are do not have -- cgit v1.2.3 From 3120055e869f2a208480f238680d097eec8f0e02 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 24 Sep 2013 15:27:35 -0700 Subject: revert "memcg, vmscan: do not attempt soft limit reclaim if it would not scan anything" Revert commit e839b6a1c8d0 ("memcg, vmscan: do not attempt soft limit reclaim if it would not scan anything") I merged this prematurely - Michal and Johannes still disagree about the overall design direction and the future remains unclear. Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 97b0ed16749f..cdd300b81485 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -142,9 +142,7 @@ static bool global_reclaim(struct scan_control *sc) static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) { - struct mem_cgroup *root = sc->target_mem_cgroup; - return !mem_cgroup_disabled() && - mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE; + return !mem_cgroup_disabled(); } #else static bool global_reclaim(struct scan_control *sc) -- cgit v1.2.3 From 694fbc0fe78518d06efa63910bf4ecee660e7852 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 24 Sep 2013 15:27:37 -0700 Subject: revert "memcg: enhance memcg iterator to support predicates" Revert commit de57780dc659 ("memcg: enhance memcg iterator to support predicates") I merged this prematurely - Michal and Johannes still disagree about the overall design direction and the future remains unclear. Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index cdd300b81485..17a7134de4d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2185,16 +2185,21 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) .zone = zone, .priority = sc->priority, }; - struct mem_cgroup *memcg = NULL; - mem_cgroup_iter_filter filter = (soft_reclaim) ? - mem_cgroup_soft_reclaim_eligible : NULL; + struct mem_cgroup *memcg; nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) { + memcg = mem_cgroup_iter(root, NULL, &reclaim); + do { struct lruvec *lruvec; + if (soft_reclaim && + !mem_cgroup_soft_reclaim_eligible(memcg, root)) { + memcg = mem_cgroup_iter(root, memcg, &reclaim); + continue; + } + lruvec = mem_cgroup_zone_lruvec(zone, memcg); shrink_lruvec(lruvec, sc); @@ -2214,7 +2219,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) mem_cgroup_iter_break(root, memcg); break; } - } + memcg = mem_cgroup_iter(root, memcg, &reclaim); + } while (memcg); vmpressure(sc->gfp_mask, sc->target_mem_cgroup, sc->nr_scanned - nr_scanned, -- cgit v1.2.3 From b1aff7fcf86c88472b0a70f15d89d7a4adba07bb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 24 Sep 2013 15:27:38 -0700 Subject: revert "vmscan, memcg: do softlimit reclaim also for targeted reclaim" Revert commit a5b7c87f9207 ("vmscan, memcg: do softlimit reclaim also for targeted reclaim") I merged this prematurely - Michal and Johannes still disagree about the overall design direction and the future remains unclear. Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 17a7134de4d7..0e081cada4ba 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -142,7 +142,7 @@ static bool global_reclaim(struct scan_control *sc) static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) { - return !mem_cgroup_disabled(); + return !mem_cgroup_disabled() && global_reclaim(sc); } #else static bool global_reclaim(struct scan_control *sc) @@ -2195,7 +2195,7 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) struct lruvec *lruvec; if (soft_reclaim && - !mem_cgroup_soft_reclaim_eligible(memcg, root)) { + !mem_cgroup_soft_reclaim_eligible(memcg)) { memcg = mem_cgroup_iter(root, memcg, &reclaim); continue; } -- cgit v1.2.3 From 0608f43da64a1f1c42507304b5f25bc8b1227aa4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 24 Sep 2013 15:27:41 -0700 Subject: revert "memcg, vmscan: integrate soft reclaim tighter with zone shrinking code" Revert commit 3b38722efd9f ("memcg, vmscan: integrate soft reclaim tighter with zone shrinking code") I merged this prematurely - Michal and Johannes still disagree about the overall design direction and the future remains unclear. Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 62 ++++++++++++++++++++++++++----------------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 0e081cada4ba..beb35778c69f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -139,21 +139,11 @@ static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; } - -static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) -{ - return !mem_cgroup_disabled() && global_reclaim(sc); -} #else static bool global_reclaim(struct scan_control *sc) { return true; } - -static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) -{ - return false; -} #endif unsigned long zone_reclaimable_pages(struct zone *zone) @@ -2174,8 +2164,7 @@ static inline bool should_continue_reclaim(struct zone *zone, } } -static void -__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) +static void shrink_zone(struct zone *zone, struct scan_control *sc) { unsigned long nr_reclaimed, nr_scanned; @@ -2194,12 +2183,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) do { struct lruvec *lruvec; - if (soft_reclaim && - !mem_cgroup_soft_reclaim_eligible(memcg)) { - memcg = mem_cgroup_iter(root, memcg, &reclaim); - continue; - } - lruvec = mem_cgroup_zone_lruvec(zone, memcg); shrink_lruvec(lruvec, sc); @@ -2230,24 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) sc->nr_scanned - nr_scanned, sc)); } - -static void shrink_zone(struct zone *zone, struct scan_control *sc) -{ - bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc); - unsigned long nr_scanned = sc->nr_scanned; - - __shrink_zone(zone, sc, do_soft_reclaim); - - /* - * No group is over the soft limit or those that are do not have - * pages in the zone we are reclaiming so we have to reclaim everybody - */ - if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) { - __shrink_zone(zone, sc, false); - return; - } -} - /* Returns true if compaction should go ahead for a high-order request */ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) { @@ -2309,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; + unsigned long nr_soft_reclaimed; + unsigned long nr_soft_scanned; bool aborted_reclaim = false; /* @@ -2348,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) continue; } } + /* + * This steals pages from memory cgroups over softlimit + * and returns the number of reclaimed pages and + * scanned pages. This works for global memory pressure + * and balancing, not for a memcg's limit. + */ + nr_soft_scanned = 0; + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, + sc->order, sc->gfp_mask, + &nr_soft_scanned); + sc->nr_reclaimed += nr_soft_reclaimed; + sc->nr_scanned += nr_soft_scanned; /* need some check for avoid more shrink_zone() */ } @@ -2941,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ + unsigned long nr_soft_reclaimed; + unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .priority = DEF_PRIORITY, @@ -3055,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, sc.nr_scanned = 0; + nr_soft_scanned = 0; + /* + * Call soft limit reclaim before calling shrink_zone. + */ + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, + order, sc.gfp_mask, + &nr_soft_scanned); + sc.nr_reclaimed += nr_soft_reclaimed; + /* * There should be no need to raise the scanning * priority if enough pages are already being scanned -- cgit v1.2.3 From 117aad1e9e4d97448d1df3f84b08bd65811e6d6a Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Mon, 30 Sep 2013 13:45:16 -0700 Subject: mm: avoid reinserting isolated balloon pages into LRU lists Isolated balloon pages can wrongly end up in LRU lists when migrate_pages() finishes its round without draining all the isolated page list. The same issue can happen when reclaim_clean_pages_from_list() tries to reclaim pages from an isolated page list, before migration, in the CMA path. Such balloon page leak opens a race window against LRU lists shrinkers that leads us to the following kernel panic: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [] shrink_page_list+0x24e/0x897 PGD 3cda2067 PUD 3d713067 PMD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 340 Comm: kswapd0 Not tainted 3.12.0-rc1-22626-g4367597 #87 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 RIP: shrink_page_list+0x24e/0x897 RSP: 0000:ffff88003da499b8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88003e82bd60 RCX: 00000000000657d5 RDX: 0000000000000000 RSI: 000000000000031f RDI: ffff88003e82bd40 RBP: ffff88003da49ab0 R08: 0000000000000001 R09: 0000000081121a45 R10: ffffffff81121a45 R11: ffff88003c4a9a28 R12: ffff88003e82bd40 R13: ffff88003da0e800 R14: 0000000000000001 R15: ffff88003da49d58 FS: 0000000000000000(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000067d9000 CR3: 000000003ace5000 CR4: 00000000000407b0 Call Trace: shrink_inactive_list+0x240/0x3de shrink_lruvec+0x3e0/0x566 __shrink_zone+0x94/0x178 shrink_zone+0x3a/0x82 balance_pgdat+0x32a/0x4c2 kswapd+0x2f0/0x372 kthread+0xa2/0xaa ret_from_fork+0x7c/0xb0 Code: 80 7d 8f 01 48 83 95 68 ff ff ff 00 4c 89 e7 e8 5a 7b 00 00 48 85 c0 49 89 c5 75 08 80 7d 8f 00 74 3e eb 31 48 8b 80 18 01 00 00 <48> 8b 74 0d 48 8b 78 30 be 02 00 00 00 ff d2 eb RIP [] shrink_page_list+0x24e/0x897 RSP CR2: 0000000000000028 ---[ end trace 703d2451af6ffbfd ]--- Kernel panic - not syncing: Fatal exception This patch fixes the issue, by assuring the proper tests are made at putback_movable_pages() & reclaim_clean_pages_from_list() to avoid isolated balloon pages being wrongly reinserted in LRU lists. [akpm@linux-foundation.org: clarify awkward comment text] Signed-off-by: Rafael Aquini Reported-by: Luiz Capitulino Tested-by: Luiz Capitulino Cc: Mel Gorman Cc: Rik van Riel Cc: Hugh Dickins Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index beb35778c69f..53f2f82f83ae 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -48,6 +48,7 @@ #include #include +#include #include "internal.h" @@ -1113,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, LIST_HEAD(clean_pages); list_for_each_entry_safe(page, next, page_list, lru) { - if (page_is_file_cache(page) && !PageDirty(page)) { + if (page_is_file_cache(page) && !PageDirty(page) && + !isolated_balloon_page(page)) { ClearPageActive(page); list_move(&page->lru, &clean_pages); } -- cgit v1.2.3 From ae39332162a837c3791bb21172d22382a90a6fd1 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Wed, 16 Oct 2013 13:46:46 -0700 Subject: mm/vmscan.c: don't forget to free shrinker->nr_deferred This leak was added by commit 1d3d4437eae1 ("vmscan: per-node deferred work"). unreferenced object 0xffff88006ada3bd0 (size 8): comm "criu", pid 14781, jiffies 4295238251 (age 105.641s) hex dump (first 8 bytes): 00 00 00 00 00 00 00 00 ........ backtrace: [] kmemleak_alloc+0x5e/0xc0 [] __kmalloc+0x247/0x310 [] register_shrinker+0x3c/0xa0 [] sget+0x5ab/0x670 [] proc_mount+0x54/0x170 [] mount_fs+0x43/0x1b0 [] vfs_kern_mount+0x72/0x110 [] kern_mount_data+0x19/0x30 [] pid_ns_prepare_proc+0x20/0x40 [] alloc_pid+0x466/0x4a0 [] copy_process+0xc6a/0x1860 [] do_fork+0x8b/0x370 [] SyS_clone+0x16/0x20 [] stub_clone+0x69/0x90 [] 0xffffffffffffffff Signed-off-by: Andrew Vagin Cc: Mel Gorman Cc: Michal Hocko Cc: Rik van Riel Cc: Johannes Weiner Cc: Glauber Costa Cc: Chuck Lever Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 53f2f82f83ae..eea668d9cff6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -211,6 +211,7 @@ void unregister_shrinker(struct shrinker *shrinker) down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); + kfree(shrinker->nr_deferred); } EXPORT_SYMBOL(unregister_shrinker); -- cgit v1.2.3