From 61d3d5108eb621d2a097c41f6cc83bf63b1b6c03 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 6 Jan 2023 14:59:00 +0100
Subject: mm: remove PageMovable export

The only in-kernel users that need PageMovable() to be exported are
z3fold and zsmalloc, and they only use it for dubious debugging
functionality.  So remove those usages and the export so that no driver
code accidentally thinks that it is allowed to use this symbol.

Link: https://lkml.kernel.org/r/20230106135900.3763622-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman
Reviewed-by: Sergey Senozhatsky
Reviewed-by: Miaohe Lin
Reviewed-by: David Hildenbrand
Reviewed-by: Christoph Hellwig
Acked-by: Minchan Kim
Cc: Vitaly Wool
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index ca1603524bbe..62a61de44658 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -122,7 +122,6 @@ bool PageMovable(struct page *page)
 
         return false;
 }
-EXPORT_SYMBOL(PageMovable);
 
 void __SetPageMovable(struct page *page, const struct movable_operations *mops)
 {
--
cgit v1.2.3


From c6835e8d86bcd8313347e097da140057772307c0 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Tue, 10 Jan 2023 21:36:18 +0800
Subject: mm: compaction: remove redundant VM_BUG_ON() in compact_zone()

Patch series "Some small improvements for compaction".

When I did some compaction testing, I found some small room for
improvement as well as some code cleanups.

This patch (of 5):

compaction_suitable() will never return values other than
COMPACT_SUCCESS, COMPACT_SKIPPED and COMPACT_CONTINUE, so after
validating COMPACT_SUCCESS and COMPACT_SKIPPED we can never hit any
other unexpected case.  Thus remove the redundant VM_BUG_ON() validation
for the return values of compaction_suitable().

Link: https://lkml.kernel.org/r/cover.1673342761.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/740a2396d9b98154dba76e326cba5e798b640ead.1673342761.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Cc: Matthew Wilcox
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index 62a61de44658..5e6f5e35748d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2313,9 +2313,6 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
         if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
                 return ret;
 
-        /* huh, compaction_suitable is returning something unexpected */
-        VM_BUG_ON(ret != COMPACT_CONTINUE);
-
         /*
          * Clear pageblock skip if there were failures recently and compaction
          * is about to be retried after being deferred.
--
cgit v1.2.3


From 753ec50d976c28b08266dec3110905b377464eb1 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Tue, 10 Jan 2023 21:36:19 +0800
Subject: mm: compaction: move list validation into compact_zone()

Move the cc.freepages and cc.migratepages list validation into
compact_zone() to remove some duplicate code.
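
As a rough sketch of the resulting shape (illustrative only, with the
scanner loop elided; this is not the literal kernel code), the empty-list
checks end up in one place at the tail of compact_zone() instead of being
repeated in compact_zone_order(), proactive_compact_node(), compact_node()
and kcompactd_do_work():

  /* Sketch: one postcondition check in the callee covers every caller. */
  static enum compact_result
  compact_zone(struct compact_control *cc, struct capture_control *capc)
  {
          enum compact_result ret = COMPACT_CONTINUE;

          /* ... migration and free scanner loop updates ret ... */

          /* Both lists must be fully drained whenever compaction returns. */
          VM_BUG_ON(!list_empty(&cc->freepages));
          VM_BUG_ON(!list_empty(&cc->migratepages));

          return ret;
  }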
Link: https://lkml.kernel.org/r/15cf54f7d762e87b04ac3cc74536f7d1ebbcd8cd.1673342761.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Cc: Matthew Wilcox
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index 5e6f5e35748d..f8e8addc8664 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2488,6 +2488,9 @@ out:
 
         trace_mm_compaction_end(cc, start_pfn, end_pfn, sync, ret);
 
+        VM_BUG_ON(!list_empty(&cc->freepages));
+        VM_BUG_ON(!list_empty(&cc->migratepages));
+
         return ret;
 }
 
@@ -2526,9 +2529,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 
         ret = compact_zone(&cc, &capc);
 
-        VM_BUG_ON(!list_empty(&cc.freepages));
-        VM_BUG_ON(!list_empty(&cc.migratepages));
-
         /*
          * Make sure we hide capture control first before we read the captured
          * page pointer, otherwise an interrupt could free and capture a page
@@ -2659,9 +2659,6 @@ static void proactive_compact_node(pg_data_t *pgdat)
 
                 cc.zone = zone;
                 compact_zone(&cc, NULL);
-
-                VM_BUG_ON(!list_empty(&cc.freepages));
-                VM_BUG_ON(!list_empty(&cc.migratepages));
         }
 }
 
@@ -2689,9 +2686,6 @@ static void compact_node(int nid)
 
                 cc.zone = zone;
                 compact_zone(&cc, NULL);
-
-                VM_BUG_ON(!list_empty(&cc.freepages));
-                VM_BUG_ON(!list_empty(&cc.migratepages));
         }
 }
 
@@ -2868,9 +2862,6 @@ static void kcompactd_do_work(pg_data_t *pgdat)
                                      cc.total_migrate_scanned);
                 count_compact_events(KCOMPACTD_FREE_SCANNED,
                                      cc.total_free_scanned);
-
-                VM_BUG_ON(!list_empty(&cc.freepages));
-                VM_BUG_ON(!list_empty(&cc.migratepages));
         }
 
         /*
--
cgit v1.2.3


From 1bfb7684db1233d9e3f3f26fbbc0c58d40ff65e7 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Tue, 10 Jan 2023 21:36:20 +0800
Subject: mm: compaction: count the migration scanned pages events for proactive compaction

Proactive compaction reuses the per-node kcompactd threads, so we should
also count the KCOMPACTD_MIGRATE_SCANNED and KCOMPACTD_FREE_SCANNED
events for proactive compaction.

Link: https://lkml.kernel.org/r/b7f1ece1adc17defa47e3667b5f9fd61f496517a.1673342761.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Cc: Matthew Wilcox
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index f8e8addc8664..62f6bb68c9cb 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2659,6 +2659,11 @@ static void proactive_compact_node(pg_data_t *pgdat)
 
                 cc.zone = zone;
                 compact_zone(&cc, NULL);
+
+                count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
+                                     cc.total_migrate_scanned);
+                count_compact_events(KCOMPACTD_FREE_SCANNED,
+                                     cc.total_free_scanned);
         }
 }
 
--
cgit v1.2.3


From 8fff8b6f8d0ef7620e06f3f4cfb912171aef6cd5 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Tue, 10 Jan 2023 21:36:21 +0800
Subject: mm: compaction: add missing kcompactd wakeup trace event

Add the missing kcompactd wakeup trace event for proactive compaction,
and use order = -1 and the highest zone index of the pgdat for the
kcompactd wakeup trace event emitted by proactive compaction.
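
The same event is already emitted on the ordinary wakeup path; roughly (a
simplified sketch from memory of wakeup_kcompactd(), not a quote of the
tree), the two call sites compare as follows, with -1 standing in for "no
particular order" on the proactive side:

  /* Sketch: a normal wakeup reports the order and zone that triggered it. */
  trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order, highest_zoneidx);
  wake_up_interruptible(&pgdat->kcompactd_wait);

  /* Sketch: the proactive trigger has no specific order, hence -1. */
  trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, -1, pgdat->nr_zones - 1);
  wake_up_interruptible(&pgdat->kcompactd_wait);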
Link: https://lkml.kernel.org/r/cbf8097a2d8a1b6800991f2a21575550d3613ce6.1673342761.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Cc: Matthew Wilcox
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index 62f6bb68c9cb..0fd6c81a7809 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2730,6 +2730,8 @@ int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
                         continue;
 
                 pgdat->proactive_compact_trigger = true;
+                trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, -1,
+                        pgdat->nr_zones - 1);
                 wake_up_interruptible(&pgdat->kcompactd_wait);
         }
 }
--
cgit v1.2.3


From 9e5522715e6941bcfdc08d066a79d6da0f8cec8e Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Tue, 10 Jan 2023 21:36:22 +0800
Subject: mm: compaction: avoid fragmentation score calculation for empty zones

There is no need to calculate the fragmentation score for empty zones.

Link: https://lkml.kernel.org/r/100331ad9d274a9725e687b00d85d75d7e4a17c7.1673342761.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Cc: Matthew Wilcox
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index 0fd6c81a7809..b758b00a4885 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2025,6 +2025,8 @@ static unsigned int fragmentation_score_node(pg_data_t *pgdat)
                 struct zone *zone;
 
                 zone = &pgdat->node_zones[zoneid];
+                if (!populated_zone(zone))
+                        continue;
                 score += fragmentation_score_zone_weighted(zone);
         }
 
--
cgit v1.2.3


From 48731c8436c68ce5597dfe72f3836bd6808bedde Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 25 Jan 2023 13:44:31 +0000
Subject: mm, compaction: rename compact_control->rescan to finish_pageblock

Patch series "Fix excessive CPU usage during compaction".

Commit 7efc3b726103 ("mm/compaction: fix set skip in fast_find_migrateblock")
fixed a problem where pageblocks found by fast_find_migrateblock() were
ignored.  Unfortunately there were numerous bug reports complaining about
high CPU usage and massive stalls once 6.1 was released.  Due to the
severity, the patch was reverted by Vlastimil as a short-term fix[1] to
-stable.

The underlying problem for each of the bugs is suspected to be the
repeated scanning of the same pageblocks.  This series should guarantee
forward progress even with commit 7efc3b726103.  More information is in
the changelog for patch 4.

[1] http://lore.kernel.org/r/20230113173345.9692-1-vbabka@suse.cz

This patch (of 4):

The rescan field was not well named albeit accurate at the time.  Rename
the field to finish_pageblock to indicate that the remainder of the
pageblock should be scanned regardless of COMPACT_CLUSTER_MAX.  The
intent is that pageblocks with transient failures get marked for skipping
to avoid revisiting the same pageblock.
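
The flag itself lives in struct compact_control in mm/internal.h; roughly,
and only as an illustrative sketch rather than the full structure
definition, the rename amounts to:

  struct compact_control {
          /* ... other scanner state ... */

          /*
           * When set, scan to the end of the current pageblock even after
           * COMPACT_CLUSTER_MAX pages have been isolated, so the block can
           * be marked for skipping instead of being revisited after a
           * transient failure.
           */
          bool finish_pageblock;          /* previously: bool rescan; */
  };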
Link: https://lkml.kernel.org/r/20230125134434.18017-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Cc: Chuyi Zhou
Cc: Jiri Slaby
Cc: Maxim Levitsky
Cc: Michal Hocko
Cc: Paolo Bonzini
Cc: Pedro Falcato
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index b758b00a4885..28a9596609fe 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1101,12 +1101,12 @@ isolate_success_no_list:
 
                 /*
                  * Avoid isolating too much unless this block is being
-                 * rescanned (e.g. dirty/writeback pages, parallel allocation)
+                 * fully scanned (e.g. dirty/writeback pages, parallel allocation)
                  * or a lock is contended. For contention, isolate quickly to
                  * potentially remove one source of contention.
                  */
                 if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
-                    !cc->rescan && !cc->contended) {
+                    !cc->finish_pageblock && !cc->contended) {
                         ++low_pfn;
                         break;
                 }
@@ -1171,14 +1171,14 @@ isolate_abort:
         }
 
         /*
-         * Updated the cached scanner pfn once the pageblock has been scanned
+         * Update the cached scanner pfn once the pageblock has been scanned.
          * Pages will either be migrated in which case there is no point
         * scanning in the near future or migration failed in which case the
         * failure reason may persist. The block is marked for skipping if
         * there were no pages isolated in the block or if the block is
         * rescanned twice in a row.
         */
-        if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
+        if (low_pfn == end_pfn && (!nr_isolated || cc->finish_pageblock)) {
                 if (valid_page && !skip_updated)
                         set_pageblock_skip(valid_page);
                 update_cached_migrate(cc, low_pfn);
@@ -2372,17 +2372,17 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
                 unsigned long iteration_start_pfn = cc->migrate_pfn;
 
                 /*
-                 * Avoid multiple rescans which can happen if a page cannot be
-                 * isolated (dirty/writeback in async mode) or if the migrated
-                 * pages are being allocated before the pageblock is cleared.
-                 * The first rescan will capture the entire pageblock for
-                 * migration. If it fails, it'll be marked skip and scanning
-                 * will proceed as normal.
+                 * Avoid multiple rescans of the same pageblock which can
+                 * happen if a page cannot be isolated (dirty/writeback in
+                 * async mode) or if the migrated pages are being allocated
+                 * before the pageblock is cleared. The first rescan will
+                 * capture the entire pageblock for migration. If it fails,
+                 * it'll be marked skip and scanning will proceed as normal.
                  */
-                cc->rescan = false;
+                cc->finish_pageblock = false;
                 if (pageblock_start_pfn(last_migrated_pfn) ==
                     pageblock_start_pfn(iteration_start_pfn)) {
-                        cc->rescan = true;
+                        cc->finish_pageblock = true;
                 }
 
                 switch (isolate_migratepages(cc)) {
--
cgit v1.2.3


From 16b3be4034316bf56a171478cf1dccdf94dede43 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 25 Jan 2023 13:44:32 +0000
Subject: mm, compaction: check if a page has been captured before draining PCP pages

If a page has been captured then draining is unnecessary, so check first
for a captured page.
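
The drain only exists so that freed pages sitting on per-CPU lists become
visible to the free scanner; once a page has been captured for the caller,
compaction is finished and the drain is pure overhead.  Roughly (an
illustrative sketch, not the patch itself; the exact hunk is in the diff
below):

  /* Sketch: tail of the migration loop in compact_zone(). */

  /* A captured page already satisfies the allocation, so stop right away. */
  if (capc && capc->page) {
          ret = COMPACT_SUCCESS;
          break;
  }

check_drain:
  /*
   * Only when nothing was captured is it worth flushing the per-CPU
   * lists so that freed pages become visible to the free scanner.
   */
  if (cc->order > 0 && last_migrated_pfn) {
          /* ... drain local pages for cc->zone ... */
  }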
Link: https://lkml.kernel.org/r/20230125134434.18017-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Cc: Chuyi Zhou
Cc: Jiri Slaby
Cc: Maxim Levitsky
Cc: Michal Hocko
Cc: Paolo Bonzini
Cc: Pedro Falcato
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index 28a9596609fe..a86559910fd9 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2439,6 +2439,12 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
                         }
                 }
 
+                /* Stop if a page has been captured */
+                if (capc && capc->page) {
+                        ret = COMPACT_SUCCESS;
+                        break;
+                }
+
check_drain:
                 /*
                  * Has the migration scanner moved away from the previous
@@ -2457,12 +2463,6 @@ check_drain:
                                 last_migrated_pfn = 0;
                         }
                 }
-
-                /* Stop if a page has been captured */
-                if (capc && capc->page) {
-                        ret = COMPACT_SUCCESS;
-                        break;
-                }
         }
 
 out:
--
cgit v1.2.3


From f9d7fc1ae3349759f25903cd867ab72e6ba4a63e Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 25 Jan 2023 13:44:33 +0000
Subject: mm, compaction: finish scanning the current pageblock if requested

cc->finish_pageblock is set when the current pageblock should be
rescanned, but fast_find_migrateblock can select an alternative block.
Disable fast_find_migrateblock when the current pageblock scan should be
completed.

Link: https://lkml.kernel.org/r/20230125134434.18017-4-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Cc: Chuyi Zhou
Cc: Jiri Slaby
Cc: Maxim Levitsky
Cc: Michal Hocko
Cc: Paolo Bonzini
Cc: Pedro Falcato
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index a86559910fd9..91acde906ae3 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1761,6 +1761,13 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
         if (cc->ignore_skip_hint)
                 return pfn;
 
+        /*
+         * If the pageblock should be finished then do not select a different
+         * pageblock.
+         */
+        if (cc->finish_pageblock)
+                return pfn;
+
         /*
          * If the migrate_pfn is not at the start of a zone or the start
          * of a pageblock then assume this is a continuation of a previous
--
cgit v1.2.3


From cfccd2e63e7e0c84c514676cffa755dd71a3b2bc Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 25 Jan 2023 13:44:34 +0000
Subject: mm, compaction: finish pageblocks on complete migration failure

Commit 7efc3b726103 ("mm/compaction: fix set skip in fast_find_migrateblock")
addressed an issue where a pageblock selected by fast_find_migrateblock()
was ignored.  Unfortunately, the same fix resulted in numerous reports of
khugepaged or kcompactd stalling for long periods of time or consuming
100% of CPU.

Tracing showed that there was a lot of rescanning between a small subset
of pageblocks because the conditions for marking the block skip are not
met.  The scan is not reaching the end of the pageblock because enough
pages were isolated, but none were migrated successfully.  Eventually it
circles back to the same block.

Pageblock skip tracking tries to minimise both latency and excessive
scanning, but tracking exactly when a block is fully scanned requires an
excessive amount of state.  This patch forcibly rescans a pageblock when
all isolated pages fail to migrate, even though it could be for transient
reasons such as page writeback or page dirty.
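
In rough terms (an illustrative sketch; the precise logic, including the
THP draining detail, is in the hunk below), a migration failure during
ASYNC or SYNC_LIGHT direct compaction now forces the remainder of the
pageblock to be scanned so it can be marked skip, instead of moving the
migration scanner on and circling back later:

  /* Sketch: migrate_pages() reported a failure for the current block. */
  if (cc->direct_compaction && !cc->finish_pageblock &&
      (cc->mode < MIGRATE_SYNC)) {
          cc->finish_pageblock = true;    /* scan this block to the end */
          goto rescan;                    /* retry the same pageblock once */
  }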
This will sometimes migrate too many pages, but pageblocks will be marked
skip and forward progress will be made.

"usemem" from the mmtests configuration workload-usemem-stress-numa-compact
was used to stress compaction.  The compaction trace events were recorded
using a 6.2-rc5 kernel that includes commit 7efc3b726103, and the count of
unique ranges was measured.  The top 5 ranges were

      3076 range=(0x10ca00-0x10cc00)
      3076 range=(0x110a00-0x110c00)
      3098 range=(0x13b600-0x13b800)
      3104 range=(0x141c00-0x141e00)
     11424 range=(0x11b600-0x11b800)

While this workload is very different from what the bug reports described,
the pattern of the same subset of blocks being repeatedly scanned is
observed.  At one point, *only* the range range=(0x11b600 ~ 0x11b800) was
scanned for 2 seconds.  14 seconds passed between the first
migration-related event and the last.

With the series applied, including this patch, the top 5 ranges were

         1 range=(0x11607e-0x116200)
         1 range=(0x116200-0x116278)
         1 range=(0x116278-0x116400)
         1 range=(0x116400-0x116424)
         1 range=(0x116424-0x116600)

Only unique ranges were scanned, and the time between the first
migration-related event and the last was 0.11 milliseconds.

Link: https://lkml.kernel.org/r/20230125134434.18017-5-mgorman@techsingularity.net
Fixes: 7efc3b726103 ("mm/compaction: fix set skip in fast_find_migrateblock")
Signed-off-by: Mel Gorman
Cc: Chuyi Zhou
Cc: Jiri Slaby
Cc: Maxim Levitsky
Cc: Michal Hocko
Cc: Paolo Bonzini
Cc: Pedro Falcato
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index 91acde906ae3..d73578af44cc 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2392,6 +2392,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
                         cc->finish_pageblock = true;
                 }
 
+rescan:
                 switch (isolate_migratepages(cc)) {
                 case ISOLATE_ABORT:
                         ret = COMPACT_CONTENDED;
@@ -2434,15 +2435,28 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
                                 goto out;
                         }
                         /*
-                         * We failed to migrate at least one page in the current
-                         * order-aligned block, so skip the rest of it.
+                         * If an ASYNC or SYNC_LIGHT fails to migrate a page
+                         * within the current order-aligned block, scan the
+                         * remainder of the pageblock. This will mark the
+                         * pageblock "skip" to avoid rescanning in the near
+                         * future. This will isolate more pages than necessary
+                         * for the request but avoid loops due to
+                         * fast_find_migrateblock revisiting blocks that were
+                         * recently partially scanned.
                          */
-                        if (cc->direct_compaction &&
-                            (cc->mode == MIGRATE_ASYNC)) {
-                                cc->migrate_pfn = block_end_pfn(
-                                                cc->migrate_pfn - 1, cc->order);
-                                /* Draining pcplists is useless in this case */
-                                last_migrated_pfn = 0;
+                        if (cc->direct_compaction && !cc->finish_pageblock &&
+                            (cc->mode < MIGRATE_SYNC)) {
+                                cc->finish_pageblock = true;
+
+                                /*
+                                 * Draining pcplists does not help THP if
+                                 * any page failed to migrate. Even after
+                                 * drain, the pageblock will not be free.
+                                 */
+                                if (cc->order == COMPACTION_HPAGE_ORDER)
+                                        last_migrated_pfn = 0;
+
+                                goto rescan;
                         }
                 }
 
--
cgit v1.2.3


From cd7755800eb54e8522f5e51f4e71e6494c1f1572 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Wed, 15 Feb 2023 18:39:37 +0800
Subject: mm: change to return bool for isolate_movable_page()

isolate_movable_page() can now only return 0 or -EBUSY, and no caller
cares about the exact negative value, so we can convert
isolate_movable_page() to return a boolean to make the code clearer when
checking the movable page isolation state.

No functional changes intended.

[akpm@linux-foundation.org: remove unneeded comment, per Matthew]
Link: https://lkml.kernel.org/r/cb877f73f4fff8d309611082ec740a7065b1ade0.1676424378.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Acked-by: David Hildenbrand
Reviewed-by: Matthew Wilcox (Oracle)
Acked-by: Linus Torvalds
Reviewed-by: SeongJae Park
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index d73578af44cc..ad7409f70519 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -976,7 +976,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                                         locked = NULL;
                                 }
 
-                                if (!isolate_movable_page(page, mode))
+                                if (isolate_movable_page(page, mode))
                                         goto isolate_success;
                         }
 
--
cgit v1.2.3
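
As a closing illustration of that last one-line change (a sketch only; the
prototype change itself lives outside this file, presumably alongside
isolate_movable_page() in mm/migrate.c), the flip in return convention is
what removes the negation at the call site:

  /* Before: int return, 0 meant success, so success was tested as "!err". */
  if (!isolate_movable_page(page, mode))
          goto isolate_success;

  /* After: bool return, true means the page was isolated. */
  if (isolate_movable_page(page, mode))
          goto isolate_success;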