From 4117992df66a26fa33908b4969e04801534baab1 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 5 Mar 2019 15:41:24 -0800 Subject: page_poison: play nicely with KASAN KASAN does not play well with the page poisoning (CONFIG_PAGE_POISONING). It triggers false positives in the allocation path: BUG: KASAN: use-after-free in memchr_inv+0x2ea/0x330 Read of size 8 at addr ffff88881f800000 by task swapper/0 CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-rc1+ #54 Call Trace: dump_stack+0xe0/0x19a print_address_description.cold.2+0x9/0x28b kasan_report.cold.3+0x7a/0xb5 __asan_report_load8_noabort+0x19/0x20 memchr_inv+0x2ea/0x330 kernel_poison_pages+0x103/0x3d5 get_page_from_freelist+0x15e7/0x4d90 because KASAN has not yet unpoisoned the shadow page for allocation before it checks memchr_inv() but only found a stale poison pattern. Also, false positives in free path, BUG: KASAN: slab-out-of-bounds in kernel_poison_pages+0x29e/0x3d5 Write of size 4096 at addr ffff8888112cc000 by task swapper/0/1 CPU: 5 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc1+ #55 Call Trace: dump_stack+0xe0/0x19a print_address_description.cold.2+0x9/0x28b kasan_report.cold.3+0x7a/0xb5 check_memory_region+0x22d/0x250 memset+0x28/0x40 kernel_poison_pages+0x29e/0x3d5 __free_pages_ok+0x75f/0x13e0 due to KASAN adds poisoned redzones around slab objects, but the page poisoning needs to poison the whole page. Link: http://lkml.kernel.org/r/20190114233405.67843-1-cai@lca.pw Signed-off-by: Qian Cai Acked-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0b9f577b1a2a..10d0f2ed9f69 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1945,8 +1945,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order, arch_alloc_page(page, order); kernel_map_pages(page, 1 << order, 1); - kernel_poison_pages(page, 1 << order, 1); kasan_alloc_pages(page, order); + kernel_poison_pages(page, 1 << order, 1); set_page_owner(page, order, gfp_flags); } -- cgit v1.2.3 From a9cd410a3d296846a8125aa43d97a573a354c472 Mon Sep 17 00:00:00 2001 From: Arun KS Date: Tue, 5 Mar 2019 15:42:14 -0800 Subject: mm/page_alloc.c: memory hotplug: free pages as higher order When freeing pages are done with higher order, time spent on coalescing pages by buddy allocator can be reduced. With section size of 256MB, hot add latency of a single section shows improvement from 50-60 ms to less than 1 ms, hence improving the hot add latency by 60 times. Modify external providers of online callback to align with the change. [arunks@codeaurora.org: v11] Link: http://lkml.kernel.org/r/1547792588-18032-1-git-send-email-arunks@codeaurora.org [akpm@linux-foundation.org: remove unused local, per Arun] [akpm@linux-foundation.org: avoid return of void-returning __free_pages_core(), per Oscar] [akpm@linux-foundation.org: fix it for mm-convert-totalram_pages-and-totalhigh_pages-variables-to-atomic.patch] [arunks@codeaurora.org: v8] Link: http://lkml.kernel.org/r/1547032395-24582-1-git-send-email-arunks@codeaurora.org [arunks@codeaurora.org: v9] Link: http://lkml.kernel.org/r/1547098543-26452-1-git-send-email-arunks@codeaurora.org Link: http://lkml.kernel.org/r/1538727006-5727-1-git-send-email-arunks@codeaurora.org Signed-off-by: Arun KS Reviewed-by: Andrew Morton Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Reviewed-by: Alexander Duyck Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Dan Williams Cc: Vlastimil Babka Cc: Joonsoo Kim Cc: Greg Kroah-Hartman Cc: Mathieu Malaterre Cc: "Kirill A. Shutemov" Cc: Souptick Joarder Cc: Mel Gorman Cc: Aaron Lu Cc: Srivatsa Vaddagiri Cc: Vinayak Menon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 10d0f2ed9f69..5361bd078493 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1303,7 +1303,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) local_irq_restore(flags); } -static void __init __free_pages_boot_core(struct page *page, unsigned int order) +void __free_pages_core(struct page *page, unsigned int order) { unsigned int nr_pages = 1 << order; struct page *p = page; @@ -1382,7 +1382,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, { if (early_page_uninitialised(pfn)) return; - return __free_pages_boot_core(page, order); + __free_pages_core(page, order); } /* @@ -1472,14 +1472,14 @@ static void __init deferred_free_range(unsigned long pfn, if (nr_pages == pageblock_nr_pages && (pfn & (pageblock_nr_pages - 1)) == 0) { set_pageblock_migratetype(page, MIGRATE_MOVABLE); - __free_pages_boot_core(page, pageblock_order); + __free_pages_core(page, pageblock_order); return; } for (i = 0; i < nr_pages; i++, page++, pfn++) { if ((pfn & (pageblock_nr_pages - 1)) == 0) set_pageblock_migratetype(page, MIGRATE_MOVABLE); - __free_pages_boot_core(page, 0); + __free_pages_core(page, 0); } } -- cgit v1.2.3 From 98fa15f34cb379864757670b8e8743b21456a20e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 5 Mar 2019 15:42:58 -0800 Subject: mm: replace all open encodings for NUMA_NO_NODE Patch series "Replace all open encodings for NUMA_NO_NODE", v3. All these places for replacement were found by running the following grep patterns on the entire kernel code. Please let me know if this might have missed some instances. This might also have replaced some false positives. I will appreciate suggestions, inputs and review. 1. git grep "nid == -1" 2. git grep "node == -1" 3. git grep "nid = -1" 4. git grep "node = -1" This patch (of 2): At present there are multiple places where invalid node number is encoded as -1. Even though implicitly understood it is always better to have macros in there. Replace these open encodings for an invalid node number with the global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like 'invalid node' from various places redirecting them to a common definition. Link: http://lkml.kernel.org/r/1545127933-10711-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: David Hildenbrand Acked-by: Jeff Kirsher [ixgbe] Acked-by: Jens Axboe [mtip32xx] Acked-by: Vinod Koul [dmaengine.c] Acked-by: Michael Ellerman [powerpc] Acked-by: Doug Ledford [drivers/infiniband] Cc: Joseph Qi Cc: Hans Verkuil Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5361bd078493..1f9f1409df9b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6016,7 +6016,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn, return state->last_nid; nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn); - if (nid != -1) { + if (nid != NUMA_NO_NODE) { state->last_start = start_pfn; state->last_end = end_pfn; state->last_nid = nid; @@ -6771,7 +6771,7 @@ unsigned long __init node_map_pfn_alignment(void) { unsigned long accl_mask = 0, last_end = 0; unsigned long start, end, mask; - int last_nid = -1; + int last_nid = NUMA_NO_NODE; int i, nid; for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { -- cgit v1.2.3 From 60cd4bcd62384cfa1e5890cebacccf08b3161156 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Tue, 5 Mar 2019 15:43:13 -0800 Subject: memcg: localize memcg_kmem_enabled() check Move the memcg_kmem_enabled() checks into memcg kmem charge/uncharge functions, so, the users don't have to explicitly check that condition. This is purely code cleanup patch without any functional change. Only the order of checks in memcg_charge_slab() can potentially be changed but the functionally it will be same. This should not matter as memcg_charge_slab() is not in the hot path. Link: http://lkml.kernel.org/r/20190103161203.162375-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1f9f1409df9b..034b8b6043a3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1056,7 +1056,7 @@ static __always_inline bool free_pages_prepare(struct page *page, if (PageMappingFlags(page)) page->mapping = NULL; if (memcg_kmem_enabled() && PageKmemcg(page)) - memcg_kmem_uncharge(page, order); + __memcg_kmem_uncharge(page, order); if (check_free) bad += free_pages_check(page); if (bad) @@ -4568,7 +4568,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, out: if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page && - unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) { + unlikely(__memcg_kmem_charge(page, gfp_mask, order) != 0)) { __free_pages(page, order); page = NULL; } -- cgit v1.2.3 From c52e75935f8ded2bd4a75eb08e914bd96802725b Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 5 Mar 2019 15:44:01 -0800 Subject: mm: remove extra drain pages on pcp list In the current implementation, there are two places to isolate a range of page: __offline_pages() and alloc_contig_range(). During this procedure, it will drain pages on pcp list. Below is a brief call flow: __offline_pages()/alloc_contig_range() start_isolate_page_range() set_migratetype_isolate() drain_all_pages() drain_all_pages() <--- A This snippet shows the current logic is isolate and drain pcp list for each pageblock and drain pcp list again for the whole range. start_isolate_page_range is responsible for isolating the given pfn range. One part of that job is to make sure that also pages that are on the allocator pcp lists are properly isolated. Otherwise they could be reused and the range wouldn't be completely isolated until the memory is freed back. While there is no strict guarantee here because pages might get allocated at any time before drain_all_pages is called there doesn't seem to be any strong demand for such a guarantee. In any case, draining is already done at the isolation level and there is no need to do it again later by start_isolate_page_range callers (memory hotplug and CMA allocator currently). Therefore remove pointless draining in existing callers to make the code more clear and functionally correct. [mhocko@suse.com: provide a clearer changelog for the last two paragraphs] Link: http://lkml.kernel.org/r/20190105233141.2329-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Acked-by: Michal Hocko Acked-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 034b8b6043a3..8afb6f007f68 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8196,7 +8196,6 @@ int alloc_contig_range(unsigned long start, unsigned long end, */ lru_add_drain_all(); - drain_all_pages(cc.zone); order = 0; outer_start = start; -- cgit v1.2.3 From fd1444b2729289ea3ef6b6096be604f8983e9f9f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 5 Mar 2019 15:44:50 -0800 Subject: mm, compaction: ignore the fragmentation avoidance boost for isolation and compaction When pageblocks get fragmented, watermarks are artifically boosted to reclaim pages to avoid further fragmentation events. However, compaction is often either fragmentation-neutral or moving movable pages away from unmovable/reclaimable pages. As the true watermarks are preserved, allow compaction to ignore the boost factor. The expected impact is very slight as the main benefit is that compaction is slightly more likely to succeed when the system has been fragmented very recently. On both 1-socket and 2-socket machines for THP-intensive allocation during fragmentation the success rate was increased by less than 1% which is marginal. However, detailed tracing indicated that failure of migration due to a premature ENOMEM triggered by watermark checks were eliminated. Link: http://lkml.kernel.org/r/20190118175136.31341-9-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Dan Carpenter Cc: David Rientjes Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8afb6f007f68..2e132b9e7a93 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2962,7 +2962,7 @@ int __isolate_free_page(struct page *page, unsigned int order) * watermark, because we already know our high-order page * exists. */ - watermark = min_wmark_pages(zone) + (1UL << order); + watermark = zone->_watermark[WMARK_MIN] + (1UL << order); if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return 0; -- cgit v1.2.3 From 5e1f0f098b4649fad53011246bcaeff011ffdf5d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 5 Mar 2019 15:45:41 -0800 Subject: mm, compaction: capture a page under direct compaction Compaction is inherently race-prone as a suitable page freed during compaction can be allocated by any parallel task. This patch uses a capture_control structure to isolate a page immediately when it is freed by a direct compactor in the slow path of the page allocator. The intent is to avoid redundant scanning. 5.0.0-rc1 5.0.0-rc1 selective-v3r17 capture-v3r19 Amean fault-both-1 0.00 ( 0.00%) 0.00 * 0.00%* Amean fault-both-3 2582.11 ( 0.00%) 2563.68 ( 0.71%) Amean fault-both-5 4500.26 ( 0.00%) 4233.52 ( 5.93%) Amean fault-both-7 5819.53 ( 0.00%) 6333.65 ( -8.83%) Amean fault-both-12 9321.18 ( 0.00%) 9759.38 ( -4.70%) Amean fault-both-18 9782.76 ( 0.00%) 10338.76 ( -5.68%) Amean fault-both-24 15272.81 ( 0.00%) 13379.55 * 12.40%* Amean fault-both-30 15121.34 ( 0.00%) 16158.25 ( -6.86%) Amean fault-both-32 18466.67 ( 0.00%) 18971.21 ( -2.73%) Latency is only moderately affected but the devil is in the details. A closer examination indicates that base page fault latency is reduced but latency of huge pages is increased as it takes creater care to succeed. Part of the "problem" is that allocation success rates are close to 100% even when under pressure and compaction gets harder 5.0.0-rc1 5.0.0-rc1 selective-v3r17 capture-v3r19 Percentage huge-3 96.70 ( 0.00%) 98.23 ( 1.58%) Percentage huge-5 96.99 ( 0.00%) 95.30 ( -1.75%) Percentage huge-7 94.19 ( 0.00%) 97.24 ( 3.24%) Percentage huge-12 94.95 ( 0.00%) 97.35 ( 2.53%) Percentage huge-18 96.74 ( 0.00%) 97.30 ( 0.58%) Percentage huge-24 97.07 ( 0.00%) 97.55 ( 0.50%) Percentage huge-30 95.69 ( 0.00%) 98.50 ( 2.95%) Percentage huge-32 96.70 ( 0.00%) 99.27 ( 2.65%) And scan rates are reduced as expected by 6% for the migration scanner and 29% for the free scanner indicating that there is less redundant work. Compaction migrate scanned 20815362 19573286 Compaction free scanned 16352612 11510663 [mgorman@techsingularity.net: remove redundant check] Link: http://lkml.kernel.org/r/20190201143853.GH9565@techsingularity.net Link: http://lkml.kernel.org/r/20190118175136.31341-23-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Dan Carpenter Cc: David Rientjes Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 4 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2e132b9e7a93..09bf2c5f8b4b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, return 0; } +#ifdef CONFIG_COMPACTION +static inline struct capture_control *task_capc(struct zone *zone) +{ + struct capture_control *capc = current->capture_control; + + return capc && + !(current->flags & PF_KTHREAD) && + !capc->page && + capc->cc->zone == zone && + capc->cc->direct_compaction ? capc : NULL; +} + +static inline bool +compaction_capture(struct capture_control *capc, struct page *page, + int order, int migratetype) +{ + if (!capc || order != capc->cc->order) + return false; + + /* Do not accidentally pollute CMA or isolated regions*/ + if (is_migrate_cma(migratetype) || + is_migrate_isolate(migratetype)) + return false; + + /* + * Do not let lower order allocations polluate a movable pageblock. + * This might let an unmovable request use a reclaimable pageblock + * and vice-versa but no more than normal fallback logic which can + * have trouble finding a high-order free page. + */ + if (order < pageblock_order && migratetype == MIGRATE_MOVABLE) + return false; + + capc->page = page; + return true; +} + +#else +static inline struct capture_control *task_capc(struct zone *zone) +{ + return NULL; +} + +static inline bool +compaction_capture(struct capture_control *capc, struct page *page, + int order, int migratetype) +{ + return false; +} +#endif /* CONFIG_COMPACTION */ + /* * Freeing function for a buddy system allocator. * @@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page, unsigned long uninitialized_var(buddy_pfn); struct page *buddy; unsigned int max_order; + struct capture_control *capc = task_capc(zone); max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); @@ -837,6 +889,11 @@ static inline void __free_one_page(struct page *page, continue_merging: while (order < max_order - 1) { + if (compaction_capture(capc, page, order, migratetype)) { + __mod_zone_freepage_state(zone, -(1 << order), + migratetype); + return; + } buddy_pfn = __find_buddy_pfn(pfn, order); buddy = page + (buddy_pfn - pfn); @@ -3710,7 +3767,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, enum compact_result *compact_result) { - struct page *page; + struct page *page = NULL; unsigned long pflags; unsigned int noreclaim_flag; @@ -3721,13 +3778,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, noreclaim_flag = memalloc_noreclaim_save(); *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, - prio); + prio, &page); memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); - if (*compact_result <= COMPACT_INACTIVE) + if (*compact_result <= COMPACT_INACTIVE) { + WARN_ON_ONCE(page); return NULL; + } /* * At least in one zone compaction wasn't deferred or skipped, so let's @@ -3735,7 +3794,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, */ count_vm_event(COMPACTSTALL); - page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac); + /* Prep a captured page if available */ + if (page) + prep_new_page(page, order, gfp_mask, alloc_flags); + + /* Try get a page from the freelist if available */ + if (!page) + page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac); if (page) { struct zone *zone = page_zone(page); -- cgit v1.2.3 From d9f7979c92f7b34469c1ca5d1f3add6681fd567c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 5 Mar 2019 15:46:09 -0800 Subject: mm: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Link: http://lkml.kernel.org/r/20190122152151.16139-14-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Cc: Michal Hocko Cc: Vlastimil Babka Cc: David Rientjes Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 09bf2c5f8b4b..9be9a22ebe35 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3230,24 +3230,14 @@ static int __init fail_page_alloc_debugfs(void) dir = fault_create_debugfs_attr("fail_page_alloc", NULL, &fail_page_alloc.attr); - if (IS_ERR(dir)) - return PTR_ERR(dir); - - if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, - &fail_page_alloc.ignore_gfp_reclaim)) - goto fail; - if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir, - &fail_page_alloc.ignore_gfp_highmem)) - goto fail; - if (!debugfs_create_u32("min-order", mode, dir, - &fail_page_alloc.min_order)) - goto fail; - return 0; -fail: - debugfs_remove_recursive(dir); + debugfs_create_bool("ignore-gfp-wait", mode, dir, + &fail_page_alloc.ignore_gfp_reclaim); + debugfs_create_bool("ignore-gfp-highmem", mode, dir, + &fail_page_alloc.ignore_gfp_highmem); + debugfs_create_u32("min-order", mode, dir, &fail_page_alloc.min_order); - return -ENOMEM; + return 0; } late_initcall(fail_page_alloc_debugfs); -- cgit v1.2.3 From 8bb4e7a2ee26c05a94ae6cb0aec2f82a3523cf35 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 5 Mar 2019 15:46:22 -0800 Subject: mm: fix some typos in mm directory No functional change. Link: http://lkml.kernel.org/r/20190118235123.27843-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Pekka Enberg Acked-by: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9be9a22ebe35..ec250453f5e8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7551,7 +7551,7 @@ static void __setup_per_zone_wmarks(void) * value here. * * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) - * deltas control asynch page reclaim, and so should + * deltas control async page reclaim, and so should * not be capped for highmem. */ unsigned long min_pages; @@ -8028,7 +8028,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, /* * Hugepages are not in LRU lists, but they're movable. - * We need not scan over tail pages bacause we don't + * We need not scan over tail pages because we don't * handle each tail page individually in migration. */ if (PageHuge(page)) { -- cgit v1.2.3 From 23a7052a5db478bdacf45bea55e6f50171f5eede Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 5 Mar 2019 15:46:43 -0800 Subject: mm/page_alloc.c: check return value of memblock_alloc_node_nopanic() There are two early memory allocations that use memblock_alloc_node_nopanic() and do not check its return value. While this happens very early during boot and chances that the allocation will fail are diminishing, it is still worth to have proper checks for the allocation errors. Link: http://lkml.kernel.org/r/1547734941-944-1-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ec250453f5e8..11a5f50efd97 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6431,10 +6431,14 @@ static void __ref setup_usemap(struct pglist_data *pgdat, { unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize); zone->pageblock_flags = NULL; - if (usemapsize) + if (usemapsize) { zone->pageblock_flags = memblock_alloc_node_nopanic(usemapsize, pgdat->node_id); + if (!zone->pageblock_flags) + panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n", + usemapsize, zone->name, pgdat->node_id); + } } #else static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone, @@ -6664,6 +6668,9 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); map = memblock_alloc_node_nopanic(size, pgdat->node_id); + if (!map) + panic("Failed to allocate %ld bytes for node %d memory map\n", + size, pgdat->node_id); pgdat->node_mem_map = map + offset; } pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", -- cgit v1.2.3 From b9726c26dc21b15a2faea96fae3a42f2f7fffdcb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 5 Mar 2019 15:48:26 -0800 Subject: numa: make "nr_node_ids" unsigned int Number of NUMA nodes can't be negative. This saves a few bytes on x86_64: add/remove: 0/0 grow/shrink: 4/21 up/down: 27/-265 (-238) Function old new delta hv_synic_alloc.cold 88 110 +22 prealloc_shrinker 260 262 +2 bootstrap 249 251 +2 sched_init_numa 1566 1567 +1 show_slab_objects 778 777 -1 s_show 1201 1200 -1 kmem_cache_init 346 345 -1 __alloc_workqueue_key 1146 1145 -1 mem_cgroup_css_alloc 1614 1612 -2 __do_sys_swapon 4702 4699 -3 __list_lru_init 655 651 -4 nic_probe 2379 2374 -5 store_user_store 118 111 -7 red_zone_store 106 99 -7 poison_store 106 99 -7 wq_numa_init 348 338 -10 __kmem_cache_empty 75 65 -10 task_numa_free 186 173 -13 merge_across_nodes_store 351 336 -15 irq_create_affinity_masks 1261 1246 -15 do_numa_crng_init 343 321 -22 task_numa_fault 4760 4737 -23 swapfile_init 179 156 -23 hv_synic_alloc 536 492 -44 apply_wqattrs_prepare 746 695 -51 Link: http://lkml.kernel.org/r/20190201223029.GA15820@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 11a5f50efd97..8df43caf2eb7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL(movable_zone); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #if MAX_NUMNODES > 1 -int nr_node_ids __read_mostly = MAX_NUMNODES; +unsigned int nr_node_ids __read_mostly = MAX_NUMNODES; int nr_online_nodes __read_mostly = 1; EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); -- cgit v1.2.3 From ce0725f78a56a59bdb07cef003bc6fef722da38e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 5 Mar 2019 15:48:29 -0800 Subject: numa: make "nr_online_nodes" unsigned int Number of online NUMA nodes can't be negative as well. This doesn't save space as the variable is used only in 32-bit context, but do it anyway for consistency. Link: http://lkml.kernel.org/r/20190201223151.GB15820@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8df43caf2eb7..c29828ec9183 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -290,7 +290,7 @@ EXPORT_SYMBOL(movable_zone); #if MAX_NUMNODES > 1 unsigned int nr_node_ids __read_mostly = MAX_NUMNODES; -int nr_online_nodes __read_mostly = 1; +unsigned int nr_online_nodes __read_mostly = 1; EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif @@ -5664,7 +5664,7 @@ void __ref build_all_zonelists(pg_data_t *pgdat) else page_group_by_mobility_disabled = 0; - pr_info("Built %i zonelists, mobility grouping %s. Total pages: %ld\n", + pr_info("Built %u zonelists, mobility grouping %s. Total pages: %ld\n", nr_online_nodes, page_group_by_mobility_disabled ? "off" : "on", vm_total_pages); -- cgit v1.2.3 From a862f68a8b360086f248cbc3606029441b5f5197 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 5 Mar 2019 15:48:42 -0800 Subject: docs/core-api/mm: fix return value descriptions in mm/ Many kernel-doc comments in mm/ have the return value descriptions either misformatted or omitted at all which makes kernel-doc script unhappy: $ make V=1 htmldocs ... ./mm/util.c:36: info: Scanning doc for kstrdup ./mm/util.c:41: warning: No description found for return value of 'kstrdup' ./mm/util.c:57: info: Scanning doc for kstrdup_const ./mm/util.c:66: warning: No description found for return value of 'kstrdup_const' ./mm/util.c:75: info: Scanning doc for kstrndup ./mm/util.c:83: warning: No description found for return value of 'kstrndup' ... Fixing the formatting and adding the missing return value descriptions eliminates ~100 such warnings. Link: http://lkml.kernel.org/r/1549549644-4903-4-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrew Morton Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c29828ec9183..4e1d9118ae52 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4816,6 +4816,8 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, * This function is also limited by MAX_ORDER. * * Memory allocated by this function must be released by free_pages_exact(). + * + * Return: pointer to the allocated area or %NULL in case of error. */ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) { @@ -4836,6 +4838,8 @@ EXPORT_SYMBOL(alloc_pages_exact); * * Like alloc_pages_exact(), but try to allocate on node nid first before falling * back. + * + * Return: pointer to the allocated area or %NULL in case of error. */ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) { @@ -4869,11 +4873,13 @@ EXPORT_SYMBOL(free_pages_exact); * nr_free_zone_pages - count number of pages beyond high watermark * @offset: The zone index of the highest zone * - * nr_free_zone_pages() counts the number of counts pages which are beyond the + * nr_free_zone_pages() counts the number of pages which are beyond the * high watermark within all zones at or below a given zone index. For each * zone, the number of pages is calculated as: * * nr_free_zone_pages = managed_pages - high_pages + * + * Return: number of pages beyond high watermark. */ static unsigned long nr_free_zone_pages(int offset) { @@ -4900,6 +4906,9 @@ static unsigned long nr_free_zone_pages(int offset) * * nr_free_buffer_pages() counts the number of pages which are beyond the high * watermark within ZONE_DMA and ZONE_NORMAL. + * + * Return: number of pages beyond high watermark within ZONE_DMA and + * ZONE_NORMAL. */ unsigned long nr_free_buffer_pages(void) { @@ -4912,6 +4921,8 @@ EXPORT_SYMBOL_GPL(nr_free_buffer_pages); * * nr_free_pagecache_pages() counts the number of pages which are beyond the * high watermark within all zones. + * + * Return: number of pages beyond high watermark within all zones. */ unsigned long nr_free_pagecache_pages(void) { @@ -5358,7 +5369,8 @@ static int node_load[MAX_NUMNODES]; * from each node to each node in the system), and should also prefer nodes * with no CPUs, since presumably they'll have very little allocation pressure * on them otherwise. - * It returns -1 if no node is found. + * + * Return: node id of the found node or %NUMA_NO_NODE if no node is found. */ static int find_next_best_node(int node, nodemask_t *used_node_mask) { @@ -6269,7 +6281,7 @@ unsigned long __init __absent_pages_in_range(int nid, * @start_pfn: The start PFN to start searching for holes * @end_pfn: The end PFN to stop searching for holes * - * It returns the number of pages frames in memory holes within a range. + * Return: the number of pages frames in memory holes within a range. */ unsigned long __init absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn) @@ -6826,7 +6838,7 @@ void __init setup_nr_node_ids(void) * model has fine enough granularity to avoid incorrect mapping for the * populated node map. * - * Returns the determined alignment in pfn's. 0 if there is no alignment + * Return: the determined alignment in pfn's. 0 if there is no alignment * requirement (single node). */ unsigned long __init node_map_pfn_alignment(void) @@ -6881,7 +6893,7 @@ static unsigned long __init find_min_pfn_for_node(int nid) /** * find_min_pfn_with_active_regions - Find the minimum PFN registered * - * It returns the minimum PFN based on information provided via + * Return: the minimum PFN based on information provided via * memblock_set_node(). */ unsigned long __init find_min_pfn_with_active_regions(void) @@ -8174,7 +8186,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, * pageblocks in the range. Once isolated, the pageblocks should not * be modified by others. * - * Returns zero on success or negative error code. On success all + * Return: zero on success or negative error code. On success all * pages which PFN is in [start, end) are allocated for the caller and * need to be freed with free_contig_range(). */ -- cgit v1.2.3 From afa00112893f4ca02777c3cf4f93011577af5ffc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2019 15:49:13 -0800 Subject: mm: unexport free_reserved_area This function is only used by built-in code, which makes perfect sense given the purpose of it. Link: http://lkml.kernel.org/r/20190213174621.29297-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e1d9118ae52..3eb01dedfb50 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7341,7 +7341,6 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char return pages; } -EXPORT_SYMBOL(free_reserved_area); #ifdef CONFIG_HIGHMEM void free_highmem_page(struct page *page) -- cgit v1.2.3