From d8f5f7e445f02eb10dee1a0a992146314cf460f8 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 29 Aug 2023 14:37:34 -0700 Subject: hugetlb: set hugetlb page flag before optimizing vmemmap Currently, vmemmap optimization of hugetlb pages is performed before the hugetlb flag (previously hugetlb destructor) is set identifying it as a hugetlb folio. This means there is a window of time where an ordinary folio does not have all associated vmemmap present. The core mm only expects vmemmap to be potentially optimized for hugetlb and device dax. This can cause problems in code such as memory error handling that may want to write to tail struct pages. There is only one call to perform hugetlb vmemmap optimization today. To fix this issue, simply set the hugetlb flag before that call. There was a similar issue in the free hugetlb path that was previously addressed. The two routines that optimize or restore hugetlb vmemmap should only be passed hugetlb folios/pages. To catch any callers not following this rule, add VM_WARN_ON calls to the routines. In the hugetlb free code paths, some calls could be made to restore vmemmap after clearing the hugetlb flag. This was 'safe' as in these cases vmemmap was already present and the call was a NOOP. However, for consistency these calls where eliminated so that we can add the VM_WARN_ON checks. Link: https://lkml.kernel.org/r/20230829213734.69673-1-mike.kravetz@oracle.com Fixes: f41f2ed43ca5 ("mm: hugetlb: free the vmemmap pages associated with each HugeTLB page") Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: James Houghton Cc: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Usama Arif Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4b9734777f69..aeb7dd889eee 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "hugetlb_vmemmap.h" @@ -456,6 +457,7 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; unsigned long vmemmap_reuse; + VM_WARN_ON_ONCE(!PageHuge(head)); if (!HPageVmemmapOptimized(head)) return 0; @@ -550,6 +552,7 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; unsigned long vmemmap_reuse; + VM_WARN_ON_ONCE(!PageHuge(head)); if (!vmemmap_should_optimize(h, head)) return; -- cgit v1.2.3 From 2eaa6c2abb9dd55041a05c20c451790c124d5cf0 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Tue, 5 Sep 2023 20:45:03 +0800 Subject: mm: hugetlb_vmemmap: fix hugetlb page number decrease failed on movable nodes The decreasing of hugetlb pages number failed with the following message given: sh: page allocation failure: order:0, mode:0x204cc0(GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_THISNODE) CPU: 1 PID: 112 Comm: sh Not tainted 6.5.0-rc7-... 
#45 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace.part.6+0x84/0xe4 show_stack+0x18/0x24 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x24 warn_alloc+0x100/0x1bc __alloc_pages_slowpath.constprop.107+0xa40/0xad8 __alloc_pages+0x244/0x2d0 hugetlb_vmemmap_restore+0x104/0x1e4 __update_and_free_hugetlb_folio+0x44/0x1f4 update_and_free_hugetlb_folio+0x20/0x68 update_and_free_pages_bulk+0x4c/0xac set_max_huge_pages+0x198/0x334 nr_hugepages_store_common+0x118/0x178 nr_hugepages_store+0x18/0x24 kobj_attr_store+0x18/0x2c sysfs_kf_write+0x40/0x54 kernfs_fop_write_iter+0x164/0x1dc vfs_write+0x3a8/0x460 ksys_write+0x6c/0x100 __arm64_sys_write+0x1c/0x28 invoke_syscall+0x44/0x100 el0_svc_common.constprop.1+0x6c/0xe4 do_el0_svc+0x38/0x94 el0_svc+0x28/0x74 el0t_64_sync_handler+0xa0/0xc4 el0t_64_sync+0x174/0x178 Mem-Info: ... The reason is that the hugetlb pages being released are allocated from movable nodes, and with hugetlb_optimize_vmemmap enabled, vmemmap pages need to be allocated from the same node during the hugetlb pages releasing. With GFP_KERNEL and __GFP_THISNODE set, allocating from movable node is always failed. Fix this problem by removing __GFP_THISNODE. Link: https://lkml.kernel.org/r/20230905124503.24899-1-yuancan@huawei.com Fixes: ad2fa3717b74 ("mm: hugetlb: alloc the vmemmap pages associated with each HugeTLB page") Signed-off-by: Yuan Can Reviewed-by: Muchun Song Cc: Kefeng Wang Cc: Mike Kravetz Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index aeb7dd889eee..3e5387835ad6 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -381,7 +381,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, struct list_head *list) { - gfp_t gfp_mask = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_THISNODE; + gfp_t gfp_mask = GFP_KERNEL | __GFP_RETRY_MAYFAIL; unsigned long nr_pages = (end - start) >> PAGE_SHIFT; int nid = page_to_nid((struct page *)start); struct page *page, *next; -- cgit v1.2.3 From 6a898c2757af1ac852bb917a0866d2724f303076 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 6 Sep 2023 17:31:57 +0800 Subject: mm: hugetlb_vmemmap: allow alloc vmemmap pages fallback to other nodes In vmemmap_remap_free(), a new head vmemmap page is allocated to avoid breaking a contiguous block of struct page memory, however, the allocation can always fail when the given node is movable node. Remove the __GFP_THISNODE to help avoid fragmentation. 
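As a purely illustrative aside (the helper below is invented, not part of either patch), the difference the flag makes can be sketched as follows: with __GFP_THISNODE the request is pinned to @nid and fails outright when that node is movable-only, whereas without it @nid is only the preferred node and the page allocator may fall back to other nodes.

    /*
     * Illustrative sketch only. Allocate one page preferring @nid; since
     * __GFP_THISNODE is not set, the allocator may fall back to other
     * nodes when @nid (e.g. a movable-only node) cannot provide
     * GFP_KERNEL memory.
     */
    static struct page *alloc_vmemmap_page_prefer_nid(int nid)
    {
            gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;

            return alloc_pages_node(nid, gfp_mask, 0);
    }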
Link: https://lkml.kernel.org/r/20230906093157.9737-1-yuancan@huawei.com Signed-off-by: Yuan Can Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Suggested-by: Muchun Song Reviewed-by: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 3e5387835ad6..0bde38626d25 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -320,8 +320,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, .vmemmap_pages = &vmemmap_pages, }; int nid = page_to_nid((struct page *)start); - gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY | - __GFP_NOWARN; + gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; /* * Allocate a new head vmemmap page to avoid breaking a contiguous -- cgit v1.2.3 From a9e34ea1f62c1e359ed197ec01d056c25edb2b61 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 13 Sep 2023 11:53:58 +0100 Subject: mm: hugetlb_vmemmap: use nid of the head page to reallocate it Patch series "mm: hugetlb: Skip initialization of gigantic tail struct pages if freed by HVO", v5. This series moves the boot time initialization of tail struct pages of a gigantic page to later on in the boot. Only the HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page) - 1 tail struct pages are initialized at the start. If HVO is successful, then no more tail struct pages need to be initialized. For a 1G hugepage, this series avoid initialization of 262144 - 63 = 262081 struct pages per hugepage. When tested on a 512G system (allocating 500 1G hugepages), the kexec-boot times with DEFERRED_STRUCT_PAGE_INIT enabled are: - with patches, HVO enabled: 1.32 seconds - with patches, HVO disabled: 2.15 seconds - without patches, HVO enabled: 3.90 seconds - without patches, HVO disabled: 3.58 seconds This represents an approximately 70% reduction in boot time and will significantly reduce server downtime when using a large number of gigantic pages. This patch (of 4): If tail page prep and initialization is skipped, then the "start" page will not contain the correct nid. Use the nid from first vmemap page. Link: https://lkml.kernel.org/r/20230913105401.519709-1-usama.arif@bytedance.com Link: https://lkml.kernel.org/r/20230913105401.519709-2-usama.arif@bytedance.com Signed-off-by: Usama Arif Reviewed-by: Muchun Song Reviewed-by: Mike Kravetz Cc: Fam Zheng Cc: Mike Rapoport (IBM) Cc: Punit Agrawal Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 0bde38626d25..ad650c7b07ec 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -319,7 +319,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, .reuse_addr = reuse, .vmemmap_pages = &vmemmap_pages, }; - int nid = page_to_nid((struct page *)start); + int nid = page_to_nid((struct page *)reuse); gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; /* -- cgit v1.2.3 From fde1c4ecf91640e5a95ec36b71ec2e8ec379ce40 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 13 Sep 2023 11:54:01 +0100 Subject: mm: hugetlb: skip initialization of gigantic tail struct pages if freed by HVO The new boot flow when it comes to initialization of gigantic pages is as follows: - At boot time, for a gigantic page during __alloc_bootmem_hugepage, the region after the first struct page is marked as noinit. 
- This results in only the first struct page to be initialized in reserve_bootmem_region. As the tail struct pages are not initialized at this point, there can be a significant saving in boot time if HVO succeeds later on. - Later on in the boot, the head page is prepped and the first HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page) - 1 tail struct pages are initialized. - HVO is attempted. If it is not successful, then the rest of the tail struct pages are initialized. If it is successful, no more tail struct pages need to be initialized saving significant boot time. The WARN_ON for increased ref count in gather_bootmem_prealloc was changed to a VM_BUG_ON. This is OK as there should be no speculative references this early in boot process. The VM_BUG_ON's are there just in case such code is introduced. [akpm@linux-foundation.org: make it nicer for 80 cols] Link: https://lkml.kernel.org/r/20230913105401.519709-5-usama.arif@bytedance.com Signed-off-by: Usama Arif Reviewed-by: Muchun Song Reviewed-by: Mike Kravetz Cc: Fam Zheng Cc: Mike Rapoport (IBM) Cc: Punit Agrawal Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index ad650c7b07ec..76682d1d79a7 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -588,7 +588,7 @@ static int __init hugetlb_vmemmap_init(void) const struct hstate *h; /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */ - BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE); + BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES); for_each_hstate(h) { if (hugetlb_vmemmap_optimizable(h)) { -- cgit v1.2.3 From 79359d6d24df2f304abbf6f137b01d2b76175ce3 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 18 Oct 2023 19:31:05 -0700 Subject: hugetlb: perform vmemmap optimization on a list of pages When adding hugetlb pages to the pool, we first create a list of the allocated pages before adding to the pool. Pass this list of pages to a new routine hugetlb_vmemmap_optimize_folios() for vmemmap optimization. Due to significant differences in vmemmmap initialization for bootmem allocated hugetlb pages, a new routine prep_and_add_bootmem_folios is created. We also modify the routine vmemmap_should_optimize() to check for pages that are already optimized. There are code paths that might request vmemmap optimization twice and we want to make sure this is not attempted. Link: https://lkml.kernel.org/r/20231019023113.345257-4-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Anshuman Khandual Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: David Rientjes Cc: James Houghton Cc: Joao Martins Cc: Konrad Dybcio Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Sergey Senozhatsky Cc: Usama Arif Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 76682d1d79a7..4558b814ffab 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -483,6 +483,9 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) /* Return true iff a HugeTLB whose vmemmap should and can be optimized. 
*/ static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head) { + if (HPageVmemmapOptimized((struct page *)head)) + return false; + if (!READ_ONCE(vmemmap_optimize_enabled)) return false; @@ -572,6 +575,14 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) SetHPageVmemmapOptimized(head); } +void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) +{ + struct folio *folio; + + list_for_each_entry(folio, folio_list, lru) + hugetlb_vmemmap_optimize(h, &folio->page); +} + static struct ctl_table hugetlb_vmemmap_sysctls[] = { { .procname = "hugetlb_optimize_vmemmap", -- cgit v1.2.3 From cfb8c75099dbf152db1b075eead5029c5a30ce51 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 18 Oct 2023 19:31:06 -0700 Subject: hugetlb: perform vmemmap restoration on a list of pages The routine update_and_free_pages_bulk already performs vmemmap restoration on the list of hugetlb pages in a separate step. In preparation for more functionality to be added in this step, create a new routine hugetlb_vmemmap_restore_folios() that will restore vmemmap for a list of folios. This new routine must provide sufficient feedback about errors and actual restoration performed so that update_and_free_pages_bulk can perform optimally. Special care must be taken when encountering an error from hugetlb_vmemmap_restore_folios. We want to continue making as much forward progress as possible. A new routine bulk_vmemmap_restore_error handles this specific situation. Link: https://lkml.kernel.org/r/20231019023113.345257-5-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Anshuman Khandual Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: David Rientjes Cc: James Houghton Cc: Joao Martins Cc: Konrad Dybcio Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Sergey Senozhatsky Cc: Usama Arif Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4558b814ffab..77f44b81ff01 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -480,6 +480,44 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) return ret; } +/** + * hugetlb_vmemmap_restore_folios - restore vmemmap for every folio on the list. + * @h: hstate. + * @folio_list: list of folios. + * @non_hvo_folios: Output list of folios for which vmemmap exists. + * + * Return: number of folios for which vmemmap was restored, or an error code + * if an error was encountered restoring vmemmap for a folio. + * Folios that have vmemmap are moved to the non_hvo_folios + * list. Processing of entries stops when the first error is + * encountered. The folio that experienced the error and all + * non-processed folios will remain on folio_list. 
+ */ +long hugetlb_vmemmap_restore_folios(const struct hstate *h, + struct list_head *folio_list, + struct list_head *non_hvo_folios) +{ + struct folio *folio, *t_folio; + long restored = 0; + long ret = 0; + + list_for_each_entry_safe(folio, t_folio, folio_list, lru) { + if (folio_test_hugetlb_vmemmap_optimized(folio)) { + ret = hugetlb_vmemmap_restore(h, &folio->page); + if (ret) + break; + restored++; + } + + /* Add non-optimized folios to output list */ + list_move(&folio->lru, non_hvo_folios); + } + + if (!ret) + ret = restored; + return ret; +} + /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */ static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head) { -- cgit v1.2.3 From 91f386bf0772c1976622bd0b119b78094603c3d0 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 18 Oct 2023 19:31:07 -0700 Subject: hugetlb: batch freeing of vmemmap pages Now that batching of hugetlb vmemmap optimization processing is possible, batch the freeing of vmemmap pages. When freeing vmemmap pages for a hugetlb page, we add them to a list that is freed after the entire batch has been processed. This enhances the ability to return contiguous ranges of memory to the low level allocators. Link: https://lkml.kernel.org/r/20231019023113.345257-6-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Anshuman Khandual Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: David Rientjes Cc: James Houghton Cc: Joao Martins Cc: Konrad Dybcio Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Sergey Senozhatsky Cc: Usama Arif Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 82 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 26 deletions(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 77f44b81ff01..4ac521e596db 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -251,7 +251,7 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, } entry = mk_pte(walk->reuse_page, pgprot); - list_add_tail(&page->lru, walk->vmemmap_pages); + list_add(&page->lru, walk->vmemmap_pages); set_pte_at(&init_mm, addr, pte, entry); } @@ -306,18 +306,20 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. + * @vmemmap_pages: list to deposit vmemmap pages to be freed. It is callers + * responsibility to free pages. * * Return: %0 on success, negative error code otherwise. 
*/ static int vmemmap_remap_free(unsigned long start, unsigned long end, - unsigned long reuse) + unsigned long reuse, + struct list_head *vmemmap_pages) { int ret; - LIST_HEAD(vmemmap_pages); struct vmemmap_remap_walk walk = { .remap_pte = vmemmap_remap_pte, .reuse_addr = reuse, - .vmemmap_pages = &vmemmap_pages, + .vmemmap_pages = vmemmap_pages, }; int nid = page_to_nid((struct page *)reuse); gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; @@ -334,7 +336,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, if (walk.reuse_page) { copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); - list_add(&walk.reuse_page->lru, &vmemmap_pages); + list_add(&walk.reuse_page->lru, vmemmap_pages); } /* @@ -365,15 +367,13 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, walk = (struct vmemmap_remap_walk) { .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, - .vmemmap_pages = &vmemmap_pages, + .vmemmap_pages = vmemmap_pages, }; vmemmap_remap_range(reuse, end, &walk); } mmap_read_unlock(&init_mm); - free_vmemmap_page_list(&vmemmap_pages); - return ret; } @@ -389,7 +389,7 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, page = alloc_pages_node(nid, gfp_mask, 0); if (!page) goto out; - list_add_tail(&page->lru, list); + list_add(&page->lru, list); } return 0; @@ -577,24 +577,17 @@ static bool vmemmap_should_optimize(const struct hstate *h, const struct page *h return true; } -/** - * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages. - * @h: struct hstate. - * @head: the head page whose vmemmap pages will be optimized. - * - * This function only tries to optimize @head's vmemmap pages and does not - * guarantee that the optimization will succeed after it returns. The caller - * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages - * have been optimized. - */ -void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) +static int __hugetlb_vmemmap_optimize(const struct hstate *h, + struct page *head, + struct list_head *vmemmap_pages) { + int ret = 0; unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; unsigned long vmemmap_reuse; VM_WARN_ON_ONCE(!PageHuge(head)); if (!vmemmap_should_optimize(h, head)) - return; + return ret; static_branch_inc(&hugetlb_optimize_vmemmap_key); @@ -604,21 +597,58 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) /* * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end) - * to the page which @vmemmap_reuse is mapped to, then free the pages - * which the range [@vmemmap_start, @vmemmap_end] is mapped to. + * to the page which @vmemmap_reuse is mapped to. Add pages previously + * mapping the range to vmemmap_pages list so that they can be freed by + * the caller. */ - if (vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse)) + ret = vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse, vmemmap_pages); + if (ret) static_branch_dec(&hugetlb_optimize_vmemmap_key); else SetHPageVmemmapOptimized(head); + + return ret; +} + +/** + * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages. + * @h: struct hstate. + * @head: the head page whose vmemmap pages will be optimized. + * + * This function only tries to optimize @head's vmemmap pages and does not + * guarantee that the optimization will succeed after it returns. The caller + * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages + * have been optimized. 
+ */ +void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) +{ + LIST_HEAD(vmemmap_pages); + + __hugetlb_vmemmap_optimize(h, head, &vmemmap_pages); + free_vmemmap_page_list(&vmemmap_pages); } void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) { struct folio *folio; + LIST_HEAD(vmemmap_pages); + + list_for_each_entry(folio, folio_list, lru) { + int ret = __hugetlb_vmemmap_optimize(h, &folio->page, + &vmemmap_pages); + + /* + * Pages to be freed may have been accumulated. If we + * encounter an ENOMEM, free what we have and try again. + */ + if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) { + free_vmemmap_page_list(&vmemmap_pages); + INIT_LIST_HEAD(&vmemmap_pages); + __hugetlb_vmemmap_optimize(h, &folio->page, &vmemmap_pages); + } + } - list_for_each_entry(folio, folio_list, lru) - hugetlb_vmemmap_optimize(h, &folio->page); + free_vmemmap_page_list(&vmemmap_pages); } static struct ctl_table hugetlb_vmemmap_sysctls[] = { -- cgit v1.2.3 From f4b7e3efaddbf8fa7cffacb5956b0823b7a7730a Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 18 Oct 2023 19:31:08 -0700 Subject: hugetlb: batch PMD split for bulk vmemmap dedup In an effort to minimize amount of TLB flushes, batch all PMD splits belonging to a range of pages in order to perform only 1 (global) TLB flush. Add a flags field to the walker and pass whether it's a bulk allocation or just a single page to decide to remap. First value (VMEMMAP_SPLIT_NO_TLB_FLUSH) designates the request to not do the TLB flush when we split the PMD. Rebased and updated by Mike Kravetz Link: https://lkml.kernel.org/r/20231019023113.345257-7-mike.kravetz@oracle.com Signed-off-by: Joao Martins Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Anshuman Khandual Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: David Rientjes Cc: James Houghton Cc: Konrad Dybcio Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Sergey Senozhatsky Cc: Usama Arif Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 4 deletions(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4ac521e596db..10739e4285d5 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -27,6 +27,8 @@ * @reuse_addr: the virtual address of the @reuse_page page. * @vmemmap_pages: the list head of the vmemmap pages that can be freed * or is mapped from. + * @flags: used to modify behavior in vmemmap page table walking + * operations. */ struct vmemmap_remap_walk { void (*remap_pte)(pte_t *pte, unsigned long addr, @@ -35,9 +37,13 @@ struct vmemmap_remap_walk { struct page *reuse_page; unsigned long reuse_addr; struct list_head *vmemmap_pages; + +/* Skip the TLB flush when we split the PMD */ +#define VMEMMAP_SPLIT_NO_TLB_FLUSH BIT(0) + unsigned long flags; }; -static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) +static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, bool flush) { pmd_t __pmd; int i; @@ -80,7 +86,8 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) /* Make pte visible before pmd. See comment in pmd_install(). 
*/ smp_wmb(); pmd_populate_kernel(&init_mm, pmd, pgtable); - flush_tlb_kernel_range(start, start + PMD_SIZE); + if (flush) + flush_tlb_kernel_range(start, start + PMD_SIZE); } else { pte_free_kernel(&init_mm, pgtable); } @@ -127,11 +134,20 @@ static int vmemmap_pmd_range(pud_t *pud, unsigned long addr, do { int ret; - ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK); + ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK, + !(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH)); if (ret) return ret; next = pmd_addr_end(addr, end); + + /* + * We are only splitting, not remapping the hugetlb vmemmap + * pages. + */ + if (!walk->remap_pte) + continue; + vmemmap_pte_range(pmd, addr, next, walk); } while (pmd++, addr = next, addr != end); @@ -198,7 +214,8 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, return ret; } while (pgd++, addr = next, addr != end); - flush_tlb_kernel_range(start, end); + if (walk->remap_pte) + flush_tlb_kernel_range(start, end); return 0; } @@ -297,6 +314,36 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot)); } +/** + * vmemmap_remap_split - split the vmemmap virtual address range [@start, @end) + * backing PMDs of the directmap into PTEs + * @start: start address of the vmemmap virtual address range that we want + * to remap. + * @end: end address of the vmemmap virtual address range that we want to + * remap. + * @reuse: reuse address. + * + * Return: %0 on success, negative error code otherwise. + */ +static int vmemmap_remap_split(unsigned long start, unsigned long end, + unsigned long reuse) +{ + int ret; + struct vmemmap_remap_walk walk = { + .remap_pte = NULL, + .flags = VMEMMAP_SPLIT_NO_TLB_FLUSH, + }; + + /* See the comment in the vmemmap_remap_free(). */ + BUG_ON(start - reuse != PAGE_SIZE); + + mmap_read_lock(&init_mm); + ret = vmemmap_remap_range(reuse, end, &walk); + mmap_read_unlock(&init_mm); + + return ret; +} + /** * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end) * to the page which @reuse is mapped to, then free vmemmap @@ -320,6 +367,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, .remap_pte = vmemmap_remap_pte, .reuse_addr = reuse, .vmemmap_pages = vmemmap_pages, + .flags = 0, }; int nid = page_to_nid((struct page *)reuse); gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; @@ -368,6 +416,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, .vmemmap_pages = vmemmap_pages, + .flags = 0, }; vmemmap_remap_range(reuse, end, &walk); @@ -419,6 +468,7 @@ static int vmemmap_remap_alloc(unsigned long start, unsigned long end, .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, .vmemmap_pages = &vmemmap_pages, + .flags = 0, }; /* See the comment in the vmemmap_remap_free(). 
*/ @@ -628,11 +678,45 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) free_vmemmap_page_list(&vmemmap_pages); } +static int hugetlb_vmemmap_split(const struct hstate *h, struct page *head) +{ + unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; + unsigned long vmemmap_reuse; + + if (!vmemmap_should_optimize(h, head)) + return 0; + + vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); + vmemmap_reuse = vmemmap_start; + vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; + + /* + * Split PMDs on the vmemmap virtual address range [@vmemmap_start, + * @vmemmap_end] + */ + return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse); +} + void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) { struct folio *folio; LIST_HEAD(vmemmap_pages); + list_for_each_entry(folio, folio_list, lru) { + int ret = hugetlb_vmemmap_split(h, &folio->page); + + /* + * Spliting the PMD requires allocating a page, thus lets fail + * early once we encounter the first OOM. No point in retrying + * as it can be dynamically done on remap with the memory + * we get back from the vmemmap deduplication. + */ + if (ret == -ENOMEM) + break; + } + + flush_tlb_all(); + list_for_each_entry(folio, folio_list, lru) { int ret = __hugetlb_vmemmap_optimize(h, &folio->page, &vmemmap_pages); -- cgit v1.2.3 From f13b83fdd996409e3dbf6f34c7629a9d13fdb9c1 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 18 Oct 2023 19:31:09 -0700 Subject: hugetlb: batch TLB flushes when freeing vmemmap Now that a list of pages is deduplicated at once, the TLB flush can be batched for all vmemmap pages that got remapped. Expand the flags field value to pass whether to skip the TLB flush on remap of the PTE. The TLB flush is global as we don't have guarantees from caller that the set of folios is contiguous, or to add complexity in composing a list of kVAs to flush. Modified by Mike Kravetz to perform TLB flush on single folio if an error is encountered. Link: https://lkml.kernel.org/r/20231019023113.345257-8-mike.kravetz@oracle.com Signed-off-by: Joao Martins Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Anshuman Khandual Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: David Rientjes Cc: James Houghton Cc: Konrad Dybcio Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Sergey Senozhatsky Cc: Usama Arif Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 10739e4285d5..9df350372046 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -40,6 +40,8 @@ struct vmemmap_remap_walk { /* Skip the TLB flush when we split the PMD */ #define VMEMMAP_SPLIT_NO_TLB_FLUSH BIT(0) +/* Skip the TLB flush when we remap the PTE */ +#define VMEMMAP_REMAP_NO_TLB_FLUSH BIT(1) unsigned long flags; }; @@ -214,7 +216,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, return ret; } while (pgd++, addr = next, addr != end); - if (walk->remap_pte) + if (walk->remap_pte && !(walk->flags & VMEMMAP_REMAP_NO_TLB_FLUSH)) flush_tlb_kernel_range(start, end); return 0; @@ -355,19 +357,21 @@ static int vmemmap_remap_split(unsigned long start, unsigned long end, * @reuse: reuse address. * @vmemmap_pages: list to deposit vmemmap pages to be freed. 
It is callers * responsibility to free pages. + * @flags: modifications to vmemmap_remap_walk flags * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse, - struct list_head *vmemmap_pages) + struct list_head *vmemmap_pages, + unsigned long flags) { int ret; struct vmemmap_remap_walk walk = { .remap_pte = vmemmap_remap_pte, .reuse_addr = reuse, .vmemmap_pages = vmemmap_pages, - .flags = 0, + .flags = flags, }; int nid = page_to_nid((struct page *)reuse); gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; @@ -629,7 +633,8 @@ static bool vmemmap_should_optimize(const struct hstate *h, const struct page *h static int __hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head, - struct list_head *vmemmap_pages) + struct list_head *vmemmap_pages, + unsigned long flags) { int ret = 0; unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; @@ -640,6 +645,18 @@ static int __hugetlb_vmemmap_optimize(const struct hstate *h, return ret; static_branch_inc(&hugetlb_optimize_vmemmap_key); + /* + * Very Subtle + * If VMEMMAP_REMAP_NO_TLB_FLUSH is set, TLB flushing is not performed + * immediately after remapping. As a result, subsequent accesses + * and modifications to struct pages associated with the hugetlb + * page could be to the OLD struct pages. Set the vmemmap optimized + * flag here so that it is copied to the new head page. This keeps + * the old and new struct pages in sync. + * If there is an error during optimization, we will immediately FLUSH + * the TLB and clear the flag below. + */ + SetHPageVmemmapOptimized(head); vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; @@ -651,11 +668,12 @@ static int __hugetlb_vmemmap_optimize(const struct hstate *h, * mapping the range to vmemmap_pages list so that they can be freed by * the caller. */ - ret = vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse, vmemmap_pages); - if (ret) + ret = vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse, + vmemmap_pages, flags); + if (ret) { static_branch_dec(&hugetlb_optimize_vmemmap_key); - else - SetHPageVmemmapOptimized(head); + ClearHPageVmemmapOptimized(head); + } return ret; } @@ -674,7 +692,7 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) { LIST_HEAD(vmemmap_pages); - __hugetlb_vmemmap_optimize(h, head, &vmemmap_pages); + __hugetlb_vmemmap_optimize(h, head, &vmemmap_pages, 0); free_vmemmap_page_list(&vmemmap_pages); } @@ -719,19 +737,28 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l list_for_each_entry(folio, folio_list, lru) { int ret = __hugetlb_vmemmap_optimize(h, &folio->page, - &vmemmap_pages); + &vmemmap_pages, + VMEMMAP_REMAP_NO_TLB_FLUSH); /* * Pages to be freed may have been accumulated. If we * encounter an ENOMEM, free what we have and try again. + * This can occur in the case that both spliting fails + * halfway and head page allocation also failed. In this + * case __hugetlb_vmemmap_optimize() would free memory + * allowing more vmemmap remaps to occur. 
*/ if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) { + flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); INIT_LIST_HEAD(&vmemmap_pages); - __hugetlb_vmemmap_optimize(h, &folio->page, &vmemmap_pages); + __hugetlb_vmemmap_optimize(h, &folio->page, + &vmemmap_pages, + VMEMMAP_REMAP_NO_TLB_FLUSH); } } + flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); } -- cgit v1.2.3 From c24f188b22892908a2a3bb2de0ce7d121dd72989 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 18 Oct 2023 19:31:10 -0700 Subject: hugetlb: batch TLB flushes when restoring vmemmap Update the internal hugetlb restore vmemmap code path such that TLB flushing can be batched. Use the existing mechanism of passing the VMEMMAP_REMAP_NO_TLB_FLUSH flag to indicate flushing should not be performed for individual pages. The routine hugetlb_vmemmap_restore_folios is the only user of this new mechanism, and it will perform a global flush after all vmemmap is restored. Link: https://lkml.kernel.org/r/20231019023113.345257-9-mike.kravetz@oracle.com Signed-off-by: Joao Martins Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Anshuman Khandual Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: David Rientjes Cc: James Houghton Cc: Konrad Dybcio Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Sergey Senozhatsky Cc: Usama Arif Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 9df350372046..d2999c303031 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -461,18 +461,19 @@ out: * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. + * @flags: modifications to vmemmap_remap_walk flags * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_alloc(unsigned long start, unsigned long end, - unsigned long reuse) + unsigned long reuse, unsigned long flags) { LIST_HEAD(vmemmap_pages); struct vmemmap_remap_walk walk = { .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, .vmemmap_pages = &vmemmap_pages, - .flags = 0, + .flags = flags, }; /* See the comment in the vmemmap_remap_free(). */ @@ -494,17 +495,7 @@ EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key); static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0); -/** - * hugetlb_vmemmap_restore - restore previously optimized (by - * hugetlb_vmemmap_optimize()) vmemmap pages which - * will be reallocated and remapped. - * @h: struct hstate. - * @head: the head page whose vmemmap pages will be restored. - * - * Return: %0 if @head's vmemmap pages have been reallocated and remapped, - * negative error code otherwise. - */ -int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) +static int __hugetlb_vmemmap_restore(const struct hstate *h, struct page *head, unsigned long flags) { int ret; unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; @@ -525,7 +516,7 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) * When a HugeTLB page is freed to the buddy allocator, previously * discarded vmemmap pages must be allocated and remapping. 
*/ - ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse); + ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse, flags); if (!ret) { ClearHPageVmemmapOptimized(head); static_branch_dec(&hugetlb_optimize_vmemmap_key); @@ -534,6 +525,21 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) return ret; } +/** + * hugetlb_vmemmap_restore - restore previously optimized (by + * hugetlb_vmemmap_optimize()) vmemmap pages which + * will be reallocated and remapped. + * @h: struct hstate. + * @head: the head page whose vmemmap pages will be restored. + * + * Return: %0 if @head's vmemmap pages have been reallocated and remapped, + * negative error code otherwise. + */ +int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) +{ + return __hugetlb_vmemmap_restore(h, head, 0); +} + /** * hugetlb_vmemmap_restore_folios - restore vmemmap for every folio on the list. * @h: hstate. @@ -557,7 +563,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, list_for_each_entry_safe(folio, t_folio, folio_list, lru) { if (folio_test_hugetlb_vmemmap_optimized(folio)) { - ret = hugetlb_vmemmap_restore(h, &folio->page); + ret = __hugetlb_vmemmap_restore(h, &folio->page, + VMEMMAP_REMAP_NO_TLB_FLUSH); if (ret) break; restored++; @@ -567,6 +574,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, list_move(&folio->lru, non_hvo_folios); } + if (restored) + flush_tlb_all(); if (!ret) ret = restored; return ret; -- cgit v1.2.3 From c5ad3233ead566d74918bd76ba2dbdbe83ba1d9d Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 11 Oct 2023 15:45:57 +0100 Subject: hugetlb_vmemmap: use folio argument for hugetlb_vmemmap_* functions Most function calls in hugetlb.c are made with folio arguments. This brings hugetlb_vmemmap calls inline with them by using folio instead of head struct page. Head struct page is still needed within these functions. The set/clear/test functions for hugepages are also changed to folio versions. 
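As a rough sketch of the caller-side effect (simplified; the function name below is hypothetical), hugetlb.c already works on folios, so the vmemmap helpers now take the folio directly rather than each caller extracting the head page:

    /*
     * Hypothetical caller, for illustration only: the folio is passed
     * straight through, and the page-flag helpers become the
     * folio_test_*()/folio_clear_*() variants.
     */
    static void free_one_hugetlb_folio_sketch(struct hstate *h, struct folio *folio)
    {
            /* old style: hugetlb_vmemmap_restore(h, &folio->page); */
            if (hugetlb_vmemmap_restore_folio(h, folio))
                    return;         /* vmemmap could not be restored */

            /* ... return the folio, with full vmemmap, to the buddy allocator ... */
    }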
Link: https://lkml.kernel.org/r/20231011144557.1720481-2-usama.arif@bytedance.com Signed-off-by: Usama Arif Reviewed-by: Muchun Song Cc: Fam Zheng Cc: Mike Kravetz Cc: Punit Agrawal Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'mm/hugetlb_vmemmap.c') diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index d2999c303031..87818ee7f01d 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -495,14 +495,15 @@ EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key); static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0); -static int __hugetlb_vmemmap_restore(const struct hstate *h, struct page *head, unsigned long flags) +static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio, unsigned long flags) { int ret; + struct page *head = &folio->page; unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; unsigned long vmemmap_reuse; VM_WARN_ON_ONCE(!PageHuge(head)); - if (!HPageVmemmapOptimized(head)) + if (!folio_test_hugetlb_vmemmap_optimized(folio)) return 0; vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); @@ -518,7 +519,7 @@ static int __hugetlb_vmemmap_restore(const struct hstate *h, struct page *head, */ ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse, flags); if (!ret) { - ClearHPageVmemmapOptimized(head); + folio_clear_hugetlb_vmemmap_optimized(folio); static_branch_dec(&hugetlb_optimize_vmemmap_key); } @@ -526,18 +527,18 @@ static int __hugetlb_vmemmap_restore(const struct hstate *h, struct page *head, } /** - * hugetlb_vmemmap_restore - restore previously optimized (by - * hugetlb_vmemmap_optimize()) vmemmap pages which + * hugetlb_vmemmap_restore_folio - restore previously optimized (by + * hugetlb_vmemmap_optimize_folio()) vmemmap pages which * will be reallocated and remapped. * @h: struct hstate. - * @head: the head page whose vmemmap pages will be restored. + * @folio: the folio whose vmemmap pages will be restored. * - * Return: %0 if @head's vmemmap pages have been reallocated and remapped, + * Return: %0 if @folio's vmemmap pages have been reallocated and remapped, * negative error code otherwise. 
*/ -int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) +int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio) { - return __hugetlb_vmemmap_restore(h, head, 0); + return __hugetlb_vmemmap_restore_folio(h, folio, 0); } /** @@ -563,7 +564,7 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, list_for_each_entry_safe(folio, t_folio, folio_list, lru) { if (folio_test_hugetlb_vmemmap_optimized(folio)) { - ret = __hugetlb_vmemmap_restore(h, &folio->page, + ret = __hugetlb_vmemmap_restore_folio(h, folio, VMEMMAP_REMAP_NO_TLB_FLUSH); if (ret) break; @@ -640,12 +641,13 @@ static bool vmemmap_should_optimize(const struct hstate *h, const struct page *h return true; } -static int __hugetlb_vmemmap_optimize(const struct hstate *h, - struct page *head, +static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h, + struct folio *folio, struct list_head *vmemmap_pages, unsigned long flags) { int ret = 0; + struct page *head = &folio->page; unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; unsigned long vmemmap_reuse; @@ -665,7 +667,7 @@ static int __hugetlb_vmemmap_optimize(const struct hstate *h, * If there is an error during optimization, we will immediately FLUSH * the TLB and clear the flag below. */ - SetHPageVmemmapOptimized(head); + folio_set_hugetlb_vmemmap_optimized(folio); vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; @@ -681,27 +683,27 @@ static int __hugetlb_vmemmap_optimize(const struct hstate *h, vmemmap_pages, flags); if (ret) { static_branch_dec(&hugetlb_optimize_vmemmap_key); - ClearHPageVmemmapOptimized(head); + folio_clear_hugetlb_vmemmap_optimized(folio); } return ret; } /** - * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages. + * hugetlb_vmemmap_optimize_folio - optimize @folio's vmemmap pages. * @h: struct hstate. - * @head: the head page whose vmemmap pages will be optimized. + * @folio: the folio whose vmemmap pages will be optimized. * - * This function only tries to optimize @head's vmemmap pages and does not + * This function only tries to optimize @folio's vmemmap pages and does not * guarantee that the optimization will succeed after it returns. The caller - * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages - * have been optimized. + * can use folio_test_hugetlb_vmemmap_optimized(@folio) to detect if @folio's + * vmemmap pages have been optimized. */ -void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) +void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio) { LIST_HEAD(vmemmap_pages); - __hugetlb_vmemmap_optimize(h, head, &vmemmap_pages, 0); + __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0); free_vmemmap_page_list(&vmemmap_pages); } @@ -745,7 +747,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l flush_tlb_all(); list_for_each_entry(folio, folio_list, lru) { - int ret = __hugetlb_vmemmap_optimize(h, &folio->page, + int ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, VMEMMAP_REMAP_NO_TLB_FLUSH); @@ -754,14 +756,14 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l * encounter an ENOMEM, free what we have and try again. * This can occur in the case that both spliting fails * halfway and head page allocation also failed. 
In this - * case __hugetlb_vmemmap_optimize() would free memory + * case __hugetlb_vmemmap_optimize_folio() would free memory * allowing more vmemmap remaps to occur. */ if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) { flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); INIT_LIST_HEAD(&vmemmap_pages); - __hugetlb_vmemmap_optimize(h, &folio->page, + __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, VMEMMAP_REMAP_NO_TLB_FLUSH); } -- cgit v1.2.3
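Taken together, the batching patches above converge on the flow sketched below (heavily simplified: error handling, the ENOMEM retry path and the static-key accounting are omitted, and the outer function name is invented). Every per-page TLB flush is replaced by one global flush per phase:

    void hugetlb_vmemmap_optimize_folios_sketch(struct hstate *h,
                                                struct list_head *folio_list)
    {
            struct folio *folio;
            LIST_HEAD(vmemmap_pages);       /* freed vmemmap pages, batched */

            /* Phase 1: split vmemmap PMDs for all folios, deferring the flush. */
            list_for_each_entry(folio, folio_list, lru)
                    hugetlb_vmemmap_split(h, &folio->page);
            flush_tlb_all();

            /* Phase 2: remap each folio's vmemmap without per-folio flushes. */
            list_for_each_entry(folio, folio_list, lru)
                    __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages,
                                                     VMEMMAP_REMAP_NO_TLB_FLUSH);

            /* Phase 3: one global flush, then free the accumulated pages in bulk. */
            flush_tlb_all();
            free_vmemmap_page_list(&vmemmap_pages);
    }

The flush is global rather than ranged because, as the "batch TLB flushes when freeing vmemmap" commit notes, the folios on the list are not guaranteed to have contiguous vmemmap, and composing a list of kernel virtual ranges to flush would add complexity for little benefit.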