From 04a42e72d77a93a166b79c34b7bc862f55a53967 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 14 Dec 2022 22:17:57 -0800 Subject: mm: move folio_set_compound_order() to mm/internal.h folio_set_compound_order() is moved to an mm-internal location so external folio users cannot misuse this function. Change the name of the function to folio_set_order() and use WARN_ON_ONCE() rather than BUG_ON. Also, handle the case if a non-large folio is passed and add clarifying comments to the function. Link: https://lore.kernel.org/lkml/20221207223731.32784-1-sidhartha.kumar@oracle.com/T/ Link: https://lkml.kernel.org/r/20221215061757.223440-1-sidhartha.kumar@oracle.com Fixes: 9fd330582b2f ("mm: add folio dtor and order setter functions") Signed-off-by: Sidhartha Kumar Suggested-by: Mike Kravetz Suggested-by: Muchun Song Suggested-by: Matthew Wilcox Suggested-by: John Hubbard Reviewed-by: John Hubbard Reviewed-by: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bdbfeb6fb393..cfd47a66ded0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1492,7 +1492,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio, set_page_refcounted(p); } - folio_set_compound_order(folio, 0); + folio_set_order(folio, 0); __folio_clear_head(folio); } @@ -1956,7 +1956,7 @@ static bool __prep_compound_gigantic_folio(struct folio *folio, __folio_clear_reserved(folio); __folio_set_head(folio); /* we rely on prep_new_hugetlb_folio to set the destructor */ - folio_set_compound_order(folio, order); + folio_set_order(folio, order); for (i = 0; i < nr_pages; i++) { p = folio_page(folio, i); @@ -2020,7 +2020,7 @@ out_error: p = folio_page(folio, j); __ClearPageReserved(p); } - folio_set_compound_order(folio, 0); + folio_set_order(folio, 0); __folio_clear_head(folio); return false; } -- cgit v1.2.3 From f1eb1bacfba9019823b2fce42383f010cd561fa6 Mon Sep 
17 00:00:00 2001 From: Peter Xu Date: Wed, 14 Dec 2022 15:15:33 -0500 Subject: mm/uffd: always wr-protect pte in pte|pmd_mkuffd_wp() This patch is a cleanup to always wr-protect pte/pmd in mkuffd_wp paths. The reasons I still think this patch is worthwhile, are: (1) It is a cleanup already; diffstat tells. (2) It just feels natural after I thought about this, if the pte is uffd protected, let's remove the write bit no matter what it was. (3) Since x86 is the only arch that supports uffd-wp, it also redefines pte|pmd_mkuffd_wp() in that it should always contain removals of write bits. It means any future arch that wants to implement uffd-wp should naturally follow this rule too. It's good to make it a default, even if with vm_page_prot changes on VM_UFFD_WP. (4) It covers more than vm_page_prot. So no chance of any potential future "accident" (like pte_mkdirty() sparc64 or loongarch, even though it just got its pte_mkdirty fixed <1 month ago). It'll be fairly clear when reading the code too that we don't worry about anything before a pte_mkuffd_wp() on uncertainty of the write bit. We may call pte_wrprotect() one more time in some paths (e.g. thp split), but that should be fully local bitop instruction so the overhead should be negligible. Although this patch should logically also fix all the known issues on uffd-wp too recently on page migration (not for numa hint recovery - that may need another explicit pte_wrprotect), but this is not the plan for that fix. So no fixes, and stable doesn't need this. 
Link: https://lkml.kernel.org/r/20221214201533.1774616-1-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Ives van Hoorne Cc: Mike Kravetz Cc: Nadav Amit Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cfd47a66ded0..92b3fd01a652 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5919,7 +5919,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * if populated. */ if (unlikely(pte_marker_uffd_wp(old_pte))) - new_pte = huge_pte_wrprotect(huge_pte_mkuffd_wp(new_pte)); + new_pte = huge_pte_mkuffd_wp(new_pte); set_huge_pte_at(mm, haddr, ptep, new_pte); hugetlb_count_add(pages_per_huge_page(h), mm); @@ -6728,7 +6728,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, pte = huge_pte_modify(old_pte, newprot); pte = arch_make_huge_pte(pte, shift, vma->vm_flags); if (uffd_wp) - pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte)); + pte = huge_pte_mkuffd_wp(pte); else if (uffd_wp_resolve) pte = huge_pte_clear_uffd_wp(pte); huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte); -- cgit v1.2.3 From 379c2e60e82ff71510a949033bf8431f39f66c75 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 12 Dec 2022 15:50:42 -0800 Subject: hugetlb: update vma flag check for hugetlb vma lock The check for whether a hugetlb vma lock exists partially depends on the vma's flags. Currently, it checks for either VM_MAYSHARE or VM_SHARED. Both flags are checked because VM_MAYSHARE was previously cleared in hugetlb vmas as they were torn down. This is no longer the case, and only the VM_MAYSHARE check is required. 
Link: https://lkml.kernel.org/r/20221212235042.178355-2-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Miaohe Lin Cc: "Aneesh Kumar K.V" Cc: David Hildenbrand Cc: James Houghton Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Signed-off-by: Andrew Morton --- mm/hugetlb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 92b3fd01a652..ed1ac2df582c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -262,8 +262,7 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) */ static bool __vma_shareable_lock(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) && - vma->vm_private_data; + return vma->vm_flags & VM_MAYSHARE && vma->vm_private_data; } void hugetlb_vma_lock_read(struct vm_area_struct *vma) -- cgit v1.2.3 From bb373dce2c7b473023f9e69f041a22d81171b71a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:50:53 -0500 Subject: mm/hugetlb: don't wait for migration entry during follow page That's what the code does with !hugetlb pages, so we should logically do the same for hugetlb, so migration entry will also be treated as no page. This is probably also the last piece in follow_page code that may sleep, the last one should be removed in cf994dd8af27 ("mm/gup: remove FOLL_MIGRATION", 2022-11-16). 
Link: https://lkml.kernel.org/r/20221216155100.2043537-3-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: David Hildenbrand Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- mm/hugetlb.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ed1ac2df582c..549f79668756 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6401,7 +6401,6 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, if (WARN_ON_ONCE(flags & FOLL_PIN)) return NULL; -retry: pte = huge_pte_offset(mm, haddr, huge_page_size(h)); if (!pte) return NULL; @@ -6424,16 +6423,6 @@ retry: page = NULL; goto out; } - } else { - if (is_hugetlb_entry_migration(entry)) { - spin_unlock(ptl); - __migration_entry_wait_huge(pte, ptl); - goto retry; - } - /* - * hwpoisoned entry is treated as no_page_table in - * follow_page_mask(). - */ } out: spin_unlock(ptl); -- cgit v1.2.3 From fcd48540d188876c917a377d81cd24c100332a62 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:50:55 -0500 Subject: mm/hugetlb: move swap entry handling into vma lock when faulted In hugetlb_fault(), there used to be a special path to handle swap entry at the entrance using huge_pte_offset(). That's unsafe because huge_pte_offset() for a pmd sharable range can access freed pgtables without any lock to protect the pgtable from being freed after pmd unshare. Here the simplest solution to make it safe is to move the swap handling to be after the vma lock is taken. We may need to take the fault mutex on either migration or hwpoison entries now (also the vma lock, but that's really needed), however neither of them is a hot path. 
Note that the vma lock cannot be released in hugetlb_fault() when the migration entry is detected, because in migration_entry_wait_huge() the pgtable page will be used again (by taking the pgtable lock), so that also need to be protected by the vma lock. Modify migration_entry_wait_huge() so that it must be called with vma read lock held, and properly release the lock in __migration_entry_wait_huge(). Link: https://lkml.kernel.org/r/20221216155100.2043537-5-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: David Hildenbrand Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- mm/hugetlb.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 549f79668756..7f9db1d9f6a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5993,22 +5993,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, int need_wait_lock = 0; unsigned long haddr = address & huge_page_mask(h); - ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); - if (ptep) { - /* - * Since we hold no locks, ptep could be stale. That is - * OK as we are only making decisions based on content and - * not actually modifying content here. - */ - entry = huge_ptep_get(ptep); - if (unlikely(is_hugetlb_entry_migration(entry))) { - migration_entry_wait_huge(vma, ptep); - return 0; - } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) - return VM_FAULT_HWPOISON_LARGE | - VM_FAULT_SET_HINDEX(hstate_index(h)); - } - /* * Serialize hugepage allocation and instantiation, so that we don't * get spurious allocation failures if two CPUs race to instantiate @@ -6023,10 +6007,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * Acquire vma lock before calling huge_pte_alloc and hold * until finished with ptep. 
This prevents huge_pmd_unshare from * being called elsewhere and making the ptep no longer valid. - * - * ptep could have already be assigned via huge_pte_offset. That - * is OK, as huge_pte_alloc will return the same value unless - * something has changed. */ hugetlb_vma_lock_read(vma); ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h)); @@ -6055,8 +6035,23 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * fault, and is_hugetlb_entry_(migration|hwpoisoned) check will * properly handle it. */ - if (!pte_present(entry)) + if (!pte_present(entry)) { + if (unlikely(is_hugetlb_entry_migration(entry))) { + /* + * Release the hugetlb fault lock now, but retain + * the vma lock, because it is needed to guard the + * huge_pte_lockptr() later in + * migration_entry_wait_huge(). The vma lock will + * be released there. + */ + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + migration_entry_wait_huge(vma, ptep); + return 0; + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) + ret = VM_FAULT_HWPOISON_LARGE | + VM_FAULT_SET_HINDEX(hstate_index(h)); goto out_mutex; + } /* * If we are going to COW/unshare the mapping later, we examine the -- cgit v1.2.3 From 7d049f3a03ea705522210d70b9d3e223ef86d663 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:52:19 -0500 Subject: mm/hugetlb: make hugetlb_follow_page_mask() safe to pmd unshare Since hugetlb_follow_page_mask() walks the pgtable, it needs the vma lock to make sure the pgtable page will not be freed concurrently. 
Link: https://lkml.kernel.org/r/20221216155219.2043714-1-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: David Hildenbrand Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- mm/hugetlb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7f9db1d9f6a5..807edc1410e5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6396,9 +6396,10 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, if (WARN_ON_ONCE(flags & FOLL_PIN)) return NULL; + hugetlb_vma_lock_read(vma); pte = huge_pte_offset(mm, haddr, huge_page_size(h)); if (!pte) - return NULL; + goto out_unlock; ptl = huge_pte_lock(h, mm, pte); entry = huge_ptep_get(pte); @@ -6421,6 +6422,8 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, } out: spin_unlock(ptl); +out_unlock: + hugetlb_vma_unlock_read(vma); return page; } -- cgit v1.2.3 From eefc7fa53608920203a1402ecf7255ecfa8bb030 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:52:23 -0500 Subject: mm/hugetlb: make follow_hugetlb_page() safe to pmd unshare Since follow_hugetlb_page() walks the pgtable, it needs the vma lock to make sure the pgtable page will not be freed concurrently. 
Link: https://lkml.kernel.org/r/20221216155223.2043727-1-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: David Hildenbrand Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- mm/hugetlb.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 807edc1410e5..da4c37553c08 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6454,6 +6454,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } + hugetlb_vma_lock_read(vma); /* * Some archs (sparc64, sh*) have multiple pte_ts to * each hugepage. We have to make sure we get the @@ -6478,6 +6479,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, !hugetlbfs_pagecache_present(h, vma, vaddr)) { if (pte) spin_unlock(ptl); + hugetlb_vma_unlock_read(vma); remainder = 0; break; } @@ -6499,6 +6501,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, if (pte) spin_unlock(ptl); + hugetlb_vma_unlock_read(vma); + if (flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; else if (unshare) @@ -6561,6 +6565,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, remainder -= pages_per_huge_page(h); i += pages_per_huge_page(h); spin_unlock(ptl); + hugetlb_vma_unlock_read(vma); continue; } @@ -6590,6 +6595,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs, flags))) { spin_unlock(ptl); + hugetlb_vma_unlock_read(vma); remainder = 0; err = -ENOMEM; break; @@ -6601,6 +6607,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, i += refs; spin_unlock(ptl); + hugetlb_vma_unlock_read(vma); } *nr_pages = remainder; /* -- cgit v1.2.3 From 9c67a20704e763f9cb8cd262c3e45de7bd2816bc Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 
16 Dec 2022 10:52:29 -0500 Subject: mm/hugetlb: introduce hugetlb_walk() huge_pte_offset() is the main walker function for hugetlb pgtables. The name is not really representing what it does, though. Instead of renaming it, introduce a wrapper function called hugetlb_walk() which will use huge_pte_offset() inside. Assert on the locks when walking the pgtable. Note, the vma lock assertion will be a no-op for private mappings. Document the last special case in the page_vma_mapped_walk() path where we don't need any more lock to call hugetlb_walk(). Taking vma lock there is not needed because either: (1) potential callers of hugetlb pvmw holds i_mmap_rwsem already (from one rmap_walk()), or (2) the caller will not walk a hugetlb vma at all so the hugetlb code path not reachable (e.g. in ksm or uprobe paths). It's slightly implicit for future page_vma_mapped_walk() callers on that lock requirement. But anyway, when one day this rule breaks, one will get a straightforward warning in hugetlb_walk() with lockdep, then there'll be a way out. 
[akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20221216155229.2043750-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- mm/hugetlb.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index da4c37553c08..0e5441d6890a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -260,11 +260,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) /* * hugetlb vma_lock helper routines */ -static bool __vma_shareable_lock(struct vm_area_struct *vma) -{ - return vma->vm_flags & VM_MAYSHARE && vma->vm_private_data; -} - void hugetlb_vma_lock_read(struct vm_area_struct *vma) { if (__vma_shareable_lock(vma)) { @@ -4980,7 +4975,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, } else { /* * For shared mappings the vma lock must be held before - * calling huge_pte_offset in the src vma. Otherwise, the + * calling hugetlb_walk() in the src vma. Otherwise, the * returned ptep could go away if part of a shared pmd and * another thread calls huge_pmd_unshare. 
*/ @@ -4990,7 +4985,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, last_addr_mask = hugetlb_mask_last_page(h); for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) { spinlock_t *src_ptl, *dst_ptl; - src_pte = huge_pte_offset(src, addr, sz); + src_pte = hugetlb_walk(src_vma, addr, sz); if (!src_pte) { addr |= last_addr_mask; continue; @@ -5197,7 +5192,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, hugetlb_vma_lock_write(vma); i_mmap_lock_write(mapping); for (; old_addr < old_end; old_addr += sz, new_addr += sz) { - src_pte = huge_pte_offset(mm, old_addr, sz); + src_pte = hugetlb_walk(vma, old_addr, sz); if (!src_pte) { old_addr |= last_addr_mask; new_addr |= last_addr_mask; @@ -5260,7 +5255,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct last_addr_mask = hugetlb_mask_last_page(h); address = start; for (; address < end; address += sz) { - ptep = huge_pte_offset(mm, address, sz); + ptep = hugetlb_walk(vma, address, sz); if (!ptep) { address |= last_addr_mask; continue; @@ -5573,7 +5568,7 @@ retry_avoidcopy: mutex_lock(&hugetlb_fault_mutex_table[hash]); hugetlb_vma_lock_read(vma); spin_lock(ptl); - ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); + ptep = hugetlb_walk(vma, haddr, huge_page_size(h)); if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) goto retry_avoidcopy; @@ -5611,7 +5606,7 @@ retry_avoidcopy: * before the page tables are altered */ spin_lock(ptl); - ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); + ptep = hugetlb_walk(vma, haddr, huge_page_size(h)); if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) { /* Break COW or unshare */ huge_ptep_clear_flush(vma, haddr, ptep); @@ -6397,7 +6392,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, return NULL; hugetlb_vma_lock_read(vma); - pte = huge_pte_offset(mm, haddr, huge_page_size(h)); + pte = hugetlb_walk(vma, haddr, huge_page_size(h)); if (!pte) goto out_unlock; @@ 
-6462,8 +6457,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * * Note that page table lock is not held when pte is null. */ - pte = huge_pte_offset(mm, vaddr & huge_page_mask(h), - huge_page_size(h)); + pte = hugetlb_walk(vma, vaddr & huge_page_mask(h), + huge_page_size(h)); if (pte) ptl = huge_pte_lock(h, mm, pte); absent = !pte || huge_pte_none(huge_ptep_get(pte)); @@ -6654,7 +6649,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, last_addr_mask = hugetlb_mask_last_page(h); for (; address < end; address += psize) { spinlock_t *ptl; - ptep = huge_pte_offset(mm, address, psize); + ptep = hugetlb_walk(vma, address, psize); if (!ptep) { if (!uffd_wp) { address |= last_addr_mask; @@ -7064,8 +7059,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, saddr = page_table_shareable(svma, vma, addr, idx); if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr, - vma_mmu_pagesize(svma)); + spte = hugetlb_walk(svma, saddr, + vma_mmu_pagesize(svma)); if (spte) { get_page(virt_to_page(spte)); break; @@ -7377,7 +7372,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, hugetlb_vma_lock_write(vma); i_mmap_lock_write(vma->vm_file->f_mapping); for (address = start; address < end; address += PUD_SIZE) { - ptep = huge_pte_offset(mm, address, sz); + ptep = hugetlb_walk(vma, address, sz); if (!ptep) continue; ptl = huge_pte_lock(h, mm, ptep); -- cgit v1.2.3 From c5094ec79cbe487983e3a96548a7eb1c1c82c727 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 16 Dec 2022 14:45:07 -0800 Subject: hugetlb: initialize variable to avoid compiler warning With the gcc 'maybe-uninitialized' warning enabled, gcc will produce: mm/hugetlb.c:6896:20: warning: `chg' may be used uninitialized This is a false positive, but may be difficult for the compiler to determine. maybe-uninitialized is disabled by default, but this gets flagged as a 0-DAY build regression. Initialize the variable to silence the warning. 
Link: https://lkml.kernel.org/r/20221216224507.106789-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0e5441d6890a..a82f41024167 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6760,7 +6760,7 @@ bool hugetlb_reserve_pages(struct inode *inode, struct vm_area_struct *vma, vm_flags_t vm_flags) { - long chg, add = -1; + long chg = -1, add = -1; struct hstate *h = hstate_inode(inode); struct hugepage_subpool *spool = subpool_inode(inode); struct resv_map *resv_map; -- cgit v1.2.3 From a79390f5d6a78647fd70856bd42b22d994de0ba2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:06 -0500 Subject: mm/mprotect: use long for page accountings and retval Switch to use type "long" for page accountings and retval across the whole procedure of change_protection(). The change should have shrunk the possible maximum page number to half of what it was before (ULONG_MAX / 2), but it shouldn't overflow on any system either because the maximum possible pages touched by change protection should be ULONG_MAX / PAGE_SIZE. Two reasons to switch from "unsigned long" to "long": 1. It suits count_vm_numa_events() better, whose 2nd parameter takes a long type. 2. It paves the way for returning negative (error) values in the future. Currently the only caller that consumes this retval is change_prot_numa(), where the unsigned long was converted to an int. While at it, touch up the numa code to also take a long, so it'll avoid any possible overflow too during the int-size conversion. 
Link: https://lkml.kernel.org/r/20230104225207.1066932-3-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Mike Kravetz Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Muchun Song Cc: Nadav Amit Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a82f41024167..a0d6d0980064 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6615,7 +6615,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, return i ? i : err; } -unsigned long hugetlb_change_protection(struct vm_area_struct *vma, +long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags) { @@ -6624,7 +6624,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, pte_t *ptep; pte_t pte; struct hstate *h = hstate_vma(vma); - unsigned long pages = 0, psize = huge_page_size(h); + long pages = 0, psize = huge_page_size(h); bool shared_pmd = false; struct mmu_notifier_range range; unsigned long last_addr_mask; -- cgit v1.2.3 From d1751118c88673fe5a948ad82277898e9e284c55 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:07 -0500 Subject: mm/uffd: detect pgtable allocation failures Before this patch, when any pgtable allocation failure happened during change_protection(), the error was silently ignored by the syscall. For shmem, there will be an error dumped into the host dmesg. Two issues with that: (1) Doing a trace dump when allocation fails is far from graceful. (2) The user should be notified with any kind of such error, so the user can trap it and decide what to do next, either by retrying, or stopping the process properly, or anything else. For userfault users, this will change the API of UFFDIO_WRITEPROTECT when a pgtable allocation failure happens. It should not normally break anyone, though. 
If it breaks, then in good ways. One man-page update will be on the way to introduce the new -ENOMEM for UFFDIO_WRITEPROTECT. Not marking stable so we keep the old behavior on the 5.19-till-now kernels. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20230104225207.1066932-4-peterx@redhat.com Signed-off-by: Peter Xu Reported-by: James Houghton Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Signed-off-by: Andrew Morton --- mm/hugetlb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a0d6d0980064..6fe65f14d33b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6660,8 +6660,10 @@ long hugetlb_change_protection(struct vm_area_struct *vma, * pre-allocations to install pte markers. */ ptep = huge_pte_alloc(mm, vma, address, psize); - if (!ptep) + if (!ptep) { + pages = -ENOMEM; break; + } } ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, vma, address, ptep)) { @@ -6751,7 +6753,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, hugetlb_vma_unlock_write(vma); mmu_notifier_invalidate_range_end(&range); - return pages << h->order; + return pages > 0 ? (pages << h->order) : pages; } /* Return true if reservation was successful, false otherwise. */ -- cgit v1.2.3 From 7d4a8be0c4b2b7ffb367929d2b352651f083806b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 10 Jan 2023 13:57:22 +1100 Subject: mm/mmu_notifier: remove unused mmu_notifier_range_update_to_read_only export mmu_notifier_range_update_to_read_only() was originally introduced in commit c6d23413f81b ("mm/mmu_notifier: mmu_notifier_range_update_to_read_only() helper") as an optimisation for device drivers that know a range has only been mapped read-only. However there are no users of this feature so remove it. 
As it is the only user of the struct mmu_notifier_range.vma field remove that also. Link: https://lkml.kernel.org/r/20230110025722.600912-1-apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: Mike Rapoport (IBM) Reviewed-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Reviewed-by: Mike Kravetz Cc: Ira Weiny Cc: Jerome Glisse Cc: John Hubbard Cc: Ralph Campbell Signed-off-by: Andrew Morton --- mm/hugetlb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6fe65f14d33b..273a6522aa4c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4966,7 +4966,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, int ret = 0; if (cow) { - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_vma, src, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src, src_vma->vm_start, src_vma->vm_end); mmu_notifier_invalidate_range_start(&range); @@ -5177,7 +5177,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, struct mmu_notifier_range range; bool shared_pmd = false; - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, old_addr, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, old_addr, old_end); adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); /* @@ -5391,7 +5391,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, struct mmu_notifier_range range; struct mmu_gather tlb; - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, start, end); adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); mmu_notifier_invalidate_range_start(&range); @@ -5597,7 +5597,7 @@ retry_avoidcopy: pages_per_huge_page(h)); __SetPageUptodate(new_page); - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, haddr, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr, haddr + 
huge_page_size(h)); mmu_notifier_invalidate_range_start(&range); @@ -6637,7 +6637,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, * range if PMD sharing is possible. */ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, - 0, vma, mm, start, end); + 0, mm, start, end); adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); BUG_ON(address >= end); @@ -7368,7 +7368,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, * No need to call adjust_range_if_pmd_sharing_possible(), because * we have already done the PUD_SIZE alignment. */ - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, start, end); mmu_notifier_invalidate_range_start(&range); hugetlb_vma_lock_write(vma); -- cgit v1.2.3 From 94688e8eb453e616098cb930e5f6fed4a6ea2dfa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:47 +0000 Subject: mm: remove folio_pincount_ptr() and head_compound_pincount() We can use folio->_pincount directly, since all users are guarded by tests of compound/large. 
Link: https://lkml.kernel.org/r/20230111142915.1001531-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 273a6522aa4c..15b2707c1600 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1476,7 +1476,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio, atomic_set(folio_mapcount_ptr(folio), 0); atomic_set(folio_subpages_mapcount_ptr(folio), 0); - atomic_set(folio_pincount_ptr(folio), 0); + atomic_set(&folio->_pincount, 0); for (i = 1; i < nr_pages; i++) { p = folio_page(folio, i); @@ -1998,7 +1998,7 @@ static bool __prep_compound_gigantic_folio(struct folio *folio, } atomic_set(folio_mapcount_ptr(folio), -1); atomic_set(folio_subpages_mapcount_ptr(folio), 0); - atomic_set(folio_pincount_ptr(folio), 0); + atomic_set(&folio->_pincount, 0); return true; out_error: -- cgit v1.2.3 From eec20426d48bd7b63c69969a793943ed1a99b731 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:48 +0000 Subject: mm: convert head_subpages_mapcount() into folio_nr_pages_mapped() Calling this 'mapcount' is confusing since mapcount is usually the number of times something is mapped; instead this is the number of mapped pages. It's also better to enforce that this is a folio rather than a head page. Move folio_nr_pages_mapped() into mm/internal.h since this is not something we want device drivers or filesystems poking at. Get rid of folio_subpages_mapcount_ptr() and use folio->_nr_pages_mapped directly. 
Link: https://lkml.kernel.org/r/20230111142915.1001531-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 15b2707c1600..c9702224931c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1475,7 +1475,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio, struct page *p; atomic_set(folio_mapcount_ptr(folio), 0); - atomic_set(folio_subpages_mapcount_ptr(folio), 0); + atomic_set(&folio->_nr_pages_mapped, 0); atomic_set(&folio->_pincount, 0); for (i = 1; i < nr_pages; i++) { @@ -1997,7 +1997,7 @@ static bool __prep_compound_gigantic_folio(struct folio *folio, set_compound_head(p, &folio->page); } atomic_set(folio_mapcount_ptr(folio), -1); - atomic_set(folio_subpages_mapcount_ptr(folio), 0); + atomic_set(&folio->_nr_pages_mapped, 0); atomic_set(&folio->_pincount, 0); return true; -- cgit v1.2.3 From 46f2722825983a51e849eb0ef2814e5c7f040fef Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:59 +0000 Subject: hugetlb: remove uses of folio_mapcount_ptr Use the entire_mapcount field directly. 
Link: https://lkml.kernel.org/r/20230111142915.1001531-14-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c9702224931c..a68e0e597a8f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1474,7 +1474,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio, int nr_pages = 1 << order; struct page *p; - atomic_set(folio_mapcount_ptr(folio), 0); + atomic_set(&folio->_entire_mapcount, 0); atomic_set(&folio->_nr_pages_mapped, 0); atomic_set(&folio->_pincount, 0); @@ -1996,7 +1996,7 @@ static bool __prep_compound_gigantic_folio(struct folio *folio, if (i != 0) set_compound_head(p, &folio->page); } - atomic_set(folio_mapcount_ptr(folio), -1); + atomic_set(&folio->_entire_mapcount, -1); atomic_set(&folio->_nr_pages_mapped, 0); atomic_set(&folio->_pincount, 0); return true; -- cgit v1.2.3 From 2d678c641a4625d2b1cfeb50d7426fab6d3740b3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:07 +0000 Subject: hugetlb: remove uses of compound_dtor and compound_nr Convert the entire file to use the folio equivalents. 
Link: https://lkml.kernel.org/r/20230111142915.1001531-22-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/hugetlb.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a68e0e597a8f..ca9e177b9c54 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2038,11 +2038,12 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, */ int PageHuge(struct page *page) { + struct folio *folio; + if (!PageCompound(page)) return 0; - - page = compound_head(page); - return page[1].compound_dtor == HUGETLB_PAGE_DTOR; + folio = page_folio(page); + return folio->_folio_dtor == HUGETLB_PAGE_DTOR; } EXPORT_SYMBOL_GPL(PageHuge); @@ -2052,10 +2053,11 @@ EXPORT_SYMBOL_GPL(PageHuge); */ int PageHeadHuge(struct page *page_head) { - if (!PageHead(page_head)) + struct folio *folio = (struct folio *)page_head; + if (!folio_test_large(folio)) return 0; - return page_head[1].compound_dtor == HUGETLB_PAGE_DTOR; + return folio->_folio_dtor == HUGETLB_PAGE_DTOR; } EXPORT_SYMBOL_GPL(PageHeadHuge); -- cgit v1.2.3 From 2ff6cecee669bf0fc63eadebac8cfc81f74b9a4c Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 12 Jan 2023 14:46:03 -0600 Subject: mm/memory-failure: convert hugetlb_clear_page_hwpoison to folios Change hugetlb_clear_page_hwpoison() to folio_clear_hugetlb_hwpoison() by changing the function to take in a folio. This converts one use of ClearPageHWPoison and HPageRawHwpUnreliable to their folio equivalents. 
Link: https://lkml.kernel.org/r/20230112204608.80136-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Acked-by: Naoya Horiguchi Cc: Matthew Wilcox Cc: Miaohe Lin Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ca9e177b9c54..291ad4cb02f9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1731,7 +1731,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * which makes any healthy subpages reusable. */ if (unlikely(folio_test_hwpoison(folio))) - hugetlb_clear_page_hwpoison(&folio->page); + folio_clear_hugetlb_hwpoison(folio); for (i = 0; i < pages_per_huge_page(h); i++) { subpage = folio_page(folio, i); -- cgit v1.2.3 From 04bac040bc71b4b37550eed5854f34ca161756f9 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 18 Jan 2023 09:40:39 -0800 Subject: mm/hugetlb: convert get_hwpoison_huge_page() to folios Straightforward conversion of get_hwpoison_huge_page() to get_hwpoison_hugetlb_folio(). 
Reduces two references to a head page in memory-failure.c [arnd@arndb.de: fix get_hwpoison_hugetlb_folio() stub] Link: https://lkml.kernel.org/r/20230119111920.635260-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20230118174039.14247-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Signed-off-by: Arnd Bergmann Acked-by: Naoya Horiguchi Reviewed-by: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 291ad4cb02f9..0f9df0143772 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7268,18 +7268,18 @@ unlock: return ret; } -int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) +int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) { int ret = 0; *hugetlb = false; spin_lock_irq(&hugetlb_lock); - if (PageHeadHuge(page)) { + if (folio_test_hugetlb(folio)) { *hugetlb = true; - if (HPageFreed(page)) + if (folio_test_hugetlb_freed(folio)) ret = 0; - else if (HPageMigratable(page) || unpoison) - ret = get_page_unless_zero(page); + else if (folio_test_hugetlb_migratable(folio) || unpoison) + ret = folio_try_get(folio); else ret = -EBUSY; } -- cgit v1.2.3 From e430a95a04efc557bc4ff9b3035c7c85aee5d63f Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:48 -0800 Subject: mm: replace VM_LOCKED_CLEAR_MASK with VM_LOCKED_MASK To simplify the usage of VM_LOCKED_CLEAR_MASK in vm_flags_clear(), replace it with VM_LOCKED_MASK bitmask and convert all users. 
Link: https://lkml.kernel.org/r/20230126193752.297968-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Mike Rapoport (IBM) Reviewed-by: Davidlohr Bueso Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Sebastian Reichel Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0f9df0143772..ab35b1cc9927 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6969,8 +6969,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, unsigned long s_end = sbase + PUD_SIZE; /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; - unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK; + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED_MASK; + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED_MASK; /* * match the virtual addresses, permission and the alignment of the -- cgit v1.2.3 From 6aa3a920125e9f58891e2b5dc2efd4d0c1ff05a6 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:50 -0600 Subject: mm/hugetlb: convert isolate_hugetlb to folios Patch series "continue hugetlb folio conversion", v3. This series continues the conversion of core hugetlb functions to use folios. This series converts many helper functions in the hugetlb fault path.
This is in preparation for another series to convert the hugetlb fault code paths to operate on folios. This patch (of 8): Convert isolate_hugetlb() to take in a folio and convert its callers to pass a folio. Using page_folio() to convert the callers to use a folio is safe, as isolate_hugetlb() operates on a head page. Link: https://lkml.kernel.org/r/20230113223057.173292-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20230113223057.173292-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ab35b1cc9927..0c1e1ce113c8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2925,7 +2925,7 @@ retry: * Fail with -EBUSY if not possible. */ spin_unlock_irq(&hugetlb_lock); - ret = isolate_hugetlb(&old_folio->page, list); + ret = isolate_hugetlb(old_folio, list); spin_lock_irq(&hugetlb_lock); goto free_new; } else if (!folio_test_hugetlb_freed(old_folio)) { @@ -3000,7 +3000,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) if (hstate_is_gigantic(h)) return -ENOMEM; - if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list)) + if (folio_ref_count(folio) && !isolate_hugetlb(folio, list)) ret = 0; else if (!folio_ref_count(folio)) ret = alloc_and_dissolve_hugetlb_folio(h, folio, list); @@ -7250,19 +7250,19 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h) * These functions are overwritable if your architecture needs its own * behavior.
*/ -int isolate_hugetlb(struct page *page, struct list_head *list) +int isolate_hugetlb(struct folio *folio, struct list_head *list) { int ret = 0; spin_lock_irq(&hugetlb_lock); - if (!PageHeadHuge(page) || - !HPageMigratable(page) || - !get_page_unless_zero(page)) { + if (!folio_test_hugetlb(folio) || + !folio_test_hugetlb_migratable(folio) || + !folio_try_get(folio)) { ret = -EBUSY; goto unlock; } - ClearHPageMigratable(page); - list_move_tail(&page->lru, list); + folio_clear_hugetlb_migratable(folio); + list_move_tail(&folio->lru, list); unlock: spin_unlock_irq(&hugetlb_lock); return ret; -- cgit v1.2.3 From 6f6956cf7e6a3034f61780446547e849aa4e216d Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:51 -0600 Subject: mm/hugetlb: convert __update_and_free_page() to folios Change __update_and_free_page() to __update_and_free_hugetlb_folio() by changing its callers to pass in a folio. Link: https://lkml.kernel.org/r/20230113223057.173292-3-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0c1e1ce113c8..d27fcf768548 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1698,10 +1698,10 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, enqueue_hugetlb_folio(h, folio); } -static void __update_and_free_page(struct hstate *h, struct page *page) +static void __update_and_free_hugetlb_folio(struct hstate *h, + struct folio *folio) { int i; - struct folio *folio = page_folio(page); struct page *subpage; if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) @@ -1714,7 +1714,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) if (folio_test_hugetlb_raw_hwp_unreliable(folio)) return; - if (hugetlb_vmemmap_restore(h, page)) { + if 
(hugetlb_vmemmap_restore(h, &folio->page)) { spin_lock_irq(&hugetlb_lock); /* * If we cannot allocate vmemmap pages, just refuse to free the @@ -1750,7 +1750,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) destroy_compound_gigantic_folio(folio, huge_page_order(h)); free_gigantic_folio(folio, huge_page_order(h)); } else { - __free_pages(page, huge_page_order(h)); + __free_pages(&folio->page, huge_page_order(h)); } } @@ -1790,7 +1790,7 @@ static void free_hpage_workfn(struct work_struct *work) */ h = size_to_hstate(page_size(page)); - __update_and_free_page(h, page); + __update_and_free_hugetlb_folio(h, page_folio(page)); cond_resched(); } @@ -1807,7 +1807,7 @@ static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio, bool atomic) { if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) { - __update_and_free_page(h, &folio->page); + __update_and_free_hugetlb_folio(h, folio); return; } -- cgit v1.2.3 From a36f1e9024740c3820427afca4cd375e32a1bb15 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:52 -0600 Subject: mm/hugetlb: convert dequeue_hugetlb_page functions to folios dequeue_huge_page_node_exact() is changed to dequeue_hugetlb_folio_node_exact() and dequeue_huge_page_nodemask() is changed to dequeue_hugetlb_folio_nodemask(). Update their callers to pass in a folio.
Link: https://lkml.kernel.org/r/20230113223057.173292-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 56 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 26 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d27fcf768548..3e648fccf33e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1282,32 +1282,33 @@ static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio) folio_set_hugetlb_freed(folio); } -static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) +static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h, + int nid) { - struct page *page; + struct folio *folio; bool pin = !!(current->flags & PF_MEMALLOC_PIN); lockdep_assert_held(&hugetlb_lock); - list_for_each_entry(page, &h->hugepage_freelists[nid], lru) { - if (pin && !is_longterm_pinnable_page(page)) + list_for_each_entry(folio, &h->hugepage_freelists[nid], lru) { + if (pin && !folio_is_longterm_pinnable(folio)) continue; - if (PageHWPoison(page)) + if (folio_test_hwpoison(folio)) continue; - list_move(&page->lru, &h->hugepage_activelist); - set_page_refcounted(page); - ClearHPageFreed(page); + list_move(&folio->lru, &h->hugepage_activelist); + folio_ref_unfreeze(folio, 1); + folio_clear_hugetlb_freed(folio); h->free_huge_pages--; h->free_huge_pages_node[nid]--; - return page; + return folio; } return NULL; } -static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid, - nodemask_t *nmask) +static struct folio *dequeue_hugetlb_folio_nodemask(struct hstate *h, gfp_t gfp_mask, + int nid, nodemask_t *nmask) { unsigned int cpuset_mems_cookie; struct zonelist *zonelist; @@ -1320,7 +1321,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, retry_cpuset: cpuset_mems_cookie = read_mems_allowed_begin(); 
for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) { - struct page *page; + struct folio *folio; if (!cpuset_zone_allowed(zone, gfp_mask)) continue; @@ -1332,9 +1333,9 @@ retry_cpuset: continue; node = zone_to_nid(zone); - page = dequeue_huge_page_node_exact(h, node); - if (page) - return page; + folio = dequeue_hugetlb_folio_node_exact(h, node); + if (folio) + return folio; } if (unlikely(read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; @@ -1352,7 +1353,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, unsigned long address, int avoid_reserve, long chg) { - struct page *page = NULL; + struct folio *folio = NULL; struct mempolicy *mpol; gfp_t gfp_mask; nodemask_t *nodemask; @@ -1374,22 +1375,24 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); if (mpol_is_preferred_many(mpol)) { - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, + nid, nodemask); /* Fallback to all nodes if page==NULL */ nodemask = NULL; } - if (!page) - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + if (!folio) + folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, + nid, nodemask); - if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { - SetHPageRestoreReserve(page); + if (folio && !avoid_reserve && vma_has_reserves(vma, chg)) { + folio_set_hugetlb_restore_reserve(folio); h->resv_huge_pages--; } mpol_cond_put(mpol); - return page; + return &folio->page; err: return NULL; @@ -2475,12 +2478,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, { spin_lock_irq(&hugetlb_lock); if (available_huge_pages(h)) { - struct page *page; + struct folio *folio; - page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask); - if (page) { + folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, + preferred_nid, nmask); + if (folio) { 
spin_unlock_irq(&hugetlb_lock); - return page; + return &folio->page; } } spin_unlock_irq(&hugetlb_lock); -- cgit v1.2.3 From 3a740e8bb56ef7ee6b9098b694caabab843be067 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:53 -0600 Subject: mm/hugetlb: convert alloc_surplus_huge_page() to folios Change alloc_surplus_huge_page() to alloc_surplus_hugetlb_folio() and update its callers. Link: https://lkml.kernel.org/r/20230113223057.173292-5-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3e648fccf33e..fa61b4aa68ca 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2378,8 +2378,8 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) /* * Allocates a fresh surplus page from the page allocator. 
*/ -static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, - int nid, nodemask_t *nmask) +static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h, + gfp_t gfp_mask, int nid, nodemask_t *nmask) { struct folio *folio = NULL; @@ -2416,7 +2416,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, out_unlock: spin_unlock_irq(&hugetlb_lock); - return &folio->page; + return folio; } static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, @@ -2449,7 +2449,7 @@ static struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) { - struct page *page = NULL; + struct folio *folio = NULL; struct mempolicy *mpol; gfp_t gfp_mask = htlb_alloc_mask(h); int nid; @@ -2460,16 +2460,16 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, gfp_t gfp = gfp_mask | __GFP_NOWARN; gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); - page = alloc_surplus_huge_page(h, gfp, nid, nodemask); + folio = alloc_surplus_hugetlb_folio(h, gfp, nid, nodemask); /* Fallback to all nodes if page==NULL */ nodemask = NULL; } - if (!page) - page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask); + if (!folio) + folio = alloc_surplus_hugetlb_folio(h, gfp_mask, nid, nodemask); mpol_cond_put(mpol); - return page; + return &folio->page; } /* page migration callback function */ @@ -2518,6 +2518,7 @@ static int gather_surplus_pages(struct hstate *h, long delta) __must_hold(&hugetlb_lock) { LIST_HEAD(surplus_list); + struct folio *folio; struct page *page, *tmp; int ret; long i; @@ -2537,13 +2538,13 @@ static int gather_surplus_pages(struct hstate *h, long delta) retry: spin_unlock_irq(&hugetlb_lock); for (i = 0; i < needed; i++) { - page = alloc_surplus_huge_page(h, htlb_alloc_mask(h), + folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h), NUMA_NO_NODE, NULL); - if (!page) { + if (!folio) { alloc_ok = false; break; } - list_add(&page->lru, &surplus_list); + 
list_add(&folio->lru, &surplus_list); cond_resched(); } allocated += i; @@ -3496,7 +3497,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, * First take pages out of surplus state. Then make up the * remaining difference by allocating fresh huge pages. * - * We might race with alloc_surplus_huge_page() here and be unable + * We might race with alloc_surplus_hugetlb_folio() here and be unable * to convert a surplus huge page to a normal huge page. That is * not critical, though, it just means the overall size of the * pool might be one hugepage larger than it needs to be, but @@ -3539,7 +3540,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, * By placing pages into the surplus state independent of the * overcommit value, we are allowing the surplus pool size to * exceed overcommit. There are few sane options here. Since - * alloc_surplus_huge_page() is checking the global counter, + * alloc_surplus_hugetlb_folio() is checking the global counter, * though, we'll note that we're not allowed to exceed surplus * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use. -- cgit v1.2.3 From ff7d853b031302376a0d3640fa1c463d94079637 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:54 -0600 Subject: mm/hugetlb: increase use of folios in alloc_huge_page() Change hugetlb_cgroup_commit_charge{,_rsvd}(), dequeue_huge_page_vma() and alloc_buddy_huge_page_with_mpol() to use folios so alloc_huge_page() is cleaned by operating on folios until its return. 
Link: https://lkml.kernel.org/r/20230113223057.173292-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fa61b4aa68ca..5d0d1efbe590 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1348,7 +1348,7 @@ static unsigned long available_huge_pages(struct hstate *h) return h->free_huge_pages - h->resv_huge_pages; } -static struct page *dequeue_huge_page_vma(struct hstate *h, +static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address, int avoid_reserve, long chg) @@ -1392,7 +1392,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, } mpol_cond_put(mpol); - return &folio->page; + return folio; err: return NULL; @@ -2446,7 +2446,7 @@ static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, * Use the VMA's mpolicy to allocate a huge page from the buddy. */ static -struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, +struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) { struct folio *folio = NULL; @@ -2469,7 +2469,7 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, if (!folio) folio = alloc_surplus_hugetlb_folio(h, gfp_mask, nid, nodemask); mpol_cond_put(mpol); - return &folio->page; + return folio; } /* page migration callback function */ @@ -3018,7 +3018,6 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, { struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); - struct page *page; struct folio *folio; long map_chg, map_commit; long gbl_chg; @@ -3082,34 +3081,34 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, * from the global free pool (global change). 
gbl_chg == 0 indicates * a reservation exists for the allocation. */ - page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg); - if (!page) { + folio = dequeue_hugetlb_folio_vma(h, vma, addr, avoid_reserve, gbl_chg); + if (!folio) { spin_unlock_irq(&hugetlb_lock); - page = alloc_buddy_huge_page_with_mpol(h, vma, addr); - if (!page) + folio = alloc_buddy_hugetlb_folio_with_mpol(h, vma, addr); + if (!folio) goto out_uncharge_cgroup; spin_lock_irq(&hugetlb_lock); if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) { - SetHPageRestoreReserve(page); + folio_set_hugetlb_restore_reserve(folio); h->resv_huge_pages--; } - list_add(&page->lru, &h->hugepage_activelist); - set_page_refcounted(page); + list_add(&folio->lru, &h->hugepage_activelist); + folio_ref_unfreeze(folio, 1); /* Fall through */ } - folio = page_folio(page); - hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page); + + hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, folio); /* If allocation is not consuming a reservation, also store the * hugetlb_cgroup pointer on the page. 
*/ if (deferred_reserve) { hugetlb_cgroup_commit_charge_rsvd(idx, pages_per_huge_page(h), - h_cg, page); + h_cg, folio); } spin_unlock_irq(&hugetlb_lock); - hugetlb_set_page_subpool(page, spool); + hugetlb_set_folio_subpool(folio, spool); map_commit = vma_commit_reservation(h, vma, addr); if (unlikely(map_chg > map_commit)) { @@ -3130,7 +3129,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), pages_per_huge_page(h), folio); } - return page; + return &folio->page; out_uncharge_cgroup: hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg); -- cgit v1.2.3 From e37d3e838d9078538f920957d1e89682b6764977 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:55 -0600 Subject: mm/hugetlb: convert alloc_migrate_huge_page to folios Change alloc_huge_page_nodemask() to alloc_hugetlb_folio_nodemask() and alloc_migrate_huge_page() to alloc_migrate_hugetlb_folio(). Both functions now return a folio rather than a page. 
Link: https://lkml.kernel.org/r/20230113223057.173292-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5d0d1efbe590..57894beb3382 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2419,7 +2419,7 @@ out_unlock: return folio; } -static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, +static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask) { struct folio *folio; @@ -2439,7 +2439,7 @@ static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, */ folio_set_hugetlb_temporary(folio); - return &folio->page; + return folio; } /* @@ -2472,8 +2472,8 @@ struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h, return folio; } -/* page migration callback function */ -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, +/* folio migration callback function */ +struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask) { spin_lock_irq(&hugetlb_lock); @@ -2484,12 +2484,12 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, preferred_nid, nmask); if (folio) { spin_unlock_irq(&hugetlb_lock); - return &folio->page; + return folio; } } spin_unlock_irq(&hugetlb_lock); - return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask); + return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask); } /* mempolicy aware migration callback */ @@ -2498,16 +2498,16 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, { struct mempolicy *mpol; nodemask_t *nodemask; - struct page *page; + struct folio *folio; gfp_t gfp_mask; int node; gfp_mask = htlb_alloc_mask(h); node 
= huge_node(vma, address, gfp_mask, &mpol, &nodemask); - page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask); + folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask); mpol_cond_put(mpol); - return page; + return &folio->page; } /* -- cgit v1.2.3 From 0ffdc38eb564c1c71a58bbaf874945ba54293ff9 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:56 -0600 Subject: mm/hugetlb: convert restore_reserve_on_error() to folios Use the hugetlb folio flag macros inside restore_reserve_on_error() and update the comments to reflect the use of folios. Link: https://lkml.kernel.org/r/20230113223057.173292-8-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 57894beb3382..3120c3db60c4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2819,22 +2819,23 @@ static long vma_del_reservation(struct hstate *h, void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct page *page) { + struct folio *folio = page_folio(page); long rc = vma_needs_reservation(h, vma, address); - if (HPageRestoreReserve(page)) { + if (folio_test_hugetlb_restore_reserve(folio)) { if (unlikely(rc < 0)) /* * Rare out of memory condition in reserve map - * manipulation. Clear HPageRestoreReserve so that - * global reserve count will not be incremented + * manipulation. Clear hugetlb_restore_reserve so + * that global reserve count will not be incremented * by free_huge_page. This will make it appear - * as though the reservation for this page was + * as though the reservation for this folio was * consumed. This may prevent the task from - * faulting in the page at a later time. This + * faulting in the folio at a later time. 
This * is better than inconsistent global huge page * accounting of reserve counts. */ - ClearHPageRestoreReserve(page); + folio_clear_hugetlb_restore_reserve(folio); else if (rc) (void)vma_add_reservation(h, vma, address); else @@ -2845,7 +2846,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, * This indicates there is an entry in the reserve map * not added by alloc_huge_page. We know it was added * before the alloc_huge_page call, otherwise - * HPageRestoreReserve would be set on the page. + * hugetlb_restore_reserve would be set on the folio. * Remove the entry so that a subsequent allocation * does not consume a reservation. */ @@ -2854,12 +2855,12 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, /* * VERY rare out of memory condition. Since * we can not delete the entry, set - * HPageRestoreReserve so that the reserve - * count will be incremented when the page + * hugetlb_restore_reserve so that the reserve + * count will be incremented when the folio * is freed. This reserve will be consumed * on a subsequent allocation. */ - SetHPageRestoreReserve(page); + folio_set_hugetlb_restore_reserve(folio); } else if (rc < 0) { /* * Rare out of memory condition from @@ -2875,12 +2876,12 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, /* * For private mappings, no entry indicates * a reservation is present. Since we can - * not add an entry, set SetHPageRestoreReserve - * on the page so reserve count will be + * not add an entry, set hugetlb_restore_reserve + * on the folio so reserve count will be * incremented when freed. This reserve will * be consumed on a subsequent allocation. 
*/ - SetHPageRestoreReserve(page); + folio_set_hugetlb_restore_reserve(folio); } else /* * No reservation present, do nothing -- cgit v1.2.3 From bdd7be075acb650cc57d8ee752b5375b966ad07e Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:57 -0600 Subject: mm/hugetlb: convert demote_free_huge_page to folios Change demote_free_huge_page to demote_free_hugetlb_folio() and change demote_pool_huge_page() to pass in a folio. Link: https://lkml.kernel.org/r/20230113223057.173292-9-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3120c3db60c4..4ecdbad9a451 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3579,12 +3579,12 @@ out: return 0; } -static int demote_free_huge_page(struct hstate *h, struct page *page) +static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) { - int i, nid = page_to_nid(page); + int i, nid = folio_nid(folio); struct hstate *target_hstate; - struct folio *folio = page_folio(page); struct page *subpage; + struct folio *inner_folio; int rc = 0; target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); @@ -3592,18 +3592,18 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) remove_hugetlb_folio_for_demote(h, folio, false); spin_unlock_irq(&hugetlb_lock); - rc = hugetlb_vmemmap_restore(h, page); + rc = hugetlb_vmemmap_restore(h, &folio->page); if (rc) { - /* Allocation of vmemmmap failed, we can not demote page */ + /* Allocation of vmemmmap failed, we can not demote folio */ spin_lock_irq(&hugetlb_lock); - set_page_refcounted(page); - add_hugetlb_folio(h, page_folio(page), false); + folio_ref_unfreeze(folio, 1); + add_hugetlb_folio(h, folio, false); return rc; } /* * Use
destroy_compound_hugetlb_folio_for_demote for all huge page - * sizes as it will not ref count pages. + * sizes as it will not ref count folios. */ destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h)); @@ -3618,15 +3618,15 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) mutex_lock(&target_hstate->resize_lock); for (i = 0; i < pages_per_huge_page(h); i += pages_per_huge_page(target_hstate)) { - subpage = nth_page(page, i); - folio = page_folio(subpage); + subpage = folio_page(folio, i); + inner_folio = page_folio(subpage); if (hstate_is_gigantic(target_hstate)) - prep_compound_gigantic_folio_for_demote(folio, + prep_compound_gigantic_folio_for_demote(inner_folio, target_hstate->order); else prep_compound_page(subpage, target_hstate->order); - set_page_private(subpage, 0); - prep_new_hugetlb_folio(target_hstate, folio, nid); + folio_change_private(inner_folio, NULL); + prep_new_hugetlb_folio(target_hstate, inner_folio, nid); free_huge_page(subpage); } mutex_unlock(&target_hstate->resize_lock); @@ -3648,7 +3648,7 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) __must_hold(&hugetlb_lock) { int nr_nodes, node; - struct page *page; + struct folio *folio; lockdep_assert_held(&hugetlb_lock); @@ -3659,11 +3659,10 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) } for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { - list_for_each_entry(page, &h->hugepage_freelists[node], lru) { - if (PageHWPoison(page)) + list_for_each_entry(folio, &h->hugepage_freelists[node], lru) { + if (folio_test_hwpoison(folio)) continue; - - return demote_free_huge_page(h, page); + return demote_free_hugetlb_folio(h, folio); } } -- cgit v1.2.3 From ea4c353df37750d170dc0dcbfa8c47c984779733 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:30 -0800 Subject: mm/hugetlb: convert hugetlb_install_page to folios Patch series "convert hugetlb fault functions to 
folios", v2. This series converts the hugetlb page faulting functions to operate on folios. These include hugetlb_no_page(), hugetlb_wp(), copy_hugetlb_page_range(), and hugetlb_mcopy_atomic_pte(). This patch (of 8): Change hugetlb_install_page() to hugetlb_install_folio(). This reduces one user of the Huge Page flag macros which take in a page. Link: https://lkml.kernel.org/r/20230125170537.96973-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20230125170537.96973-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4ecdbad9a451..b246f2b4d0bd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4946,14 +4946,14 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte) } static void -hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr, - struct page *new_page) +hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr, + struct folio *new_folio) { - __SetPageUptodate(new_page); - hugepage_add_new_anon_rmap(new_page, vma, addr); - set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1)); + __folio_mark_uptodate(new_folio); + hugepage_add_new_anon_rmap(&new_folio->page, vma, addr); + set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1)); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); - SetHPageMigratable(new_page); + folio_set_hugetlb_migratable(new_folio); } int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, @@ -5107,7 +5107,7 @@ again: /* huge_ptep of dst_pte won't change as in child */ goto again; } - hugetlb_install_page(dst_vma, dst_pte, addr, new); + hugetlb_install_folio(dst_vma, dst_pte, addr, page_folio(new)); 
spin_unlock(src_ptl); spin_unlock(dst_ptl); continue; -- cgit v1.2.3 From 91a2fb956ad993f3cbcfc632611e17e3699fb652 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:31 -0800 Subject: mm/hugetlb: convert hugetlbfs_pagecache_present() to folios Refactor hugetlbfs_pagecache_present() to avoid getting and dropping a refcount on a page. Use RCU and page_cache_next_miss() instead. Link: https://lkml.kernel.org/r/20230125170537.96973-3-sidhartha.kumar@oracle.com Suggested-by: Matthew Wilcox Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: kernel test robot Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b246f2b4d0bd..a0d486ed5411 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5651,17 +5651,15 @@ out_release_old: static bool hugetlbfs_pagecache_present(struct hstate *h, struct vm_area_struct *vma, unsigned long address) { - struct address_space *mapping; - pgoff_t idx; - struct page *page; + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = vma_hugecache_offset(h, vma, address); + bool present; - mapping = vma->vm_file->f_mapping; - idx = vma_hugecache_offset(h, vma, address); + rcu_read_lock(); + present = page_cache_next_miss(mapping, idx, 1) != idx; + rcu_read_unlock(); - page = find_get_page(mapping, idx); - if (page) - put_page(page); - return page != NULL; + return present; } int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, -- cgit v1.2.3 From ea8e72f4116a995c2aba3fb738ac372c4115375a Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:32 -0800 Subject: mm/hugetlb: convert putback_active_hugepage to take in a folio Convert putback_active_hugepage() to folio_putback_active_hugetlb(), this removes one user of the Huge Page macros which take in a page. 
The callers in migrate.c are also cleaned up by being able to directly use the src and dst folio variables. Link: https://lkml.kernel.org/r/20230125170537.96973-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a0d486ed5411..fd1ce61b8f3f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7300,13 +7300,13 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags, return ret; } -void putback_active_hugepage(struct page *page) +void folio_putback_active_hugetlb(struct folio *folio) { spin_lock_irq(&hugetlb_lock); - SetHPageMigratable(page); - list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist); + folio_set_hugetlb_migratable(folio); + list_move_tail(&folio->lru, &(folio_hstate(folio))->hugepage_activelist); spin_unlock_irq(&hugetlb_lock); - put_page(page); + folio_put(folio); } void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason) -- cgit v1.2.3 From d0ce0e47b323a8d7fb5dc3314ce56afa650ade2d Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:33 -0800 Subject: mm/hugetlb: convert hugetlb fault paths to use alloc_hugetlb_folio() Change alloc_huge_page() to alloc_hugetlb_folio() by changing all callers to handle the function's new folio return type. In this conversion, alloc_huge_page_vma() is also changed to alloc_hugetlb_folio_vma() and hugepage_add_new_anon_rmap() is changed to take in a folio directly. Many additions of '&folio->page' are cleaned up in subsequent patches. hugetlbfs_fallocate() is also refactored to use the RCU + page_cache_next_miss() API.
Link: https://lkml.kernel.org/r/20230125170537.96973-5-sidhartha.kumar@oracle.com Suggested-by: Mike Kravetz Reported-by: kernel test robot Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 201 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 101 insertions(+), 100 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fd1ce61b8f3f..ea8d4611779b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2493,7 +2493,7 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, } /* mempolicy aware migration callback */ -struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, +struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address) { struct mempolicy *mpol; @@ -2507,7 +2507,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask); mpol_cond_put(mpol); - return &folio->page; + return folio; } /* @@ -2798,14 +2798,14 @@ static long vma_del_reservation(struct hstate *h, /* * This routine is called to restore reservation information on error paths. - * It should ONLY be called for pages allocated via alloc_huge_page(), and - * the hugetlb mutex should remain held when calling this routine. + * It should ONLY be called for folios allocated via alloc_hugetlb_folio(), + * and the hugetlb mutex should remain held when calling this routine. * * It handles two specific cases: * 1) A reservation was in place and the page consumed the reservation. * HPageRestoreReserve is set in the page. * 2) No reservation was in place for the page, so HPageRestoreReserve is - * not set. However, alloc_huge_page always updates the reserve map. + * not set. However, alloc_hugetlb_folio always updates the reserve map. 
* * In case 1, free_huge_page later in the error path will increment the * global reserve count. But, free_huge_page does not have enough context @@ -2814,7 +2814,7 @@ static long vma_del_reservation(struct hstate *h, * reserve count adjustments to be made by free_huge_page. Make sure the * reserve map indicates there is a reservation present. * - * In case 2, simply undo reserve map modifications done by alloc_huge_page. + * In case 2, simply undo reserve map modifications done by alloc_hugetlb_folio. */ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct page *page) @@ -2844,8 +2844,8 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, if (!rc) { /* * This indicates there is an entry in the reserve map - * not added by alloc_huge_page. We know it was added - * before the alloc_huge_page call, otherwise + * not added by alloc_hugetlb_folio. We know it was added + * before the alloc_hugetlb_folio call, otherwise * hugetlb_restore_reserve would be set on the folio. * Remove the entry so that a subsequent allocation * does not consume a reservation. 
@@ -3014,7 +3014,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) return ret; } -struct page *alloc_huge_page(struct vm_area_struct *vma, +struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { struct hugepage_subpool *spool = subpool_vma(vma); @@ -3023,7 +3023,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, long map_chg, map_commit; long gbl_chg; int ret, idx; - struct hugetlb_cgroup *h_cg; + struct hugetlb_cgroup *h_cg = NULL; bool deferred_reserve; idx = hstate_index(h); @@ -3130,7 +3130,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), pages_per_huge_page(h), folio); } - return &folio->page; + return folio; out_uncharge_cgroup: hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg); @@ -4950,7 +4950,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add struct folio *new_folio) { __folio_mark_uptodate(new_folio); - hugepage_add_new_anon_rmap(&new_folio->page, vma, addr); + hugepage_add_new_anon_rmap(new_folio, vma, addr); set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1)); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); folio_set_hugetlb_migratable(new_folio); @@ -5080,34 +5080,34 @@ again: } else if (page_try_dup_anon_rmap(ptepage, true, src_vma)) { pte_t src_pte_old = entry; - struct page *new; + struct folio *new_folio; spin_unlock(src_ptl); spin_unlock(dst_ptl); /* Do not use reserve as it's private owned */ - new = alloc_huge_page(dst_vma, addr, 1); - if (IS_ERR(new)) { + new_folio = alloc_hugetlb_folio(dst_vma, addr, 1); + if (IS_ERR(new_folio)) { put_page(ptepage); - ret = PTR_ERR(new); + ret = PTR_ERR(new_folio); break; } - copy_user_huge_page(new, ptepage, addr, dst_vma, + copy_user_huge_page(&new_folio->page, ptepage, addr, dst_vma, npages); put_page(ptepage); - /* Install the new huge page if src pte 
stable */ + /* Install the new hugetlb folio if src pte stable */ dst_ptl = huge_pte_lock(h, dst, dst_pte); src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); if (!pte_same(src_pte_old, entry)) { restore_reserve_on_error(h, dst_vma, addr, - new); - put_page(new); + &new_folio->page); + folio_put(new_folio); /* huge_ptep of dst_pte won't change as in child */ goto again; } - hugetlb_install_folio(dst_vma, dst_pte, addr, page_folio(new)); + hugetlb_install_folio(dst_vma, dst_pte, addr, new_folio); spin_unlock(src_ptl); spin_unlock(dst_ptl); continue; @@ -5478,7 +5478,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, const bool unshare = flags & FAULT_FLAG_UNSHARE; pte_t pte; struct hstate *h = hstate_vma(vma); - struct page *old_page, *new_page; + struct page *old_page; + struct folio *new_folio; int outside_reserve = 0; vm_fault_t ret = 0; unsigned long haddr = address & huge_page_mask(h); @@ -5539,9 +5540,9 @@ retry_avoidcopy: * be acquired again before returning to the caller, as expected. 
*/ spin_unlock(ptl); - new_page = alloc_huge_page(vma, haddr, outside_reserve); + new_folio = alloc_hugetlb_folio(vma, haddr, outside_reserve); - if (IS_ERR(new_page)) { + if (IS_ERR(new_folio)) { /* * If a process owning a MAP_PRIVATE mapping fails to COW, * it is due to references held by a child and an insufficient @@ -5586,7 +5587,7 @@ retry_avoidcopy: return 0; } - ret = vmf_error(PTR_ERR(new_page)); + ret = vmf_error(PTR_ERR(new_folio)); goto out_release_old; } @@ -5599,9 +5600,9 @@ retry_avoidcopy: goto out_release_all; } - copy_user_huge_page(new_page, old_page, address, vma, + copy_user_huge_page(&new_folio->page, old_page, address, vma, pages_per_huge_page(h)); - __SetPageUptodate(new_page); + __folio_mark_uptodate(new_folio); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr, haddr + huge_page_size(h)); @@ -5618,12 +5619,12 @@ retry_avoidcopy: huge_ptep_clear_flush(vma, haddr, ptep); mmu_notifier_invalidate_range(mm, range.start, range.end); page_remove_rmap(old_page, vma, true); - hugepage_add_new_anon_rmap(new_page, vma, haddr); + hugepage_add_new_anon_rmap(new_folio, vma, haddr); set_huge_pte_at(mm, haddr, ptep, - make_huge_pte(vma, new_page, !unshare)); - SetHPageMigratable(new_page); + make_huge_pte(vma, &new_folio->page, !unshare)); + folio_set_hugetlb_migratable(new_folio); /* Make the old page be freed below */ - new_page = old_page; + new_folio = page_folio(old_page); } spin_unlock(ptl); mmu_notifier_invalidate_range_end(&range); @@ -5632,9 +5633,9 @@ out_release_all: * No restore in case of successful pagetable update (Break COW or * unshare) */ - if (new_page != old_page) - restore_reserve_on_error(h, vma, haddr, new_page); - put_page(new_page); + if (new_folio != page_folio(old_page)) + restore_reserve_on_error(h, vma, haddr, &new_folio->page); + folio_put(new_folio); out_release_old: put_page(old_page); @@ -5753,11 +5754,11 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, vm_fault_t ret = VM_FAULT_SIGBUS; int anon_rmap 
= 0; unsigned long size; - struct page *page; + struct folio *folio; pte_t new_pte; spinlock_t *ptl; unsigned long haddr = address & huge_page_mask(h); - bool new_page, new_pagecache_page = false; + bool new_folio, new_pagecache_folio = false; u32 hash = hugetlb_fault_mutex_hash(mapping, idx); /* @@ -5776,9 +5777,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * Use page lock to guard against racing truncation * before we get page_table_lock. */ - new_page = false; - page = find_lock_page(mapping, idx); - if (!page) { + new_folio = false; + folio = filemap_lock_folio(mapping, idx); + if (!folio) { size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) goto out; @@ -5811,8 +5812,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, VM_UFFD_MISSING); } - page = alloc_huge_page(vma, haddr, 0); - if (IS_ERR(page)) { + folio = alloc_hugetlb_folio(vma, haddr, 0); + if (IS_ERR(folio)) { /* * Returning error will result in faulting task being * sent SIGBUS. The hugetlb fault mutex prevents two @@ -5826,17 +5827,17 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * sure there really is no pte entry. */ if (hugetlb_pte_stable(h, mm, ptep, old_pte)) - ret = vmf_error(PTR_ERR(page)); + ret = vmf_error(PTR_ERR(folio)); else ret = 0; goto out; } - clear_huge_page(page, address, pages_per_huge_page(h)); - __SetPageUptodate(page); - new_page = true; + clear_huge_page(&folio->page, address, pages_per_huge_page(h)); + __folio_mark_uptodate(folio); + new_folio = true; if (vma->vm_flags & VM_MAYSHARE) { - int err = hugetlb_add_to_page_cache(page, mapping, idx); + int err = hugetlb_add_to_page_cache(&folio->page, mapping, idx); if (err) { /* * err can't be -EEXIST which implies someone @@ -5845,13 +5846,13 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * to the page cache. So it's safe to call * restore_reserve_on_error() here. 
*/ - restore_reserve_on_error(h, vma, haddr, page); - put_page(page); + restore_reserve_on_error(h, vma, haddr, &folio->page); + folio_put(folio); goto out; } - new_pagecache_page = true; + new_pagecache_folio = true; } else { - lock_page(page); + folio_lock(folio); if (unlikely(anon_vma_prepare(vma))) { ret = VM_FAULT_OOM; goto backout_unlocked; @@ -5864,7 +5865,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * don't have hwpoisoned swap entry for errored virtual address. * So we need to block hugepage fault by PG_hwpoison bit check. */ - if (unlikely(PageHWPoison(page))) { + if (unlikely(folio_test_hwpoison(folio))) { ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); goto backout_unlocked; @@ -5872,8 +5873,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, /* Check for page in userfault range. */ if (userfaultfd_minor(vma)) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); /* See comment in userfaultfd_missing() block above */ if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) { ret = 0; @@ -5907,10 +5908,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, goto backout; if (anon_rmap) - hugepage_add_new_anon_rmap(page, vma, haddr); + hugepage_add_new_anon_rmap(folio, vma, haddr); else - page_dup_file_rmap(page, true); - new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) + page_dup_file_rmap(&folio->page, true); + new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))); /* * If this pte was previously wr-protected, keep it wr-protected even @@ -5923,20 +5924,20 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ - ret = hugetlb_wp(mm, vma, address, ptep, flags, page, ptl); + ret = hugetlb_wp(mm, vma, address, ptep, flags, &folio->page, ptl); } 
spin_unlock(ptl); /* - * Only set HPageMigratable in newly allocated pages. Existing pages - * found in the pagecache may not have HPageMigratableset if they have + * Only set hugetlb_migratable in newly allocated pages. Existing pages + * found in the pagecache may not have hugetlb_migratable if they have * been isolated for migration. */ - if (new_page) - SetHPageMigratable(page); + if (new_folio) + folio_set_hugetlb_migratable(folio); - unlock_page(page); + folio_unlock(folio); out: hugetlb_vma_unlock_read(vma); mutex_unlock(&hugetlb_fault_mutex_table[hash]); @@ -5945,11 +5946,11 @@ out: backout: spin_unlock(ptl); backout_unlocked: - if (new_page && !new_pagecache_page) - restore_reserve_on_error(h, vma, haddr, page); + if (new_folio && !new_pagecache_folio) + restore_reserve_on_error(h, vma, haddr, &folio->page); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); goto out; } @@ -6173,16 +6174,16 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t _dst_pte; spinlock_t *ptl; int ret = -ENOMEM; - struct page *page; + struct folio *folio; int writable; - bool page_in_pagecache = false; + bool folio_in_pagecache = false; if (is_continue) { ret = -EFAULT; - page = find_lock_page(mapping, idx); - if (!page) + folio = filemap_lock_folio(mapping, idx); + if (!folio) goto out; - page_in_pagecache = true; + folio_in_pagecache = true; } else if (!*pagep) { /* If a page already exists, then it's UFFDIO_COPY for * a non-missing case. Return -EEXIST. 
@@ -6193,34 +6194,34 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, goto out; } - page = alloc_huge_page(dst_vma, dst_addr, 0); - if (IS_ERR(page)) { + folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0); + if (IS_ERR(folio)) { ret = -ENOMEM; goto out; } - ret = copy_huge_page_from_user(page, + ret = copy_huge_page_from_user(&folio->page, (const void __user *) src_addr, pages_per_huge_page(h), false); /* fallback to copy_from_user outside mmap_lock */ if (unlikely(ret)) { ret = -ENOENT; - /* Free the allocated page which may have + /* Free the allocated folio which may have * consumed a reservation. */ - restore_reserve_on_error(h, dst_vma, dst_addr, page); - put_page(page); + restore_reserve_on_error(h, dst_vma, dst_addr, &folio->page); + folio_put(folio); - /* Allocate a temporary page to hold the copied + /* Allocate a temporary folio to hold the copied * contents. */ - page = alloc_huge_page_vma(h, dst_vma, dst_addr); - if (!page) { + folio = alloc_hugetlb_folio_vma(h, dst_vma, dst_addr); + if (!folio) { ret = -ENOMEM; goto out; } - *pagep = page; + *pagep = &folio->page; /* Set the outparam pagep and return to the caller to * copy the contents outside the lock. Don't free the * page. @@ -6236,25 +6237,25 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, goto out; } - page = alloc_huge_page(dst_vma, dst_addr, 0); - if (IS_ERR(page)) { + folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0); + if (IS_ERR(folio)) { put_page(*pagep); ret = -ENOMEM; *pagep = NULL; goto out; } - copy_user_huge_page(page, *pagep, dst_addr, dst_vma, + copy_user_huge_page(&folio->page, *pagep, dst_addr, dst_vma, pages_per_huge_page(h)); put_page(*pagep); *pagep = NULL; } /* - * The memory barrier inside __SetPageUptodate makes sure that + * The memory barrier inside __folio_mark_uptodate makes sure that * preceding stores to the page contents become visible before * the set_pte_at() write. 
*/ - __SetPageUptodate(page); + __folio_mark_uptodate(folio); /* Add shared, newly allocated pages to the page cache. */ if (vm_shared && !is_continue) { @@ -6269,16 +6270,16 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, * hugetlb_fault_mutex_table that here must be hold by * the caller. */ - ret = hugetlb_add_to_page_cache(page, mapping, idx); + ret = hugetlb_add_to_page_cache(&folio->page, mapping, idx); if (ret) goto out_release_nounlock; - page_in_pagecache = true; + folio_in_pagecache = true; } ptl = huge_pte_lock(h, dst_mm, dst_pte); ret = -EIO; - if (PageHWPoison(page)) + if (folio_test_hwpoison(folio)) goto out_release_unlock; /* @@ -6290,10 +6291,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, if (!huge_pte_none_mostly(huge_ptep_get(dst_pte))) goto out_release_unlock; - if (page_in_pagecache) - page_dup_file_rmap(page, true); + if (folio_in_pagecache) + page_dup_file_rmap(&folio->page, true); else - hugepage_add_new_anon_rmap(page, dst_vma, dst_addr); + hugepage_add_new_anon_rmap(folio, dst_vma, dst_addr); /* * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY @@ -6304,7 +6305,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, else writable = dst_vma->vm_flags & VM_WRITE; - _dst_pte = make_huge_pte(dst_vma, page, writable); + _dst_pte = make_huge_pte(dst_vma, &folio->page, writable); /* * Always mark UFFDIO_COPY page dirty; note that this may not be * extremely important for hugetlbfs for now since swapping is not @@ -6326,20 +6327,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, spin_unlock(ptl); if (!is_continue) - SetHPageMigratable(page); + folio_set_hugetlb_migratable(folio); if (vm_shared || is_continue) - unlock_page(page); + folio_unlock(folio); ret = 0; out: return ret; out_release_unlock: spin_unlock(ptl); if (vm_shared || is_continue) - unlock_page(page); + folio_unlock(folio); out_release_nounlock: - if (!page_in_pagecache) - restore_reserve_on_error(h, dst_vma, dst_addr, page); - 
put_page(page); + if (!folio_in_pagecache) + restore_reserve_on_error(h, dst_vma, dst_addr, &folio->page); + folio_put(folio); goto out; } #endif /* CONFIG_USERFAULTFD */ @@ -6871,7 +6872,7 @@ bool hugetlb_reserve_pages(struct inode *inode, /* * pages in this range were added to the reserve * map between region_chg and region_add. This - * indicates a race with alloc_huge_page. Adjust + * indicates a race with alloc_hugetlb_folio. Adjust * the subpool and reserve counts modified above * based on the difference. */ -- cgit v1.2.3 From d2d7bb44bfbd29200426ba17741550d36e081f91 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:34 -0800 Subject: mm/hugetlb: convert restore_reserve_on_error to take in a folio Every caller of restore_reserve_on_error() is now passing in &folio->page, change the function to take in a folio directly and clean up the call sites. Link: https://lkml.kernel.org/r/20230125170537.96973-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ea8d4611779b..1f6270c586c0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2802,9 +2802,9 @@ static long vma_del_reservation(struct hstate *h, * and the hugetlb mutex should remain held when calling this routine. * * It handles two specific cases: - * 1) A reservation was in place and the page consumed the reservation. - * HPageRestoreReserve is set in the page. - * 2) No reservation was in place for the page, so HPageRestoreReserve is + * 1) A reservation was in place and the folio consumed the reservation. + * hugetlb_restore_reserve is set in the folio. + * 2) No reservation was in place for the page, so hugetlb_restore_reserve is * not set. 
However, alloc_hugetlb_folio always updates the reserve map. * * In case 1, free_huge_page later in the error path will increment the @@ -2817,9 +2817,8 @@ static long vma_del_reservation(struct hstate *h, * In case 2, simply undo reserve map modifications done by alloc_hugetlb_folio. */ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, - unsigned long address, struct page *page) + unsigned long address, struct folio *folio) { - struct folio *folio = page_folio(page); long rc = vma_needs_reservation(h, vma, address); if (folio_test_hugetlb_restore_reserve(folio)) { @@ -5102,7 +5101,7 @@ again: entry = huge_ptep_get(src_pte); if (!pte_same(src_pte_old, entry)) { restore_reserve_on_error(h, dst_vma, addr, - &new_folio->page); + new_folio); folio_put(new_folio); /* huge_ptep of dst_pte won't change as in child */ goto again; @@ -5634,7 +5633,7 @@ out_release_all: * unshare) */ if (new_folio != page_folio(old_page)) - restore_reserve_on_error(h, vma, haddr, &new_folio->page); + restore_reserve_on_error(h, vma, haddr, new_folio); folio_put(new_folio); out_release_old: put_page(old_page); @@ -5846,7 +5845,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * to the page cache. So it's safe to call * restore_reserve_on_error() here. */ - restore_reserve_on_error(h, vma, haddr, &folio->page); + restore_reserve_on_error(h, vma, haddr, folio); folio_put(folio); goto out; } @@ -5947,7 +5946,7 @@ backout: spin_unlock(ptl); backout_unlocked: if (new_folio && !new_pagecache_folio) - restore_reserve_on_error(h, vma, haddr, &folio->page); + restore_reserve_on_error(h, vma, haddr, folio); folio_unlock(folio); folio_put(folio); @@ -6210,7 +6209,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, /* Free the allocated folio which may have * consumed a reservation. 
*/ - restore_reserve_on_error(h, dst_vma, dst_addr, &folio->page); + restore_reserve_on_error(h, dst_vma, dst_addr, folio); folio_put(folio); /* Allocate a temporary folio to hold the copied @@ -6339,7 +6338,7 @@ out_release_unlock: folio_unlock(folio); out_release_nounlock: if (!folio_in_pagecache) - restore_reserve_on_error(h, dst_vma, dst_addr, &folio->page); + restore_reserve_on_error(h, dst_vma, dst_addr, folio); folio_put(folio); goto out; } -- cgit v1.2.3 From 9b91c0e277a3dbb165c2e4301be7a231dc2f76f7 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:35 -0800 Subject: mm/hugetlb: convert hugetlb_add_to_page_cache to take in a folio Every caller of hugetlb_add_to_page_cache() is now passing in &folio->page, change the function to take in a folio directly and clean up the call sites. Link: https://lkml.kernel.org/r/20230125170537.96973-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1f6270c586c0..de1f73e5e200 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5662,10 +5662,9 @@ static bool hugetlbfs_pagecache_present(struct hstate *h, return present; } -int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, +int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx) { - struct folio *folio = page_folio(page); struct inode *inode = mapping->host; struct hstate *h = hstate_inode(inode); int err; @@ -5677,7 +5676,7 @@ int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, __folio_clear_locked(folio); return err; } - ClearHPageRestoreReserve(page); + folio_clear_hugetlb_restore_reserve(folio); /* * mark folio dirty so that it will not be removed from cache/file @@ -5836,7 
+5835,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, new_folio = true; if (vma->vm_flags & VM_MAYSHARE) { - int err = hugetlb_add_to_page_cache(&folio->page, mapping, idx); + int err = hugetlb_add_to_page_cache(folio, mapping, idx); if (err) { /* * err can't be -EEXIST which implies someone @@ -6269,7 +6268,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, * hugetlb_fault_mutex_table that here must be hold by * the caller. */ - ret = hugetlb_add_to_page_cache(&folio->page, mapping, idx); + ret = hugetlb_add_to_page_cache(folio, mapping, idx); if (ret) goto out_release_nounlock; folio_in_pagecache = true; -- cgit v1.2.3 From 371607a3c793d7183b0faecc1fb4aa88fadcf202 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:36 -0800 Subject: mm/hugetlb: convert hugetlb_wp() to take in a folio Change the pagecache_page argument of hugetlb_wp to pagecache_folio. Replaces a call to find_lock_page() with filemap_lock_folio(). Link: https://lkml.kernel.org/r/20230125170537.96973-8-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reported-by: gerald.schaefer@linux.ibm.com Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index de1f73e5e200..3a01a9dbf445 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5472,7 +5472,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, */ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, unsigned int flags, - struct page *pagecache_page, spinlock_t *ptl) + struct folio *pagecache_folio, spinlock_t *ptl) { const bool unshare = flags & FAULT_FLAG_UNSHARE; pte_t pte; @@ -5529,7 +5529,7 @@ retry_avoidcopy: * of the full address range. 
*/ if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && - old_page != pagecache_page) + page_folio(old_page) != pagecache_folio) outside_reserve = 1; get_page(old_page); @@ -5922,7 +5922,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ - ret = hugetlb_wp(mm, vma, address, ptep, flags, &folio->page, ptl); + ret = hugetlb_wp(mm, vma, address, ptep, flags, folio, ptl); } spin_unlock(ptl); @@ -5985,7 +5985,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, u32 hash; pgoff_t idx; struct page *page = NULL; - struct page *pagecache_page = NULL; + struct folio *pagecache_folio = NULL; struct hstate *h = hstate_vma(vma); struct address_space *mapping; int need_wait_lock = 0; @@ -6067,7 +6067,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, /* Just decrements count, does not deallocate */ vma_end_reservation(h, vma, haddr); - pagecache_page = find_lock_page(mapping, idx); + pagecache_folio = filemap_lock_folio(mapping, idx); } ptl = huge_pte_lock(h, mm, ptep); @@ -6087,9 +6087,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, }; spin_unlock(ptl); - if (pagecache_page) { - unlock_page(pagecache_page); - put_page(pagecache_page); + if (pagecache_folio) { + folio_unlock(pagecache_folio); + folio_put(pagecache_folio); } hugetlb_vma_unlock_read(vma); mutex_unlock(&hugetlb_fault_mutex_table[hash]); @@ -6098,11 +6098,11 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, /* * hugetlb_wp() requires page locks of pte_page(entry) and - * pagecache_page, so here we need take the former one - * when page != pagecache_page or !pagecache_page. + * pagecache_folio, so here we need take the former one + * when page != pagecache_folio or !pagecache_folio. 
*/ page = pte_page(entry); - if (page != pagecache_page) + if (page_folio(page) != pagecache_folio) if (!trylock_page(page)) { need_wait_lock = 1; goto out_ptl; @@ -6113,7 +6113,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { if (!huge_pte_write(entry)) { ret = hugetlb_wp(mm, vma, address, ptep, flags, - pagecache_page, ptl); + pagecache_folio, ptl); goto out_put_page; } else if (likely(flags & FAULT_FLAG_WRITE)) { entry = huge_pte_mkdirty(entry); @@ -6124,15 +6124,15 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, flags & FAULT_FLAG_WRITE)) update_mmu_cache(vma, haddr, ptep); out_put_page: - if (page != pagecache_page) + if (page_folio(page) != pagecache_folio) unlock_page(page); put_page(page); out_ptl: spin_unlock(ptl); - if (pagecache_page) { - unlock_page(pagecache_page); - put_page(pagecache_page); + if (pagecache_folio) { + folio_unlock(pagecache_folio); + folio_put(pagecache_folio); } out_mutex: hugetlb_vma_unlock_read(vma); -- cgit v1.2.3 From 9747b9e92418b61c2281561e0651803f1fad0159 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 15 Feb 2023 18:39:36 +0800 Subject: mm: hugetlb: change to return bool for isolate_hugetlb() isolate_hugetlb() now only returns 0 or -EBUSY, and most callers do not care about the specific negative value, so convert isolate_hugetlb() to return a boolean value to make the code clearer when checking the hugetlb isolation state. Also convert the 2 callers that do make use of the negative value returned by isolate_hugetlb(). No functional changes intended.
[akpm@linux-foundation.org: shorten locked section, per SeongJae Park] Link: https://lkml.kernel.org/r/12a287c5bebc13df304387087bbecc6421510849.1676424378.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Mike Kravetz Acked-by: Linus Torvalds Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/hugetlb.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3a01a9dbf445..07abcb6eb203 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2925,12 +2925,15 @@ retry: */ goto free_new; } else if (folio_ref_count(old_folio)) { + bool isolated; + /* * Someone has grabbed the folio, try to isolate it here. * Fail with -EBUSY if not possible. */ spin_unlock_irq(&hugetlb_lock); - ret = isolate_hugetlb(old_folio, list); + isolated = isolate_hugetlb(old_folio, list); + ret = isolated ? 0 : -EBUSY; spin_lock_irq(&hugetlb_lock); goto free_new; } else if (!folio_test_hugetlb_freed(old_folio)) { @@ -3005,7 +3008,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) if (hstate_is_gigantic(h)) return -ENOMEM; - if (folio_ref_count(folio) && !isolate_hugetlb(folio, list)) + if (folio_ref_count(folio) && isolate_hugetlb(folio, list)) ret = 0; else if (!folio_ref_count(folio)) ret = alloc_and_dissolve_hugetlb_folio(h, folio, list); @@ -7251,15 +7254,15 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h) * These functions are overwritable if your architecture needs its own * behavior. 
*/ -int isolate_hugetlb(struct folio *folio, struct list_head *list) +bool isolate_hugetlb(struct folio *folio, struct list_head *list) { - int ret = 0; + bool ret = true; spin_lock_irq(&hugetlb_lock); if (!folio_test_hugetlb(folio) || !folio_test_hugetlb_migratable(folio) || !folio_try_get(folio)) { - ret = -EBUSY; + ret = false; goto unlock; } folio_clear_hugetlb_migratable(folio); -- cgit v1.2.3