From b22cc9a9c7ff0ad8998d58fdd7122de6038c46a7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:27 +0200 Subject: mm/rmap: convert "enum rmap_level" to "enum pgtable_level" Let's factor it out, and convert all checks for unsupported levels to BUILD_BUG(). The code is written in a way such that force-inlining will optimize out the levels. [nathan@kernel.org: always inline __folio_rmap_sanity_checks()] Link: https://lkml.kernel.org/r/20250814-rmap-fix-build_bug-conversion-v1-1-fb7b10a0b362@kernel.org Link: https://lkml.kernel.org/r/20250811112631.759341-8-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Nathan Chancellor Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: David Vrabel Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Oscar Salvador Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Wei Yang Cc: Zi Yan Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/rmap.h | 62 ++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 34 deletions(-) (limited to 'include/linux/rmap.h') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 6cd020eea37a..e8aff6d2deda 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -394,18 +394,8 @@ typedef int __bitwise rmap_t; /* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) -/* - * Internally, we're using an enum to specify the granularity. We make the - * compiler emit specialized code for each granularity. 
- */ -enum rmap_level { - RMAP_LEVEL_PTE = 0, - RMAP_LEVEL_PMD, - RMAP_LEVEL_PUD, -}; - -static inline void __folio_rmap_sanity_checks(const struct folio *folio, - const struct page *page, int nr_pages, enum rmap_level level) +static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio, + const struct page *page, int nr_pages, enum pgtable_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); @@ -427,18 +417,18 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: break; - case RMAP_LEVEL_PMD: + case PGTABLE_LEVEL_PMD: /* * We don't support folios larger than a single PMD yet. So - * when RMAP_LEVEL_PMD is set, we assume that we are creating + * when PGTABLE_LEVEL_PMD is set, we assume that we are creating * a single "entire" mapping of the folio. */ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PUD: /* * Assume that we are creating a single "entire" mapping of the * folio. 
@@ -447,7 +437,7 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); break; default: - VM_WARN_ON_ONCE(true); + BUILD_BUG(); } /* @@ -567,14 +557,14 @@ static inline void hugetlb_remove_rmap(struct folio *folio) static __always_inline void __folio_dup_file_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - enum rmap_level level) + enum pgtable_level level) { const int orig_nr_pages = nr_pages; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (!folio_test_large(folio)) { atomic_inc(&folio->_mapcount); break; @@ -587,11 +577,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, } folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } } @@ -609,13 +601,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, static inline void folio_dup_file_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, PGTABLE_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, PGTABLE_LEVEL_PTE); } /** @@ -632,7 +624,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE); + 
__folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, PGTABLE_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif @@ -640,7 +632,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, enum rmap_level level) + struct vm_area_struct *src_vma, enum pgtable_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -665,7 +657,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * copying if the folio maybe pinned. */ switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (unlikely(maybe_pinned)) { for (i = 0; i < nr_pages; i++) if (PageAnonExclusive(page + i)) @@ -687,8 +679,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, } while (page++, --nr_pages > 0); folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; @@ -697,6 +689,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } return 0; } @@ -730,7 +724,7 @@ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, - src_vma, RMAP_LEVEL_PTE); + src_vma, PGTABLE_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, @@ -738,7 +732,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, - RMAP_LEVEL_PTE); + PGTABLE_LEVEL_PTE); } /** @@ -770,7 +764,7 @@ static inline int 
folio_try_dup_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, - src_vma, RMAP_LEVEL_PMD); + src_vma, PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -778,7 +772,7 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, } static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, enum pgtable_level level) { VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); @@ -873,7 +867,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, static inline int folio_try_share_anon_rmap_pte(struct folio *folio, struct page *page) { - return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); + return __folio_try_share_anon_rmap(folio, page, 1, PGTABLE_LEVEL_PTE); } /** @@ -904,7 +898,7 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, - RMAP_LEVEL_PMD); + PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; -- cgit v1.2.3 From 2db579838296239545554443234fafb8f485cca0 Mon Sep 17 00:00:00 2001 From: Kiryl Shutsemau Date: Tue, 23 Sep 2025 12:07:06 +0100 Subject: mm/page_vma_mapped: track if the page is mapped across page table boundary Patch series "mm: Improve mlock tracking for large folios", v3. The patchset includes several fixes and improvements related to mlock tracking of large folios. The main objective is to reduce the undercount of Mlocked memory in /proc/meminfo and improve the accuracy of the statistics. Patches 1-2: These patches address a minor race condition in folio_referenced_one() related to mlock_vma_folio(). 
Currently, mlock_vma_folio() is called on a large folio without the page table lock, which can result in a race condition with unmap (e.g. MADV_DONTNEED). This can lead to partially mapped folios on the unevictable LRU list. As this is not a significant issue, I do not believe backporting is necessary. Patch 3: This patch adds mlocking logic similar to folio_referenced_one() to try_to_unmap_one(), allowing for mlocking of large folios where possible. Patches 4-5: These patches modify finish_fault() and faultaround to map in the entire folio when possible, enabling efficient mlocking upon addition to the rmap. Patch 6: This patch makes rmap mlock large folios if they are fully mapped, addressing the primary source of mlock undercount for large folios. This patch (of 6): Add a PVMW_PGTABLE_CROSSED flag that page_vma_mapped_walk() will set if the page is mapped across a page table boundary. Unlike other PVMW_* flags, this one is a result of page_vma_mapped_walk() and not set by the caller. folio_referenced_one() will use it to detect if it is safe to mlock the folio. 
[akpm@linux-foundation.org: s/CROSSSED/CROSSED/] Link: https://lkml.kernel.org/r/20250923110711.690639-1-kirill@shutemov.name Link: https://lkml.kernel.org/r/20250923110711.690639-2-kirill@shutemov.name Signed-off-by: Kiryl Shutsemau Reviewed-by: Shakeel Butt Cc: Baolin Wang Cc: David Hildenbrand Cc: Johannes Weiner Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/rmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/rmap.h') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e8aff6d2deda..daa92a58585d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -922,6 +922,11 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, /* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) +/* Result flags */ + +/* The page is mapped across page table boundary */ +#define PVMW_PGTABLE_CROSSED (1 << 16) + struct page_vma_mapped_walk { unsigned long pfn; unsigned long nr_pages; -- cgit v1.2.3