From 53fbef56e07df822ea3029109ffca25328c2e5ac Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 5 Aug 2025 18:22:51 +0100
Subject: mm: introduce memdesc_flags_t

Patch series "Add and use memdesc_flags_t".

At some point struct page will be separated from struct slab and struct
folio.  This is a step towards that by introducing a type for the 'flags'
word of all three structures.  This gives us a certain amount of type
safety by establishing that some of these unsigned longs are different
from other unsigned longs in that they contain things like node ID,
section number and zone number in the upper bits.  That lets us have
functions that can be easily called by anyone who has a slab, folio or
page (but not easily by anyone else) to get the node or zone.

There are going to be some unusual merge problems with this as some odd
bits of the kernel decide they want to print out the flags value or
something similar by writing page->flags and now they'll need to write
page->flags.f instead.  That's most of the churn here.  Maybe we should
be removing these things from the debug output?

This patch (of 11):

Wrap the unsigned long flags in a typedef.  In upcoming patches, this
will provide a strong hint that you can't just pass a random unsigned
long to functions which take this as an argument.

[willy@infradead.org: s/flags/flags.f/ in several architectures]
  Link: https://lkml.kernel.org/r/aKMgPRLD-WnkPxYm@casper.infradead.org
[nicola.vetrini@gmail.com: mips: fix compilation error]
  Link: https://lore.kernel.org/lkml/CA+G9fYvkpmqGr6wjBNHY=dRp71PLCoi2341JxOudi60yqaeUdg@mail.gmail.com/
  Link: https://lkml.kernel.org/r/20250825214245.1838158-1-nicola.vetrini@gmail.com
Link: https://lkml.kernel.org/r/20250805172307.1302730-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20250805172307.1302730-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Signed-off-by: Matthew Wilcox (Oracle)
Acked-by: Zi Yan
Cc: Shakeel Butt
Signed-off-by: Andrew Morton
---
 include/linux/mm_inline.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 89b518ff097e..150302b4a905 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -143,7 +143,7 @@ static inline int lru_tier_from_refs(int refs, bool workingset)
 
 static inline int folio_lru_refs(struct folio *folio)
 {
-	unsigned long flags = READ_ONCE(folio->flags);
+	unsigned long flags = READ_ONCE(folio->flags.f);
 
 	if (!(flags & BIT(PG_referenced)))
 		return 0;
@@ -156,7 +156,7 @@ static inline int folio_lru_refs(struct folio *folio)
 
 static inline int folio_lru_gen(struct folio *folio)
 {
-	unsigned long flags = READ_ONCE(folio->flags);
+	unsigned long flags = READ_ONCE(folio->flags.f);
 
 	return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 }
@@ -268,7 +268,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 	gen = lru_gen_from_seq(seq);
 	flags = (gen + 1UL) << LRU_GEN_PGOFF;
 	/* see the comment on MIN_NR_GENS about PG_active */
-	set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
+	set_mask_bits(&folio->flags.f, LRU_GEN_MASK | BIT(PG_active), flags);
 
 	lru_gen_update_size(lruvec, folio, -1, gen);
 	/* for folio_rotate_reclaimable() */
@@ -293,7 +293,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
 	/* for folio_migrate_flags() */
 	flags = !reclaiming && lru_gen_is_active(lruvec, gen) ?
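For orientation, here is a minimal userspace sketch of the pattern this
patch introduces.  The one-member wrapper struct and its '.f' field are
confirmed by the diff above; sketch_flags_node(), the bit layout, and the
main() harness are illustrative assumptions, not the kernel's actual
definitions.

#include <stdio.h>

/* Wrapper type as introduced by the series; the real definition lives
 * in the kernel headers, but the one-member shape and the '.f' field
 * match the diff above. */
typedef struct {
	unsigned long f;
} memdesc_flags_t;

struct page {
	memdesc_flags_t flags;
};

/* Hypothetical accessor: only code holding a real flags word can reach
 * the upper bits.  The shift is illustrative, not the kernel's layout. */
static unsigned long sketch_flags_node(memdesc_flags_t flags)
{
	return flags.f >> 62;
}

int main(void)
{
	struct page page = { .flags = { 2UL << 62 } };

	/* page->flags.f is now the spelling for the raw word. */
	printf("node: %lu\n", sketch_flags_node(page.flags));

	/* sketch_flags_node(2UL); would no longer compile: passing a
	 * random unsigned long is rejected by the type system. */
	return 0;
}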
			BIT(PG_active) : 0;
-	flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags);
+	flags = set_mask_bits(&folio->flags.f, LRU_GEN_MASK, flags);
 
 	gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 	lru_gen_update_size(lruvec, folio, gen, -1);
@@ -304,9 +304,9 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
 
 static inline void folio_migrate_refs(struct folio *new, struct folio *old)
 {
-	unsigned long refs = READ_ONCE(old->flags) & LRU_REFS_MASK;
+	unsigned long refs = READ_ONCE(old->flags.f) & LRU_REFS_MASK;
 
-	set_mask_bits(&new->flags, LRU_REFS_MASK, refs);
+	set_mask_bits(&new->flags.f, LRU_REFS_MASK, refs);
 }
 
 #else /* !CONFIG_LRU_GEN */
--
cgit v1.2.3

From f346a9473a2fbbab785d1733d475160f1fc54e5a Mon Sep 17 00:00:00 2001
From: Max Kellermann
Date: Mon, 1 Sep 2025 22:50:19 +0200
Subject: mm: constify various inline functions for improved const-correctness

We select certain test functions, plus folio_migrate_refs(), from
mm_inline.h; each of them invokes only the others, functions that are
already const-ified, or no further functions.  It is therefore
relatively trivial to const-ify them, which provides a basis for
further const-ification higher up the call stack.

One exception is folio_migrate_refs(), which does write to the "new"
folio pointer; there, only the "old" folio pointer is const-ified,
since its "flags" field is only read, never written.

Link: https://lkml.kernel.org/r/20250901205021.3573313-11-max.kellermann@ionos.com
Signed-off-by: Max Kellermann
Reviewed-by: Vishal Moola (Oracle)
Reviewed-by: Lorenzo Stoakes
Acked-by: David Hildenbrand
Acked-by: Vlastimil Babka
Acked-by: Mike Rapoport (Microsoft)
Acked-by: Shakeel Butt
Cc: Alexander Gordeev
Cc: Al Viro
Cc: Andreas Larsson
Cc: Andy Lutomirski
Cc: Axel Rasmussen
Cc: Baolin Wang
Cc: Borislav Petkov
Cc: Christian Borntraeger
Cc: Christian Brauner
Cc: Christian Zankel
Cc: David Rientjes
Cc: David S. Miller
Cc: Gerald Schaefer
Cc: Heiko Carstens
Cc: Helge Deller
Cc: "H. Peter Anvin"
Cc: Hugh Dickins
Cc: Ingo Molnar
Cc: James Bottomley
Cc: Jan Kara
Cc: Jocelyn Falempe
Cc: Liam Howlett
Cc: Mark Brown
Cc: Matthew Wilcox (Oracle)
Cc: Max Filippov
Cc: Michael Ellerman
Cc: Michal Hocko
Cc: "Nysal Jan K.A"
Cc: Oscar Salvador
Cc: Peter Zijlstra
Cc: Russell King
Cc: Suren Baghdasaryan
Cc: Sven Schnelle
Cc: Thomas Gleixner
Cc: Thomas Huth
Cc: Vasily Gorbik
Cc: Wei Xu
Cc: Yuanchu Xie
Signed-off-by: Andrew Morton
---
 include/linux/mm_inline.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 150302b4a905..d6c1011b38f2 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -25,7 +25,7 @@
  * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise
  * ram or swap backed folio.
  */
-static inline int folio_is_file_lru(struct folio *folio)
+static inline int folio_is_file_lru(const struct folio *folio)
 {
 	return !folio_test_swapbacked(folio);
 }
@@ -84,7 +84,7 @@ static __always_inline void __folio_clear_lru_flags(struct folio *folio)
  * Return: The LRU list a folio should be on, as an index
  * into the array of LRU lists.
  */
-static __always_inline enum lru_list folio_lru_list(struct folio *folio)
+static __always_inline enum lru_list folio_lru_list(const struct folio *folio)
 {
 	enum lru_list lru;
 
@@ -141,7 +141,7 @@ static inline int lru_tier_from_refs(int refs, bool workingset)
 	return workingset ? MAX_NR_TIERS - 1 : order_base_2(refs);
 }
 
-static inline int folio_lru_refs(struct folio *folio)
+static inline int folio_lru_refs(const struct folio *folio)
 {
 	unsigned long flags = READ_ONCE(folio->flags.f);
 
@@ -154,14 +154,14 @@ static inline int folio_lru_refs(struct folio *folio)
 	return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1;
 }
 
-static inline int folio_lru_gen(struct folio *folio)
+static inline int folio_lru_gen(const struct folio *folio)
 {
 	unsigned long flags = READ_ONCE(folio->flags.f);
 
 	return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 }
 
-static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
+static inline bool lru_gen_is_active(const struct lruvec *lruvec, int gen)
 {
 	unsigned long max_seq = lruvec->lrugen.max_seq;
 
@@ -217,12 +217,13 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli
 	VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
 }
 
-static inline unsigned long lru_gen_folio_seq(struct lruvec *lruvec, struct folio *folio,
+static inline unsigned long lru_gen_folio_seq(const struct lruvec *lruvec,
+					      const struct folio *folio,
 					      bool reclaiming)
 {
 	int gen;
 	int type = folio_is_file_lru(folio);
-	struct lru_gen_folio *lrugen = &lruvec->lrugen;
+	const struct lru_gen_folio *lrugen = &lruvec->lrugen;
 
 	/*
 	 * +-----------------------------------+-----------------------------------+
@@ -302,7 +303,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
 	return true;
 }
 
-static inline void folio_migrate_refs(struct folio *new, struct folio *old)
+static inline void folio_migrate_refs(struct folio *new, const struct folio *old)
 {
 	unsigned long refs = READ_ONCE(old->flags.f) & LRU_REFS_MASK;
 
@@ -330,7 +331,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
 	return false;
 }
 
-static inline void folio_migrate_refs(struct folio *new, struct folio *old)
+static inline void folio_migrate_refs(struct folio *new, const struct folio *old)
 {
 }
 
@@ -508,7 +509,7 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 	atomic_dec(&mm->tlb_flush_pending);
 }
 
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+static inline bool mm_tlb_flush_pending(const struct mm_struct *mm)
 {
 	/*
 	 * Must be called after having acquired the PTL; orders against that
@@ -521,7 +522,7 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 	return atomic_read(&mm->tlb_flush_pending);
 }
 
-static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+static inline bool mm_tlb_flush_nested(const struct mm_struct *mm)
 {
 	/*
 	 * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
@@ -605,7 +606,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
 	return false;
 }
 
-static inline bool vma_has_recency(struct vm_area_struct *vma)
+static inline bool vma_has_recency(const struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
 		return false;
--
cgit v1.2.3
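As a quick illustration of why leaf-first const-ification matters, here
is a hedged userspace sketch; the types, field layout and helper names
are toy stand-ins, not the kernel's.  Once a leaf helper accepts a const
pointer, every read-only caller above it can be const-ified in turn.

#include <stdbool.h>

/* Toy stand-in for struct folio; the layout is illustrative only. */
struct folio {
	unsigned long flags;
};

#define SWAPBACKED_BIT	0x2UL	/* hypothetical bit, not the kernel's */

/* Leaf helper, already const-correct (cf. folio_test_swapbacked()). */
static inline bool test_swapbacked(const struct folio *folio)
{
	return folio->flags & SWAPBACKED_BIT;
}

/* With a non-const parameter, callers holding 'const struct folio *'
 * would need a cast here.  Const-ifying removes that friction... */
static inline int is_file_lru(const struct folio *folio)
{
	return !test_swapbacked(folio);
}

/* ...and lets read-only code further up the call stack go const too. */
static inline int lru_type(const struct folio *folio)
{
	return is_file_lru(folio);
}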
From 929bf010e0599ddef6b640cd314f1de65dd1ca3e Mon Sep 17 00:00:00 2001
From: Li Zhe
Date: Thu, 14 Aug 2025 14:47:10 +0800
Subject: mm: introduce num_pages_contiguous()

Let's add a simple helper for determining the number of contiguous pages
that represent contiguous PFNs.

In an ideal world, this helper would be simpler or not even required.
Unfortunately, on some configs that we still have to maintain (SPARSEMEM
without VMEMMAP), the memmap is allocated per memory section, and we
might run into weird corner cases of false positives when blindly
testing for contiguous pages only.

One example of such false positives would be a memory section-sized hole
that does not have a memmap.  The surrounding memory sections might get
"struct pages" that are contiguous, but the PFNs are actually not.

This helper will, for example, be useful for determining contiguous PFNs
in a GUP result, to batch further operations across returned "struct
page"s.  VFIO will utilize this interface to accelerate the VFIO DMA map
process.

Implementation based on Linus' suggestions to avoid new usage of
nth_page() where avoidable.

Suggested-by: Linus Torvalds
Suggested-by: Jason Gunthorpe
Signed-off-by: Li Zhe
Co-developed-by: David Hildenbrand
Signed-off-by: David Hildenbrand
Link: https://lore.kernel.org/r/20250814064714.56485-2-lizhe.67@bytedance.com
Signed-off-by: Alex Williamson
---
 include/linux/mm_inline.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index d6c1011b38f2..f6a2b2d20016 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -617,4 +617,40 @@ static inline bool vma_has_recency(const struct vm_area_struct *vma)
 	return true;
 }
 
+/**
+ * num_pages_contiguous() - determine the number of contiguous pages
+ *			    that represent contiguous PFNs
+ * @pages: an array of page pointers
+ * @nr_pages: length of the array, at least 1
+ *
+ * Determine the number of contiguous pages that represent contiguous PFNs
+ * in @pages, starting from the first page.
+ *
+ * In some kernel configs contiguous PFNs will not have contiguous struct
+ * pages.  In these configurations num_pages_contiguous() may return a
+ * number smaller than the actual number of pages with contiguous PFNs,
+ * so the caller should continue to check for PFN contiguity after each
+ * call to num_pages_contiguous().
+ *
+ * Returns the number of contiguous pages.
+ */
+static inline size_t num_pages_contiguous(struct page **pages, size_t nr_pages)
+{
+	struct page *cur_page = pages[0];
+	unsigned long section = memdesc_section(cur_page->flags);
+	size_t i;
+
+	for (i = 1; i < nr_pages; i++) {
+		if (++cur_page != pages[i])
+			break;
+		/*
+		 * In unproblematic kernel configs, page_to_section() == 0 and
+		 * the whole check will get optimized out.
+		 */
+		if (memdesc_section(cur_page->flags) != section)
+			break;
+	}
+
+	return i;
+}
+
 #endif
--
cgit v1.2.3
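To make the intended calling pattern concrete, here is a hedged sketch
of the kind of batching loop this helper enables, for instance over a
pin_user_pages() result.  map_range() is a hypothetical callback
standing in for whatever per-range work the caller does (such as a VFIO
DMA map); it is not a real kernel API, while page_to_pfn() and
num_pages_contiguous() are.

/*
 * Sketch only: process pinned pages in physically contiguous runs,
 * re-checking contiguity after every call as the kernel-doc above
 * requires.
 */
static int map_pinned_pages(struct page **pages, size_t nr_pages)
{
	size_t done = 0;

	while (done < nr_pages) {
		size_t chunk = num_pages_contiguous(&pages[done],
						    nr_pages - done);
		int ret;

		/* One map_range() call covers a whole contiguous run. */
		ret = map_range(page_to_pfn(pages[done]), chunk);
		if (ret)
			return ret;
		done += chunk;
	}
	return 0;
}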