From 85edc15a4c606094a14c36ebf5bceea7f9a3e395 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:41 +0000 Subject: mm: remove folio_prep_large_rmappable() Now that prep_compound_page() initialises folio->_deferred_list, folio_prep_large_rmappable()'s only purpose is to set the large_rmappable flag, so inline it into the two callers. Take the opportunity to convert the large_rmappable definition from PAGEFLAG to FOLIO_FLAG and remove the existance of PageTestLargeRmappable and friends. Link: https://lkml.kernel.org/r/20240321142448.1645400-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index de0c89105076..0e16451adaba 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -263,7 +263,6 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -void folio_prep_large_rmappable(struct folio *folio); bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); @@ -411,8 +410,6 @@ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, return 0; } -static inline void folio_prep_large_rmappable(struct folio *folio) {} - #define transparent_hugepage_flags 0UL #define thp_get_unmapped_area NULL -- cgit v1.2.3 From f238b8c33c6738f146bbfbb09b78870ea157c2b7 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 23 Mar 2024 00:41:36 +1300 Subject: arm64: mm: swap: support THP_SWAP on hardware with MTE Commit d0637c505f8a1 ("arm64: enable THP_SWAP for arm64") brings up THP_SWAP on ARM64, but it doesn't enable THP_SWP on hardware with MTE as the MTE code works with the assumption tags save/restore is always handling a folio with only one page. The limitation should be removed as more and more ARM64 SoCs have this feature. Co-existence of MTE and THP_SWAP becomes more and more important. This patch makes MTE tags saving support large folios, then we don't need to split large folios into base pages for swapping out on ARM64 SoCs with MTE any more. arch_prepare_to_swap() should take folio rather than page as parameter because we support THP swap-out as a whole. It saves tags for all pages in a large folio. As now we are restoring tags based-on folio, in arch_swap_restore(), we may increase some extra loops and early-exitings while refaulting a large folio which is still in swapcache in do_swap_page(). In case a large folio has nr pages, do_swap_page() will only set the PTE of the particular page which is causing the page fault. Thus do_swap_page() runs nr times, and each time, arch_swap_restore() will loop nr times for those subpages in the folio. So right now the algorithmic complexity becomes O(nr^2). Once we support mapping large folios in do_swap_page(), extra loops and early-exitings will decrease while not being completely removed as a large folio might get partially tagged in corner cases such as, 1. a large folio in swapcache can be partially unmapped, thus, MTE tags for the unmapped pages will be invalidated; 2. users might use mprotect() to set MTEs on a part of a large folio. arch_thp_swp_supported() is dropped since ARM64 MTE was the only one who needed it. Link: https://lkml.kernel.org/r/20240322114136.61386-2-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Steven Price Acked-by: Chris Li Reviewed-by: Ryan Roberts Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: David Hildenbrand Cc: Kemeng Shi Cc: "Matthew Wilcox (Oracle)" Cc: Anshuman Khandual Cc: Peter Collingbourne Cc: Yosry Ahmed Cc: Peter Xu Cc: Lorenzo Stoakes Cc: "Mike Rapoport (IBM)" Cc: Hugh Dickins Cc: "Aneesh Kumar K.V" Cc: Rick Edgecombe Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0e16451adaba..7576025db55d 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -532,16 +532,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) #define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) #define split_folio(f) split_folio_to_order(f, 0) -/* - * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to - * limitations in the implementation like arm64 MTE can override this to - * false - */ -#ifndef arch_thp_swp_supported -static inline bool arch_thp_swp_supported(void) -{ - return true; -} -#endif - #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From dee3d0bef2b00772be430425832ead6aa9d707f9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 17:10:32 +0000 Subject: proc: rewrite stable_page_flags() Reduce the usage of PageFlag tests and reduce the number of compound_head() calls. For multi-page folios, we'll now show all pages as having the flags that apply to them, e.g. if it's dirty, all pages will have the dirty flag set instead of just the head page. The mapped flag is still per page, as is the hwpoison flag. [willy@infradead.org: fix up some bits vs masks] Link: https://lkml.kernel.org/r/20240403173112.1450721-1-willy@infradead.org [willy@infradead.org: fix warnings] Link: https://lkml.kernel.org/r/ZhBPtCYfSuFuUMEz@casper.infradead.org Link: https://lkml.kernel.org/r/20240326171045.410737-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Svetly Todorov Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7576025db55d..1540a1481daf 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -351,7 +351,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); extern struct page *huge_zero_page; extern unsigned long huge_zero_pfn; -static inline bool is_huge_zero_page(struct page *page) +static inline bool is_huge_zero_page(const struct page *page) { return READ_ONCE(huge_zero_page) == page; } @@ -480,7 +480,7 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) return 0; } -static inline bool is_huge_zero_page(struct page *page) +static inline bool is_huge_zero_page(const struct page *page) { return false; } -- cgit v1.2.3 From 5beaee54a324ba1fe307e341ec825d5d099f4091 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 20:28:22 +0000 Subject: mm: add is_huge_zero_folio() This is the folio equivalent of is_huge_zero_page(). It doesn't add any efficiency, but it does prevent the caller from passing a tail page and getting confused when the predicate returns false. Link: https://lkml.kernel.org/r/20240326202833.523759-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1540a1481daf..600c6008262b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -356,6 +356,11 @@ static inline bool is_huge_zero_page(const struct page *page) return READ_ONCE(huge_zero_page) == page; } +static inline bool is_huge_zero_folio(const struct folio *folio) +{ + return READ_ONCE(huge_zero_page) == &folio->page; +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); @@ -485,6 +490,11 @@ static inline bool is_huge_zero_page(const struct page *page) return false; } +static inline bool is_huge_zero_folio(const struct folio *folio) +{ + return false; +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { return false; -- cgit v1.2.3 From 5691753d73a233a97c13f7d389f15d20596d062d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 20:28:25 +0000 Subject: mm: convert huge_zero_page to huge_zero_folio With all callers of is_huge_zero_page() converted, we can now switch the huge_zero_page itself from being a compound page to a folio. Link: https://lkml.kernel.org/r/20240326202833.523759-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 600c6008262b..7ba59ba36354 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -348,17 +348,12 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); -extern struct page *huge_zero_page; +extern struct folio *huge_zero_folio; extern unsigned long huge_zero_pfn; -static inline bool is_huge_zero_page(const struct page *page) -{ - return READ_ONCE(huge_zero_page) == page; -} - static inline bool is_huge_zero_folio(const struct folio *folio) { - return READ_ONCE(huge_zero_page) == &folio->page; + return READ_ONCE(huge_zero_folio) == folio; } static inline bool is_huge_zero_pmd(pmd_t pmd) @@ -371,9 +366,14 @@ static inline bool is_huge_zero_pud(pud_t pud) return false; } -struct page *mm_get_huge_zero_page(struct mm_struct *mm); +struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_page(struct mm_struct *mm); +static inline struct page *mm_get_huge_zero_page(struct mm_struct *mm) +{ + return &mm_get_huge_zero_folio(mm)->page; +} + #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) static inline bool thp_migration_supported(void) @@ -485,11 +485,6 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) return 0; } -static inline bool is_huge_zero_page(const struct page *page) -{ - return false; -} - static inline bool is_huge_zero_folio(const struct folio *folio) { return false; -- cgit v1.2.3 From 632230ff193954b514c908fdb45320f64ac9dc72 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 20:28:28 +0000 Subject: mm: rename mm_put_huge_zero_page to mm_put_huge_zero_folio Also remove mm_get_huge_zero_page() now it has no users. Link: https://lkml.kernel.org/r/20240326202833.523759-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7ba59ba36354..9d11e886aa9a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -367,12 +367,7 @@ static inline bool is_huge_zero_pud(pud_t pud) } struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); -void mm_put_huge_zero_page(struct mm_struct *mm); - -static inline struct page *mm_get_huge_zero_page(struct mm_struct *mm) -{ - return &mm_get_huge_zero_folio(mm)->page; -} +void mm_put_huge_zero_folio(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) @@ -500,7 +495,7 @@ static inline bool is_huge_zero_pud(pud_t pud) return false; } -static inline void mm_put_huge_zero_page(struct mm_struct *mm) +static inline void mm_put_huge_zero_folio(struct mm_struct *mm) { return; } -- cgit v1.2.3 From b979db1611a63b207dbc47706de989ac113ea898 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 27 Mar 2024 11:23:22 -0400 Subject: mm: make HPAGE_PXD_* macros even if !THP These macros can be helpful when we plan to merge hugetlb code into generic code. Move them out and define them as long as PGTABLE_HAS_HUGE_LEAVES is selected, because there are systems that only define HUGETLB_PAGE not THP. One note here is HPAGE_PMD_SHIFT must be defined even if PMD_SHIFT is not defined (e.g. !CONFIG_MMU case); it (or in other forms, like HPAGE_PMD_NR) is already used in lots of common codes without ifdef guards. Use the old trick to let complations work. Here we only need to differenciate HPAGE_PXD_SHIFT definitions. All the rest macros will be defined based on it. When at it, move HPAGE_PMD_NR / HPAGE_PMD_ORDER over together. Link: https://lkml.kernel.org/r/20240327152332.950956-4-peterx@redhat.com Signed-off-by: Peter Xu Tested-by: Ryan Roberts Cc: Andrea Arcangeli Cc: Andrew Jones Cc: Aneesh Kumar K.V (IBM) Cc: Axel Rasmussen Cc: Christophe Leroy Cc: Christoph Hellwig Cc: David Hildenbrand Cc: James Houghton Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Rik van Riel Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9d11e886aa9a..ff72ee19a125 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -64,9 +64,6 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj, enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; -#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) -#define HPAGE_PMD_NR (1< Date: Wed, 27 Mar 2024 11:23:29 -0400 Subject: mm/gup: handle huge pud for follow_pud_mask() Teach follow_pud_mask() to be able to handle normal PUD pages like hugetlb. Rename follow_devmap_pud() to follow_huge_pud() so that it can process either huge devmap or hugetlb. Move it out of TRANSPARENT_HUGEPAGE_PUD and and huge_memory.c (which relies on CONFIG_THP). Switch to pud_leaf() to detect both cases in the slow gup. In the new follow_huge_pud(), taking care of possible CoR for hugetlb if necessary. touch_pud() needs to be moved out of huge_memory.c to be accessable from gup.c even if !THP. Since at it, optimize the non-present check by adding a pud_present() early check before taking the pgtable lock, failing the follow_page() early if PUD is not present: that is required by both devmap or hugetlb. Use pud_huge() to also cover the pud_devmap() case. One more trivial thing to mention is, introduce "pud_t pud" in the code paths along the way, so the code doesn't dereference *pudp multiple time. Not only because that looks less straightforward, but also because if the dereference really happened, it's not clear whether there can be race to see different *pudp values when it's being modified at the same time. Setting ctx->page_mask properly for a PUD entry. As a side effect, this patch should also be able to optimize devmap GUP on PUD to be able to jump over the whole PUD range, but not yet verified. Hugetlb already can do so prior to this patch. Link: https://lkml.kernel.org/r/20240327152332.950956-11-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Tested-by: Ryan Roberts Cc: Andrea Arcangeli Cc: Andrew Jones Cc: Aneesh Kumar K.V (IBM) Cc: Axel Rasmussen Cc: Christophe Leroy Cc: Christoph Hellwig Cc: David Hildenbrand Cc: James Houghton Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Rik van Riel Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ff72ee19a125..ab26d9e65ec3 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -351,8 +351,6 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); -struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, - pud_t *pud, int flags, struct dev_pagemap **pgmap); vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); @@ -507,12 +505,6 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, - unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap) -{ - return NULL; -} - static inline bool thp_migration_supported(void) { return false; -- cgit v1.2.3 From ed48e87c7df3651accfa3088076830727b070f54 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Mon, 25 Mar 2024 19:16:48 -0700 Subject: thp: add thp_get_unmapped_area_vmflags() When memory is being placed, mmap() will take care to respect the guard gaps of certain types of memory (VM_SHADOWSTACK, VM_GROWSUP and VM_GROWSDOWN). In order to ensure guard gaps between mappings, mmap() needs to consider two things: 1. That the new mapping isn't placed in an any existing mappings guard gaps. 2. That the new mapping isn't placed such that any existing mappings are not in *its* guard gaps. The longstanding behavior of mmap() is to ensure 1, but not take any care around 2. So for example, if there is a PAGE_SIZE free area, and a mmap() with a PAGE_SIZE size, and a type that has a guard gap is being placed, mmap() may place the shadow stack in the PAGE_SIZE free area. Then the mapping that is supposed to have a guard gap will not have a gap to the adjacent VMA. Add a THP implementations of the vm_flags variant of get_unmapped_area(). Future changes will call this from mmap.c in the do_mmap() path to allow shadow stacks to be placed with consideration taken for the start guard gap. Shadow stack memory is always private and anonymous and so special guard gap logic is not needed in a lot of caseis, but it can be mapped by THP, so needs to be handled. Link: https://lkml.kernel.org/r/20240326021656.202649-7-rick.p.edgecombe@intel.com Signed-off-by: Rick Edgecombe Reviewed-by: Christophe Leroy Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov (AMD) Cc: Dan Williams Cc: Dave Hansen Cc: Deepak Gupta Cc: Guo Ren Cc: Helge Deller Cc: H. Peter Anvin (Intel) Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Liam R. Howlett Cc: Mark Brown Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ab26d9e65ec3..e896ca4760f6 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -270,6 +270,9 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); +unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags, + vm_flags_t vm_flags); bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, @@ -413,6 +416,14 @@ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, #define thp_get_unmapped_area NULL +static inline unsigned long +thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t vm_flags) +{ + return 0; +} + static inline bool can_split_folio(struct folio *folio, int *pextra_pins) { -- cgit v1.2.3 From ec33687c674934dfefd782a8ffd58370b080b503 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 12 Apr 2024 23:48:55 +1200 Subject: mm: add per-order mTHP anon_fault_alloc and anon_fault_fallback counters Patch series "mm: add per-order mTHP alloc and swpout counters", v6. The patchset introduces a framework to facilitate mTHP counters, starting with the allocation and swap-out counters. Currently, only four new nodes are appended to the stats directory for each mTHP size. /sys/kernel/mm/transparent_hugepage/hugepages-/stats anon_fault_alloc anon_fault_fallback anon_fault_fallback_charge anon_swpout anon_swpout_fallback These nodes are crucial for us to monitor the fragmentation levels of both the buddy system and the swap partitions. In the future, we may consider adding additional nodes for further insights. This patch (of 4): Profiling a system blindly with mTHP has become challenging due to the lack of visibility into its operations. Presenting the success rate of mTHP allocations appears to be pressing need. Recently, I've been experiencing significant difficulty debugging performance improvements and regressions without these figures. It's crucial for us to understand the true effectiveness of mTHP in real-world scenarios, especially in systems with fragmented memory. This patch establishes the framework for per-order mTHP counters. It begins by introducing the anon_fault_alloc and anon_fault_fallback counters. Additionally, to maintain consistency with thp_fault_fallback_charge in /proc/vmstat, this patch also tracks anon_fault_fallback_charge when mem_cgroup_charge fails for mTHP. Incorporating additional counters should now be straightforward as well. Link: https://lkml.kernel.org/r/20240412114858.407208-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240412114858.407208-2-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: David Hildenbrand Cc: Chris Li Cc: Domenico Cerasuolo Cc: Kairui Song Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Yosry Ahmed Cc: Yu Zhao Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e896ca4760f6..d4fdb2641070 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -264,6 +264,27 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, enforce_sysfs, orders); } +enum mthp_stat_item { + MTHP_STAT_ANON_FAULT_ALLOC, + MTHP_STAT_ANON_FAULT_FALLBACK, + MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + __MTHP_STAT_COUNT +}; + +struct mthp_stat { + unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; +}; + +DECLARE_PER_CPU(struct mthp_stat, mthp_stats); + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ + if (order <= 0 || order > PMD_ORDER) + return; + + this_cpu_inc(mthp_stats.stats[order][item]); +} + #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1< Date: Fri, 12 Apr 2024 23:48:56 +1200 Subject: mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters This helps to display the fragmentation situation of the swapfile, knowing the proportion of how much we haven't split large folios. So far, we only support non-split swapout for anon memory, with the possibility of expanding to shmem in the future. So, we add the "anon" prefix to the counter names. Link: https://lkml.kernel.org/r/20240412114858.407208-3-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Ryan Roberts Acked-by: David Hildenbrand Cc: Chris Li Cc: Domenico Cerasuolo Cc: Kairui Song Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Yosry Ahmed Cc: Yu Zhao Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index d4fdb2641070..7cd07b83a3d0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -268,6 +268,8 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_ALLOC, MTHP_STAT_ANON_FAULT_FALLBACK, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + MTHP_STAT_ANON_SWPOUT, + MTHP_STAT_ANON_SWPOUT_FALLBACK, __MTHP_STAT_COUNT }; -- cgit v1.2.3 From e0ffb29bc54d86b9ab10ebafc66eb1b7229e0cd7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 25 Apr 2024 05:00:55 +0100 Subject: mm: simplify thp_vma_allowable_order Combine the three boolean arguments into one flags argument for readability. Signed-off-by: Matthew Wilcox (Oracle) Cc: David Hildenbrand Cc: Kefeng Wang Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux/huge_mm.h') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7cd07b83a3d0..c8d3ec116e29 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -81,8 +81,12 @@ extern struct kobj_attribute shmem_enabled_attr; */ #define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) -#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \ - (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order))) +#define TVA_SMAPS (1 << 0) /* Will be used for procfs */ +#define TVA_IN_PF (1 << 1) /* Page fault handler */ +#define TVA_ENFORCE_SYSFS (1 << 2) /* Obey sysfs configuration */ + +#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ + (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES #define HPAGE_PMD_SHIFT PMD_SHIFT @@ -218,17 +222,15 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) } unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs, + unsigned long vm_flags, + unsigned long tva_flags, unsigned long orders); /** * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma * @vma: the vm area to check * @vm_flags: use these vm_flags instead of vma->vm_flags - * @smaps: whether answer will be used for smaps file - * @in_pf: whether answer will be used by page fault handler - * @enforce_sysfs: whether sysfs config should be taken into account + * @tva_flags: Which TVA flags to honour * @orders: bitfield of all orders to consider * * Calculates the intersection of the requested hugepage orders and the allowed @@ -241,12 +243,12 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, */ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs, + unsigned long vm_flags, + unsigned long tva_flags, unsigned long orders) { /* Optimization to check if required orders are enabled early. */ - if (enforce_sysfs && vma_is_anonymous(vma)) { + if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) { unsigned long mask = READ_ONCE(huge_anon_orders_always); if (vm_flags & VM_HUGEPAGE) @@ -260,8 +262,7 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, return 0; } - return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, - enforce_sysfs, orders); + return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders); } enum mthp_stat_item { @@ -428,8 +429,8 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, } static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs, + unsigned long vm_flags, + unsigned long tva_flags, unsigned long orders) { return 0; -- cgit v1.2.3