From 75c70128a67311070115b90d826a229d4bbbb2b5 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Sep 2023 15:44:16 +0800 Subject: mm: mempolicy: make mpol_misplaced() to take a folio In preparation for large folio numa balancing, make mpol_misplaced() to take a folio, no functional change intended. Link: https://lkml.kernel.org/r/20230921074417.24004-6-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/mempolicy.h') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index d232de7cdc56..6c2754d7bfed 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -174,7 +174,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); -extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) @@ -278,7 +278,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol) } #endif -static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, +static inline int mpol_misplaced(struct folio *folio, + struct vm_area_struct *vma, unsigned long address) { return -1; /* no node preference */ -- cgit v1.2.3 From 3657fdc2451abf135c2d20949acf57d78cc50338 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 11 Oct 2023 18:04:27 +0100 Subject: mm: move vma_policy() and anon_vma_name() decls to mm_types.h Patch series "Abstract vma_merge() and split_vma()", v4. 
The vma_merge() interface is very confusing and its implementation has led to numerous bugs as a result of that confusion. In addition there is duplication both in invocation of vma_merge(), but also in the common mprotect()-style pattern of attempting a merge, then if this fails, splitting the portion of a VMA about to have its attributes changed. This pattern has been copy/pasted around the kernel in each instance where such an operation has been required, each very slightly modified from the last to make it even harder to decipher what is going on. Simplify the whole thing by dividing the actual uses of vma_merge() and split_vma() into specific and abstracted functions and de-duplicate the vma_merge()/split_vma() pattern altogether. Doing so also opens the door to changing how vma_merge() is implemented - by knowing precisely what cases a caller is invoking rather than having a central interface where anything might happen we can untangle the brittle and confusing vma_merge() implementation into something more workable. For mprotect()-like cases we introduce vma_modify() which performs the vma_merge()/split_vma() pattern, returning a pointer to either the merged or split VMA or an ERR_PTR(err) if the splits fail. We provide a number of inline helper functions to make things even clearer:- * vma_modify_flags() - Prepare to modify the VMA's flags. * vma_modify_flags_name() - Prepare to modify the VMA's flags/anon_vma_name * vma_modify_policy() - Prepare to modify the VMA's mempolicy. * vma_modify_flags_uffd() - Prepare to modify the VMA's flags/uffd context. For cases where a new VMA is attempted to be merged with adjacent VMAs we add:- * vma_merge_new_vma() - Prepare to merge a new VMA. * vma_merge_extend() - Prepare to extend the end of a new VMA. This patch (of 5): The vma_policy() define is a helper specifically for a VMA field so it makes sense to host it in the memory management types header. 
The anon_vma_name(), anon_vma_name_alloc() and anon_vma_name_free() functions are a little out of place in mm_inline.h as they define external functions, and so it makes sense to locate them in mm_types.h. The purpose of these relocations is to make it possible to abstract static inline wrappers which invoke both of these helpers. Link: https://lkml.kernel.org/r/cover.1697043508.git.lstoakes@gmail.com Link: https://lkml.kernel.org/r/24bfc6c9e382fffbcb0ea8d424392c27d56cc8ca.1697043508.git.lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Alexander Viro Cc: Christian Brauner Cc: Liam R. Howlett Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux/mempolicy.h') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 6c2754d7bfed..1d7f4ec2614c 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -89,8 +89,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) return pol; } -#define vma_policy(vma) ((vma)->vm_policy) - static inline void mpol_get(struct mempolicy *pol) { if (pol) @@ -222,8 +220,6 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) return NULL; } -#define vma_policy(vma) NULL - static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { -- cgit v1.2.3 From c36f6e6dff4d32ec8b6da8f553933727a57a7a4a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 3 Oct 2023 02:20:14 -0700 Subject: mempolicy trivia: slightly more consistent naming Before getting down to work, do a little cleanup, mainly of inconsistent variable naming. I gave up trying to rationalize mpol versus pol versus policy, and node versus nid, but let's avoid p and nd. Remove a few superfluous blank lines, but add one; and here prefer vma->vm_policy to vma_policy(vma) - the latter being appropriate in other sources, which have to allow for !CONFIG_NUMA. 
That intriguing line about KERNEL_DS? should have gone in v2.6.15, when numa_policy_init() stopped using set_mempolicy(2)'s system call handler. Link: https://lkml.kernel.org/r/68287974-b6ae-7df-4ba-d19ddd69cbf@google.com Signed-off-by: Hugh Dickins Reviewed-by: Matthew Wilcox (Oracle) Cc: Andi Kleen Cc: Christoph Lameter Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "Huang, Ying" Cc: Kefeng Wang Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Nhat Pham Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Tejun heo Cc: Vishal Moola (Oracle) Cc: Yang Shi Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux/mempolicy.h') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 1d7f4ec2614c..a807976fe95d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -124,10 +124,9 @@ struct shared_policy { int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -int mpol_set_shared_policy(struct shared_policy *info, - struct vm_area_struct *vma, - struct mempolicy *new); -void mpol_free_shared_policy(struct shared_policy *p); +int mpol_set_shared_policy(struct shared_policy *sp, + struct vm_area_struct *vma, struct mempolicy *mpol); +void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); @@ -191,7 +190,7 @@ static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) return true; } -static inline void mpol_put(struct mempolicy *p) +static inline void mpol_put(struct mempolicy *pol) { } @@ -210,7 +209,7 @@ static inline void mpol_shared_policy_init(struct shared_policy *sp, { } -static inline void mpol_free_shared_policy(struct shared_policy *p) +static inline void mpol_free_shared_policy(struct shared_policy *sp) { } -- cgit 
v1.2.3 From 93397c3b7684555b7cec726cd13eef6742d191fe Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 3 Oct 2023 02:21:34 -0700 Subject: mempolicy trivia: use pgoff_t in shared mempolicy tree Prefer the more explicit "pgoff_t" to "unsigned long" when dealing with a shared mempolicy tree. Delete confusing comment about pseudo mm vmas. Link: https://lkml.kernel.org/r/5451157-3818-4af5-fd2c-5d26a5d1dc53@google.com Signed-off-by: Hugh Dickins Cc: Andi Kleen Cc: Christoph Lameter Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "Huang, Ying" Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Nhat Pham Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Tejun heo Cc: Vishal Moola (Oracle) Cc: Yang Shi Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux/mempolicy.h') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index a807976fe95d..acdb12fcb6cd 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -105,22 +105,16 @@ static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) /* * Tree of shared policies for a shared memory region. - * Maintain the policies in a pseudo mm that contains vmas. The vmas - * carry the policy. As a special twist the pseudo mm is indexed in pages, not - * bytes, so that we can work with shared memory segments bigger than - * unsigned long. 
*/ - -struct sp_node { - struct rb_node nd; - unsigned long start, end; - struct mempolicy *policy; -}; - struct shared_policy { struct rb_root root; rwlock_t lock; }; +struct sp_node { + struct rb_node nd; + pgoff_t start, end; + struct mempolicy *policy; +}; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); @@ -128,7 +122,7 @@ int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *mpol); void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, - unsigned long idx); + pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, @@ -214,7 +208,7 @@ static inline void mpol_free_shared_policy(struct shared_policy *sp) } static inline struct mempolicy * -mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } -- cgit v1.2.3 From ddc1a5cbc05dc62743a2f409b96faa5cf95ba064 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 19 Oct 2023 13:39:08 -0700 Subject: mempolicy: alloc_pages_mpol() for NUMA policy without vma Shrink shmem's stack usage by eliminating the pseudo-vma from its folio allocation. alloc_pages_mpol(gfp, order, pol, ilx, nid) becomes the principal actor for passing mempolicy choice down to __alloc_pages(), rather than vma_alloc_folio(gfp, order, vma, addr, hugepage). vma_alloc_folio() and alloc_pages() remain, but as wrappers around alloc_pages_mpol(). alloc_pages_bulk_*() untouched, except to provide the additional args to policy_nodemask(), which subsumes policy_node(). Cleanup throughout, cutting out some unhelpful "helpers". 
It would all be much simpler without MPOL_INTERLEAVE, but that adds a dynamic to the constant mpol: complicated by v3.6 commit 09c231cb8bfd ("tmpfs: distribute interleave better across nodes"), which added ino bias to the interleave, hidden from mm/mempolicy.c until this commit. Hence "ilx" throughout, the "interleave index". Originally I thought it could be done just with nid, but that's wrong: the nodemask may come from the shared policy layer below a shmem vma, or it may come from the task layer above a shmem vma; and without the final nodemask the node id cannot be decided. And how ilx is applied depends also on page order. The interleave index is almost always irrelevant unless the policy is MPOL_INTERLEAVE: with one exception in alloc_pages_mpol(), where the NO_INTERLEAVE_INDEX passed down from vma-less alloc_pages() is also used as a hint not to use THP-style hugepage allocation - to avoid the overhead of a hugepage arg (though I don't understand why we never just added a GFP bit for THP - if it actually needs a different allocation strategy from other pages of the same order). vma_alloc_folio() still carries its hugepage arg here, but it is not used, and should be removed when agreed. get_vma_policy() no longer allows a NULL vma: over time I believe we've eradicated all the places which used to need it: e.g. swapoff and madvise used to pass a NULL vma to read_swap_cache_async(), but now know the vma. 
[hughd@google.com: handle NULL mpol being passed to __read_swap_cache_async()] Link: https://lkml.kernel.org/r/ea419956-4751-0102-21f7-9c93cb957892@google.com Link: https://lkml.kernel.org/r/74e34633-6060-f5e3-aee-7040d43f2e93@google.com Link: https://lkml.kernel.org/r/1738368e-bac0-fd11-ed7f-b87142a939fe@google.com Signed-off-by: Hugh Dickins Cc: Andi Kleen Cc: Christoph Lameter Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Huang Ying Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Nhat Pham Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Tejun heo Cc: Vishal Moola (Oracle) Cc: Yang Shi Cc: Yosry Ahmed Cc: Domenico Cerasuolo Cc: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux/mempolicy.h') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index acdb12fcb6cd..931b118336f4 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -17,6 +17,8 @@ struct mm_struct; +#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ + #ifdef CONFIG_NUMA /* @@ -126,7 +128,9 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, pgoff_t *ilx); +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); @@ -140,8 +144,6 @@ extern int huge_node(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); -extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); - extern unsigned int mempolicy_slab_node(void); extern 
enum zone_type policy_zone; @@ -179,6 +181,11 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); struct mempolicy {}; +static inline struct mempolicy *get_task_policy(struct task_struct *p) +{ + return NULL; +} + static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; @@ -213,6 +220,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) return NULL; } +static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx) +{ + *ilx = 0; + return NULL; +} + static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { -- cgit v1.2.3