From eda8c5e776220ce9c869f5c714ccef90c6e1966b Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 21 Jan 2026 11:49:40 -0500 Subject: mm/memory: add tree limit to free_pgtables() The ceiling and tree search limit need to be different arguments for the future change in the failed fork attempt. The ceiling and floor variables are not very descriptive, so change them to pg_start/pg_end. Adding a new variable for the vma_end to the function as it will differ from the pg_end in the later patches in the series. Add a kernel doc about the free_pgtables() function. Test code also updated. No functional changes intended. Link: https://lkml.kernel.org/r/20260121164946.2093480-6-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Reviewed-by: Pedro Falcato Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: David Hildenbrand Cc: Jann Horn Cc: Kairui Song Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Nhat Pham Cc: SeongJae Park Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/vma/vma_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 7fa56dcc53a6..f50b8ddee612 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1139,7 +1139,8 @@ static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *vma, unsigned long floor, - unsigned long ceiling, bool mm_wr_locked) + unsigned long ceiling, unsigned long tree_max, + bool mm_wr_locked) { } -- cgit v1.2.3 From 0df5a8d3948da979b8ab811a692b34635e1b146d Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 21 Jan 2026 11:49:44 -0500 Subject: mm/vma: use unmap_desc in exit_mmap() and vms_clear_ptes() Convert vms_clear_ptes() to use unmap_desc to call unmap_vmas() instead of the large argument list. The UNMAP_STATE() cannot be used because the vma iterator in the vms does not point to the correct maple state (mas_detach), and the tree_end will be set incorrectly. Setting up the arguments manually avoids setting the struct up incorrectly and doing extra work to get the correct pagetable range. exit_mmap() also calls unmap_vmas() with many arguments. Using the unmap_all_init() function to set the unmap descriptor for all vmas makes this a bit easier to read. Update to the vma test code is necessary to ensure testing continues to function. No functional changes intended. Link: https://lkml.kernel.org/r/20260121164946.2093480-10-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: David Hildenbrand Cc: Jann Horn Cc: Kairui Song Cc: Kemeng Shi Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Nhat Pham Cc: Pedro Falcato Cc: SeongJae Park Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/vma/vma_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index f50b8ddee612..0b4918aac8d6 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1131,9 +1131,9 @@ static inline void update_hiwater_vm(struct mm_struct *mm) { } -static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, - struct vm_area_struct *vma, unsigned long start_addr, - unsigned long end_addr, unsigned long tree_end) +struct unmap_desc; + +static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) { } -- cgit v1.2.3 From a8700d42b0af3a1751f70d53ee90c97fb4dc50f2 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 21 Jan 2026 11:49:46 -0500 Subject: mm: use unmap_desc struct for freeing page tables Pass through the unmap_desc to free_pgtables() because it almost has everything necessary and is already on the stack. Updates testing code as necessary. No functional changes intended. [Liam.Howlett@oracle.com: fix up unmap desc use on exit_mmap()] Link: https://lkml.kernel.org/r/20260210214214.364856-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20260121164946.2093480-12-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Cc: Suren Baghdasaryan Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: David Hildenbrand Cc: Jann Horn Cc: Kairui Song Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Nhat Pham Cc: Pedro Falcato Cc: SeongJae Park Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/vma/vma_internal.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 0b4918aac8d6..ca4eb563b29b 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1137,11 +1137,10 @@ static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) { } -static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, - struct vm_area_struct *vma, unsigned long floor, - unsigned long ceiling, unsigned long tree_max, - bool mm_wr_locked) +static inline void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *desc) { + (void)tlb; + (void)desc; } static inline void mapping_unmap_writable(struct address_space *mapping) -- cgit v1.2.3 From 21c8a5bae7bd594f5b89db551b618d60b994b8cf Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 22 Jan 2026 16:06:13 +0000 Subject: tools: bitmap: add missing bitmap_[subset(), andnot()] The bitmap_subset() and bitmap_andnot() functions are not present in the tools version of include/linux/bitmap.h, so add them as subsequent patches implement test code that requires them. We also add the missing __bitmap_subset() to tools/lib/bitmap.c. Link: https://lkml.kernel.org/r/0fd0d4ec868297f522003cb4b5898b53b498805b.1769097829.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Liam R. Howlett Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jason Gunthorpe Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Cc: Damien Le Moal Cc: "Darrick J. Wong" Cc: Jarkko Sakkinen Cc: Yury Norov Cc: Chris Mason Cc: Pedro Falcato Signed-off-by: Andrew Morton --- tools/include/linux/bitmap.h | 22 ++++++++++++++++++++++ tools/lib/bitmap.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'tools') diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 0d992245c600..250883090a5d 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -24,6 +24,10 @@ void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) @@ -81,6 +85,15 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } +static __always_inline +bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + return __bitmap_andnot(dst, src1, src2, nbits); +} + static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused) { return malloc(bitmap_size(nbits)); @@ -157,6 +170,15 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static __always_inline +bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_subset(src1, src2, nbits); +} + static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index 51255c69754d..aa83d22c45e3 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -140,3 +140,32 @@ void __bitmap_clear(unsigned long *map, unsigned int start, int len) *p &= ~mask_to_clear; } } + +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k; + unsigned int lim = bits/BITS_PER_LONG; + unsigned long result = 0; + + for (k = 0; k < lim; k++) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); + return result != 0; +} + +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & ~bitmap2[k]) + return false; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return false; + return true; +} -- cgit v1.2.3 From bae0ba7c7c0a022287d8b093da63ebcb794d77ea Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 22 Jan 2026 16:06:14 +0000 Subject: mm: add basic VMA flag operation helper functions Now we have the mk_vma_flags() macro helper which permits easy specification of any number of VMA flags, add helper functions which operate with vma_flags_t parameters. This patch provides vma_flags_test[_mask](), vma_flags_set[_mask]() and vma_flags_clear[_mask]() respectively testing, setting and clearing flags with the _mask variants accepting vma_flag_t parameters, and the non-mask variants implemented as macros which accept a list of flags. This allows us to trivially test/set/clear aggregate VMA flag values as necessary, for instance: if (vma_flags_test(&flags, VMA_READ_BIT, VMA_WRITE_BIT)) goto readwrite; vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT); vma_flags_clear(&flags, VMA_READ_BIT, VMA_WRITE_BIT); We also add a function for testing that ALL flags are set for convenience, e.g.: if (vma_flags_test_all(&flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { /* Both READ and MAYREAD flags set */ ... } The compiler generates optimal assembly for each such that they behave as if the caller were setting the bitmap flags manually. This is important for e.g. drivers which manipulate flag values rather than a VMA's specific flag values. We also add helpers for testing, setting and clearing flags for VMA's and VMA descriptors to reduce boilerplate. Also add the EMPTY_VMA_FLAGS define to aid initialisation of empty flags. Finally, update the userland VMA tests to add the helpers there so they can be utilised as part of userland testing. Link: https://lkml.kernel.org/r/885d4897d67a6a57c0b07fa182a7055ad752df11.1769097829.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Pedro Falcato Reviewed-by: Liam R. Howlett Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jason Gunthorpe Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Cc: Damien Le Moal Cc: "Darrick J. Wong" Cc: Jarkko Sakkinen Cc: Yury Norov Cc: Chris Mason Signed-off-by: Andrew Morton --- tools/testing/vma/vma_internal.h | 147 +++++++++++++++++++++++++++++++++------ 1 file changed, 127 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index ca4eb563b29b..2b01794cbd61 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -21,7 +21,13 @@ #include +#ifdef __CONCAT +#undef __CONCAT +#endif + +#include #include +#include #include #include #include @@ -38,6 +44,8 @@ extern unsigned long dac_mmap_min_addr; #define dac_mmap_min_addr 0UL #endif +#define ACCESS_PRIVATE(p, member) ((p)->member) + #define VM_WARN_ON(_expr) (WARN_ON(_expr)) #define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) #define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr)) @@ -533,6 +541,8 @@ typedef struct { DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS); } __private vma_flags_t; +#define EMPTY_VMA_FLAGS ((vma_flags_t){ }) + struct mm_struct { struct maple_tree mm_mt; int map_count; /* number of VMAs */ @@ -882,6 +892,123 @@ static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) return __pgprot(vm_flags); } +static inline void vma_flags_clear_all(vma_flags_t *flags) +{ + bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS); +} + +static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit) +{ + unsigned long *bitmap = flags->__vma_flags; + + __set_bit((__force int)bit, bitmap); +} + +static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) +{ + vma_flags_t flags; + int i; + + vma_flags_clear_all(&flags); + for (i = 0; i < count; i++) + vma_flag_set(&flags, bits[i]); + return flags; +} + +#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \ + (const vma_flag_t []){__VA_ARGS__}) + +static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags, + vma_flags_t to_test) +{ + const unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_test = to_test.__vma_flags; + + return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_test(flags, ...) \ + vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags, + vma_flags_t to_test) +{ + const unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_test = to_test.__vma_flags; + + return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_test_all(flags, ...) \ + vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set) +{ + unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_set = to_set.__vma_flags; + + bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_set(flags, ...) \ + vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear) +{ + unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_clear = to_clear.__vma_flags; + + bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_clear(flags, ...) \ + vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, + vma_flags_t flags) +{ + return vma_flags_test_all_mask(&vma->flags, flags); +} + +#define vma_test_all_flags(vma, ...) \ + vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) + +static inline void vma_set_flags_mask(struct vm_area_struct *vma, + vma_flags_t flags) +{ + vma_flags_set_mask(&vma->flags, flags); +} + +#define vma_set_flags(vma, ...) \ + vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) + +static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc, + vma_flags_t flags) +{ + return vma_flags_test_mask(&desc->vma_flags, flags); +} + +#define vma_desc_test_flags(desc, ...) \ + vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) + +static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, + vma_flags_t flags) +{ + vma_flags_set_mask(&desc->vma_flags, flags); +} + +#define vma_desc_set_flags(desc, ...) \ + vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) + +static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, + vma_flags_t flags) +{ + vma_flags_clear_mask(&desc->vma_flags, flags); +} + +#define vma_desc_clear_flags(desc, ...) \ + vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) + static inline bool is_shared_maywrite(vm_flags_t vm_flags) { return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == @@ -1540,31 +1667,11 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } -#define ACCESS_PRIVATE(p, member) ((p)->member) - -#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) - -static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits) -{ - unsigned int len = bitmap_size(nbits); - - if (small_const_nbits(nbits)) - *dst = 0; - else - memset(dst, 0, len); -} - static inline bool mm_flags_test(int flag, const struct mm_struct *mm) { return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } -/* Clears all bits in the VMA flags bitmap, non-atomically. */ -static inline void vma_flags_clear_all(vma_flags_t *flags) -{ - bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS); -} - /* * Copy value to the first system word of VMA flags, non-atomically. * -- cgit v1.2.3 From 53f1d936445131cb5da2212c2b60884a25cb0330 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 22 Jan 2026 16:06:19 +0000 Subject: mm: make vm_area_desc utilise vma_flags_t only Now we have eliminated all uses of vm_area_desc->vm_flags, eliminate this field, and have mmap_prepare users utilise the vma_flags_t vm_area_desc->vma_flags field only. As part of this change we alter is_shared_maywrite() to accept a vma_flags_t parameter, and introduce is_shared_maywrite_vm_flags() for use with legacy vm_flags_t flags. We also update struct mmap_state to add a union between vma_flags and vm_flags temporarily until the mmap logic is also converted to using vma_flags_t. Also update the VMA userland tests to reflect this change. Link: https://lkml.kernel.org/r/fd2a2938b246b4505321954062b1caba7acfc77a.1769097829.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Pedro Falcato Reviewed-by: Liam R. Howlett Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jason Gunthorpe Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Cc: Damien Le Moal Cc: "Darrick J. Wong" Cc: Jarkko Sakkinen Cc: Yury Norov Cc: Chris Mason Signed-off-by: Andrew Morton --- tools/testing/vma/vma_internal.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 2b01794cbd61..2743f12ecf32 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1009,15 +1009,20 @@ static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, #define vma_desc_clear_flags(desc, ...) \ vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) -static inline bool is_shared_maywrite(vm_flags_t vm_flags) +static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags) { return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == (VM_SHARED | VM_MAYWRITE); } +static inline bool is_shared_maywrite(const vma_flags_t *flags) +{ + return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT); +} + static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) { - return is_shared_maywrite(vma->vm_flags); + return is_shared_maywrite(&vma->flags); } static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) -- cgit v1.2.3 From 6aacab308a5dfd222b2d23662bbae60c11007cfb Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 22 Jan 2026 16:06:20 +0000 Subject: tools/testing/vma: separate VMA userland tests into separate files So far the userland VMA tests have been established as a rough expression of what's been possible. Adapt it into a more usable form by separating out tests and shared helper functions. Since we test functions that are declared statically in mm/vma.c, we make use of the trick of #include'ing kernel C files directly. In order for the tests to continue to function, we must therefore also this way into the tests/ directory. We try to keep as much shared logic actually modularised into a separate compilation unit in shared.c, however the merge_existing() and attach_vma() helpers rely on statically declared mm/vma.c functions so these must be declared in main.c. Link: https://lkml.kernel.org/r/a0455ccfe4fdcd1c962c64f76304f612e5662a4e.1769097829.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jason Gunthorpe Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Cc: Damien Le Moal Cc: "Darrick J. Wong" Cc: Jarkko Sakkinen Cc: Yury Norov Cc: Chris Mason Cc: Pedro Falcato Signed-off-by: Andrew Morton --- tools/testing/vma/Makefile | 4 +- tools/testing/vma/main.c | 55 ++ tools/testing/vma/shared.c | 131 +++ tools/testing/vma/shared.h | 114 +++ tools/testing/vma/tests/merge.c | 1469 +++++++++++++++++++++++++++++++ tools/testing/vma/tests/mmap.c | 57 ++ tools/testing/vma/tests/vma.c | 39 + tools/testing/vma/vma.c | 1785 -------------------------------------- tools/testing/vma/vma_internal.h | 9 - 9 files changed, 1867 insertions(+), 1796 deletions(-) create mode 100644 tools/testing/vma/main.c create mode 100644 tools/testing/vma/shared.c create mode 100644 tools/testing/vma/shared.h create mode 100644 tools/testing/vma/tests/merge.c create mode 100644 tools/testing/vma/tests/mmap.c create mode 100644 tools/testing/vma/tests/vma.c delete mode 100644 tools/testing/vma/vma.c (limited to 'tools') diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile index 66f3831a668f..94133d9d3955 100644 --- a/tools/testing/vma/Makefile +++ b/tools/testing/vma/Makefile @@ -6,10 +6,10 @@ default: vma include ../shared/shared.mk -OFILES = $(SHARED_OFILES) vma.o maple-shim.o +OFILES = $(SHARED_OFILES) main.o shared.o maple-shim.o TARGETS = vma -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h +main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h vma: $(OFILES) $(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS) diff --git a/tools/testing/vma/main.c b/tools/testing/vma/main.c new file mode 100644 index 000000000000..49b09e97a51f --- /dev/null +++ b/tools/testing/vma/main.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shared.h" +/* + * Directly import the VMA implementation here. Our vma_internal.h wrapper + * provides userland-equivalent functionality for everything vma.c uses. + */ +#include "../../../mm/vma_init.c" +#include "../../../mm/vma_exec.c" +#include "../../../mm/vma.c" + +/* Tests are included directly so they can test static functions in mm/vma.c. */ +#include "tests/merge.c" +#include "tests/mmap.c" +#include "tests/vma.c" + +/* Helper functions which utilise static kernel functions. */ + +struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg) +{ + struct vm_area_struct *vma; + + vma = vma_merge_existing_range(vmg); + if (vma) + vma_assert_attached(vma); + return vma; +} + +int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma) +{ + int res; + + res = vma_link(mm, vma); + if (!res) + vma_assert_attached(vma); + return res; +} + +/* Main test running which invokes tests/ *.c runners. */ +int main(void) +{ + int num_tests = 0, num_fail = 0; + + maple_tree_init(); + vma_state_init(); + + run_merge_tests(&num_tests, &num_fail); + run_mmap_tests(&num_tests, &num_fail); + run_vma_tests(&num_tests, &num_fail); + + printf("%d tests run, %d passed, %d failed.\n", + num_tests, num_tests - num_fail, num_fail); + + return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tools/testing/vma/shared.c b/tools/testing/vma/shared.c new file mode 100644 index 000000000000..bda578cc3304 --- /dev/null +++ b/tools/testing/vma/shared.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shared.h" + + +bool fail_prealloc; +unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +unsigned long stack_guard_gap = 256UL<vm_start = start; + vma->vm_end = end; + vma->vm_pgoff = pgoff; + vm_flags_reset(vma, vm_flags); + vma_assert_detached(vma); + + return vma; +} + +void detach_free_vma(struct vm_area_struct *vma) +{ + vma_mark_detached(vma); + vm_area_free(vma); +} + +struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, + unsigned long start, unsigned long end, + pgoff_t pgoff, vm_flags_t vm_flags) +{ + struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags); + + if (vma == NULL) + return NULL; + + if (attach_vma(mm, vma)) { + detach_free_vma(vma); + return NULL; + } + + /* + * Reset this counter which we use to track whether writes have + * begun. Linking to the tree will have caused this to be incremented, + * which means we will get a false positive otherwise. + */ + vma->vm_lock_seq = UINT_MAX; + + return vma; +} + +void reset_dummy_anon_vma(void) +{ + dummy_anon_vma.was_cloned = false; + dummy_anon_vma.was_unlinked = false; +} + +int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi) +{ + struct vm_area_struct *vma; + int count = 0; + + fail_prealloc = false; + reset_dummy_anon_vma(); + + vma_iter_set(vmi, 0); + for_each_vma(*vmi, vma) { + detach_free_vma(vma); + count++; + } + + mtree_destroy(&mm->mm_mt); + mm->map_count = 0; + return count; +} + +bool vma_write_started(struct vm_area_struct *vma) +{ + int seq = vma->vm_lock_seq; + + /* We reset after each check. */ + vma->vm_lock_seq = UINT_MAX; + + /* The vma_start_write() stub simply increments this value. */ + return seq > -1; +} + +void __vma_set_dummy_anon_vma(struct vm_area_struct *vma, + struct anon_vma_chain *avc, struct anon_vma *anon_vma) +{ + vma->anon_vma = anon_vma; + INIT_LIST_HEAD(&vma->anon_vma_chain); + list_add(&avc->same_vma, &vma->anon_vma_chain); + avc->anon_vma = vma->anon_vma; +} + +void vma_set_dummy_anon_vma(struct vm_area_struct *vma, + struct anon_vma_chain *avc) +{ + __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma); +} + +struct task_struct *get_current(void) +{ + return &__current; +} + +unsigned long rlimit(unsigned int limit) +{ + return (unsigned long)-1; +} + +void vma_set_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + pgoff_t pgoff) +{ + vma->vm_start = start; + vma->vm_end = end; + vma->vm_pgoff = pgoff; +} diff --git a/tools/testing/vma/shared.h b/tools/testing/vma/shared.h new file mode 100644 index 000000000000..6c64211cfa22 --- /dev/null +++ b/tools/testing/vma/shared.h @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#include "generated/bit-length.h" +#include "maple-shared.h" +#include "vma_internal.h" +#include "../../../mm/vma.h" + +/* Simple test runner. Assumes local num_[fail, tests] counters. */ +#define TEST(name) \ + do { \ + (*num_tests)++; \ + if (!test_##name()) { \ + (*num_fail)++; \ + fprintf(stderr, "Test " #name " FAILED\n"); \ + } \ + } while (0) + +#define ASSERT_TRUE(_expr) \ + do { \ + if (!(_expr)) { \ + fprintf(stderr, \ + "Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \ + __FILE__, __LINE__, __FUNCTION__, #_expr); \ + return false; \ + } \ + } while (0) + +#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr)) +#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2)) +#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2)) + +#define IS_SET(_val, _flags) ((_val & _flags) == _flags) + +extern bool fail_prealloc; + +/* Override vma_iter_prealloc() so we can choose to fail it. */ +#define vma_iter_prealloc(vmi, vma) \ + (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL)) + +#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536 + +extern unsigned long mmap_min_addr; +extern unsigned long dac_mmap_min_addr; +extern unsigned long stack_guard_gap; + +extern const struct vm_operations_struct vma_dummy_vm_ops; +extern struct anon_vma dummy_anon_vma; +extern struct task_struct __current; + +/* + * Helper function which provides a wrapper around a merge existing VMA + * operation. + * + * Declared in main.c as uses static VMA function. + */ +struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg); + +/* + * Helper function to allocate a VMA and link it to the tree. + * + * Declared in main.c as uses static VMA function. + */ +int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma); + +/* Helper function providing a dummy vm_ops->close() method.*/ +static inline void dummy_close(struct vm_area_struct *) +{ +} + +/* Helper function to simply allocate a VMA. */ +struct vm_area_struct *alloc_vma(struct mm_struct *mm, + unsigned long start, unsigned long end, + pgoff_t pgoff, vm_flags_t vm_flags); + +/* Helper function to detach and free a VMA. */ +void detach_free_vma(struct vm_area_struct *vma); + +/* Helper function to allocate a VMA and link it to the tree. */ +struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, + unsigned long start, unsigned long end, + pgoff_t pgoff, vm_flags_t vm_flags); + +/* + * Helper function to reset the dummy anon_vma to indicate it has not been + * duplicated. + */ +void reset_dummy_anon_vma(void); + +/* + * Helper function to remove all VMAs and destroy the maple tree associated with + * a virtual address space. Returns a count of VMAs in the tree. + */ +int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi); + +/* Helper function to determine if VMA has had vma_start_write() performed. */ +bool vma_write_started(struct vm_area_struct *vma); + +void __vma_set_dummy_anon_vma(struct vm_area_struct *vma, + struct anon_vma_chain *avc, struct anon_vma *anon_vma); + +/* Provide a simple dummy VMA/anon_vma dummy setup for testing. */ +void vma_set_dummy_anon_vma(struct vm_area_struct *vma, + struct anon_vma_chain *avc); + +/* Helper function to specify a VMA's range. */ +void vma_set_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + pgoff_t pgoff); diff --git a/tools/testing/vma/tests/merge.c b/tools/testing/vma/tests/merge.c new file mode 100644 index 000000000000..3708dc6945b0 --- /dev/null +++ b/tools/testing/vma/tests/merge.c @@ -0,0 +1,1469 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* Helper function which provides a wrapper around a merge new VMA operation. */ +static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) +{ + struct vm_area_struct *vma; + /* + * For convenience, get prev and next VMAs. Which the new VMA operation + * requires. + */ + vmg->next = vma_next(vmg->vmi); + vmg->prev = vma_prev(vmg->vmi); + vma_iter_next_range(vmg->vmi); + + vma = vma_merge_new_range(vmg); + if (vma) + vma_assert_attached(vma); + + return vma; +} + +/* + * Helper function which provides a wrapper around the expansion of an existing + * VMA. + */ +static int expand_existing(struct vma_merge_struct *vmg) +{ + return vma_expand(vmg); +} + +/* + * Helper function to reset merge state the associated VMA iterator to a + * specified new range. + */ +void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags) +{ + vma_iter_set(vmg->vmi, start); + + vmg->prev = NULL; + vmg->middle = NULL; + vmg->next = NULL; + vmg->target = NULL; + + vmg->start = start; + vmg->end = end; + vmg->pgoff = pgoff; + vmg->vm_flags = vm_flags; + + vmg->just_expand = false; + vmg->__remove_middle = false; + vmg->__remove_next = false; + vmg->__adjust_middle_start = false; + vmg->__adjust_next_start = false; +} + +/* Helper function to set both the VMG range and its anon_vma. */ +static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start, + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags, + struct anon_vma *anon_vma) +{ + vmg_set_range(vmg, start, end, pgoff, vm_flags); + vmg->anon_vma = anon_vma; +} + +/* + * Helper function to try to merge a new VMA. + * + * Update vmg and the iterator for it and try to merge, otherwise allocate a new + * VMA, link it to the maple tree and return it. + */ +static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm, + struct vma_merge_struct *vmg, unsigned long start, + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags, + bool *was_merged) +{ + struct vm_area_struct *merged; + + vmg_set_range(vmg, start, end, pgoff, vm_flags); + + merged = merge_new(vmg); + if (merged) { + *was_merged = true; + ASSERT_EQ(vmg->state, VMA_MERGE_SUCCESS); + return merged; + } + + *was_merged = false; + + ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE); + + return alloc_and_link_vma(mm, start, end, pgoff, vm_flags); +} + +static bool test_simple_merge(void) +{ + struct vm_area_struct *vma; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *vma_left = alloc_vma(&mm, 0, 0x1000, 0, vm_flags); + struct vm_area_struct *vma_right = alloc_vma(&mm, 0x2000, 0x3000, 2, vm_flags); + VMA_ITERATOR(vmi, &mm, 0x1000); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + .start = 0x1000, + .end = 0x2000, + .vm_flags = vm_flags, + .pgoff = 1, + }; + + ASSERT_FALSE(attach_vma(&mm, vma_left)); + ASSERT_FALSE(attach_vma(&mm, vma_right)); + + vma = merge_new(&vmg); + ASSERT_NE(vma, NULL); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->vm_flags, vm_flags); + + detach_free_vma(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool test_simple_modify(void) +{ + struct vm_area_struct *vma; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags); + VMA_ITERATOR(vmi, &mm, 0x1000); + vm_flags_t flags = VM_READ | VM_MAYREAD; + + ASSERT_FALSE(attach_vma(&mm, init_vma)); + + /* + * The flags will not be changed, the vma_modify_flags() function + * performs the merge/split only. + */ + vma = vma_modify_flags(&vmi, init_vma, init_vma, + 0x1000, 0x2000, &flags); + ASSERT_NE(vma, NULL); + /* We modify the provided VMA, and on split allocate new VMAs. */ + ASSERT_EQ(vma, init_vma); + + ASSERT_EQ(vma->vm_start, 0x1000); + ASSERT_EQ(vma->vm_end, 0x2000); + ASSERT_EQ(vma->vm_pgoff, 1); + + /* + * Now walk through the three split VMAs and make sure they are as + * expected. + */ + + vma_iter_set(&vmi, 0); + vma = vma_iter_load(&vmi); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x1000); + ASSERT_EQ(vma->vm_pgoff, 0); + + detach_free_vma(vma); + vma_iter_clear(&vmi); + + vma = vma_next(&vmi); + + ASSERT_EQ(vma->vm_start, 0x1000); + ASSERT_EQ(vma->vm_end, 0x2000); + ASSERT_EQ(vma->vm_pgoff, 1); + + detach_free_vma(vma); + vma_iter_clear(&vmi); + + vma = vma_next(&vmi); + + ASSERT_EQ(vma->vm_start, 0x2000); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 2); + + detach_free_vma(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool test_simple_expand(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x1000, 0, vm_flags); + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .vmi = &vmi, + .target = vma, + .start = 0, + .end = 0x3000, + .pgoff = 0, + }; + + ASSERT_FALSE(attach_vma(&mm, vma)); + + ASSERT_FALSE(expand_existing(&vmg)); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 0); + + detach_free_vma(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool test_simple_shrink(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags); + VMA_ITERATOR(vmi, &mm, 0); + + ASSERT_FALSE(attach_vma(&mm, vma)); + + ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0)); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x1000); + ASSERT_EQ(vma->vm_pgoff, 0); + + detach_free_vma(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky, bool c_is_sticky) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct anon_vma_chain dummy_anon_vma_chain_a = { + .anon_vma = &dummy_anon_vma, + }; + struct anon_vma_chain dummy_anon_vma_chain_b = { + .anon_vma = &dummy_anon_vma, + }; + struct anon_vma_chain dummy_anon_vma_chain_c = { + .anon_vma = &dummy_anon_vma, + }; + struct anon_vma_chain dummy_anon_vma_chain_d = { + .anon_vma = &dummy_anon_vma, + }; + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + int count; + struct vm_area_struct *vma, *vma_a, *vma_b, *vma_c, *vma_d; + bool merged; + + if (is_sticky) + vm_flags |= VM_STICKY; + + /* + * 0123456789abc + * AA B CC + */ + vma_a = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); + ASSERT_NE(vma_a, NULL); + if (a_is_sticky) + vm_flags_set(vma_a, VM_STICKY); + /* We give each VMA a single avc so we can test anon_vma duplication. */ + INIT_LIST_HEAD(&vma_a->anon_vma_chain); + list_add(&dummy_anon_vma_chain_a.same_vma, &vma_a->anon_vma_chain); + + vma_b = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); + ASSERT_NE(vma_b, NULL); + if (b_is_sticky) + vm_flags_set(vma_b, VM_STICKY); + INIT_LIST_HEAD(&vma_b->anon_vma_chain); + list_add(&dummy_anon_vma_chain_b.same_vma, &vma_b->anon_vma_chain); + + vma_c = alloc_and_link_vma(&mm, 0xb000, 0xc000, 0xb, vm_flags); + ASSERT_NE(vma_c, NULL); + if (c_is_sticky) + vm_flags_set(vma_c, VM_STICKY); + INIT_LIST_HEAD(&vma_c->anon_vma_chain); + list_add(&dummy_anon_vma_chain_c.same_vma, &vma_c->anon_vma_chain); + + /* + * NO merge. + * + * 0123456789abc + * AA B ** CC + */ + vma_d = try_merge_new_vma(&mm, &vmg, 0x7000, 0x9000, 7, vm_flags, &merged); + ASSERT_NE(vma_d, NULL); + INIT_LIST_HEAD(&vma_d->anon_vma_chain); + list_add(&dummy_anon_vma_chain_d.same_vma, &vma_d->anon_vma_chain); + ASSERT_FALSE(merged); + ASSERT_EQ(mm.map_count, 4); + + /* + * Merge BOTH sides. + * + * 0123456789abc + * AA*B DD CC + */ + vma_a->vm_ops = &vm_ops; /* This should have no impact. */ + vma_b->anon_vma = &dummy_anon_vma; + vma = try_merge_new_vma(&mm, &vmg, 0x2000, 0x3000, 2, vm_flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Merge with A, delete B. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x4000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 3); + if (is_sticky || a_is_sticky || b_is_sticky) + ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); + + /* + * Merge to PREVIOUS VMA. + * + * 0123456789abc + * AAAA* DD CC + */ + vma = try_merge_new_vma(&mm, &vmg, 0x4000, 0x5000, 4, vm_flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Extend A. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x5000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 3); + if (is_sticky || a_is_sticky) + ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); + + /* + * Merge to NEXT VMA. + * + * 0123456789abc + * AAAAA *DD CC + */ + vma_d->anon_vma = &dummy_anon_vma; + vma_d->vm_ops = &vm_ops; /* This should have no impact. */ + vma = try_merge_new_vma(&mm, &vmg, 0x6000, 0x7000, 6, vm_flags, &merged); + ASSERT_EQ(vma, vma_d); + /* Prepend. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0x6000); + ASSERT_EQ(vma->vm_end, 0x9000); + ASSERT_EQ(vma->vm_pgoff, 6); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 3); + if (is_sticky) /* D uses is_sticky. */ + ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); + + /* + * Merge BOTH sides. + * + * 0123456789abc + * AAAAA*DDD CC + */ + vma_d->vm_ops = NULL; /* This would otherwise degrade the merge. */ + vma = try_merge_new_vma(&mm, &vmg, 0x5000, 0x6000, 5, vm_flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Merge with A, delete D. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x9000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + if (is_sticky || a_is_sticky) + ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); + + /* + * Merge to NEXT VMA. + * + * 0123456789abc + * AAAAAAAAA *CC + */ + vma_c->anon_vma = &dummy_anon_vma; + vma = try_merge_new_vma(&mm, &vmg, 0xa000, 0xb000, 0xa, vm_flags, &merged); + ASSERT_EQ(vma, vma_c); + /* Prepend C. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0xa000); + ASSERT_EQ(vma->vm_end, 0xc000); + ASSERT_EQ(vma->vm_pgoff, 0xa); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + if (is_sticky || c_is_sticky) + ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); + + /* + * Merge BOTH sides. + * + * 0123456789abc + * AAAAAAAAA*CCC + */ + vma = try_merge_new_vma(&mm, &vmg, 0x9000, 0xa000, 0x9, vm_flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Extend A and delete C. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0xc000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 1); + if (is_sticky || a_is_sticky || c_is_sticky) + ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); + + /* + * Final state. + * + * 0123456789abc + * AAAAAAAAAAAAA + */ + + count = 0; + vma_iter_set(&vmi, 0); + for_each_vma(vmi, vma) { + ASSERT_NE(vma, NULL); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0xc000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + + detach_free_vma(vma); + count++; + } + + /* Should only have one VMA left (though freed) after all is done.*/ + ASSERT_EQ(count, 1); + + mtree_destroy(&mm.mm_mt); + return true; +} + +static bool test_merge_new(void) +{ + int i, j, k, l; + + /* Generate every possible permutation of sticky flags. */ + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + for (l = 0; l < 2; l++) + ASSERT_TRUE(__test_merge_new(i, j, k, l)); + + return true; +} + +static bool test_vma_merge_special_flags(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + vm_flags_t special_flags[] = { VM_IO, VM_DONTEXPAND, VM_PFNMAP, VM_MIXEDMAP }; + vm_flags_t all_special_flags = 0; + int i; + struct vm_area_struct *vma_left, *vma; + + /* Make sure there aren't new VM_SPECIAL flags. */ + for (i = 0; i < ARRAY_SIZE(special_flags); i++) { + all_special_flags |= special_flags[i]; + } + ASSERT_EQ(all_special_flags, VM_SPECIAL); + + /* + * 01234 + * AAA + */ + vma_left = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + ASSERT_NE(vma_left, NULL); + + /* 1. Set up new VMA with special flag that would otherwise merge. */ + + /* + * 01234 + * AAA* + * + * This should merge if not for the VM_SPECIAL flag. + */ + vmg_set_range(&vmg, 0x3000, 0x4000, 3, vm_flags); + for (i = 0; i < ARRAY_SIZE(special_flags); i++) { + vm_flags_t special_flag = special_flags[i]; + + vm_flags_reset(vma_left, vm_flags | special_flag); + vmg.vm_flags = vm_flags | special_flag; + vma = merge_new(&vmg); + ASSERT_EQ(vma, NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + } + + /* 2. Modify VMA with special flag that would otherwise merge. */ + + /* + * 01234 + * AAAB + * + * Create a VMA to modify. + */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); + ASSERT_NE(vma, NULL); + vmg.middle = vma; + + for (i = 0; i < ARRAY_SIZE(special_flags); i++) { + vm_flags_t special_flag = special_flags[i]; + + vm_flags_reset(vma_left, vm_flags | special_flag); + vmg.vm_flags = vm_flags | special_flag; + vma = merge_existing(&vmg); + ASSERT_EQ(vma, NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + } + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_vma_merge_with_close(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + struct vm_area_struct *vma_prev, *vma_next, *vma; + + /* + * When merging VMAs we are not permitted to remove any VMA that has a + * vm_ops->close() hook. + * + * Considering the two possible adjacent VMAs to which a VMA can be + * merged: + * + * [ prev ][ vma ][ next ] + * + * In no case will we need to delete prev. If the operation is + * mergeable, then prev will be extended with one or both of vma and + * next deleted. + * + * As a result, during initial mergeability checks, only + * can_vma_merge_before() (which implies the VMA being merged with is + * 'next' as shown above) bothers to check to see whether the next VMA + * has a vm_ops->close() callback that will need to be called when + * removed. + * + * If it does, then we cannot merge as the resources that the close() + * operation potentially clears down are tied only to the existing VMA + * range and we have no way of extending those to the nearly merged one. + * + * We must consider two scenarios: + * + * A. + * + * vm_ops->close: - - !NULL + * [ prev ][ vma ][ next ] + * + * Where prev may or may not be present/mergeable. + * + * This is picked up by a specific check in can_vma_merge_before(). + * + * B. + * + * vm_ops->close: - !NULL + * [ prev ][ vma ] + * + * Where prev and vma are present and mergeable. + * + * This is picked up by a specific check in the modified VMA merge. + * + * IMPORTANT NOTE: We make the assumption that the following case: + * + * - !NULL NULL + * [ prev ][ vma ][ next ] + * + * Cannot occur, because vma->vm_ops being the same implies the same + * vma->vm_file, and therefore this would mean that next->vm_ops->close + * would be set too, and thus scenario A would pick this up. + */ + + /* + * The only case of a new VMA merge that results in a VMA being deleted + * is one where both the previous and next VMAs are merged - in this + * instance the next VMA is deleted, and the previous VMA is extended. + * + * If we are unable to do so, we reduce the operation to simply + * extending the prev VMA and not merging next. + * + * 0123456789 + * PPP**NNNN + * -> + * 0123456789 + * PPPPPPNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); + vma_next->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + ASSERT_EQ(merge_new(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x5000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * When modifying an existing VMA there are further cases where we + * delete VMAs. + * + * <> + * 0123456789 + * PPPVV + * + * In this instance, if vma has a close hook, the merge simply cannot + * proceed. + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma_prev; + vmg.middle = vma; + + /* + * The VMA being modified in a way that would otherwise merge should + * also fail. + */ + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * This case is mirrored if merging with next. + * + * <> + * 0123456789 + * VVNNNN + * + * In this instance, if vma has a close hook, the merge simply cannot + * proceed. + */ + + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); + vma->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.middle = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + /* + * Initially this is misapprehended as an out of memory report, as the + * close() check is handled in the same way as anon_vma duplication + * failures, however a subsequent patch resolves this. + */ + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Finally, we consider two variants of the case where we modify a VMA + * to merge with both the previous and next VMAs. + * + * The first variant is where vma has a close hook. In this instance, no + * merge can proceed. + * + * <> + * 0123456789 + * PPPVVNNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); + vma->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma_prev; + vmg.middle = vma; + + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 3); + + /* + * The second variant is where next has a close hook. In this instance, + * we reduce the operation to a merge between prev and vma. + * + * <> + * 0123456789 + * PPPVVNNNN + * -> + * 0123456789 + * PPPPPNNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); + vma_next->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma_prev; + vmg.middle = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x5000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + return true; +} + +static bool test_vma_merge_new_with_close(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct vm_area_struct *vma_prev = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); + struct vm_area_struct *vma_next = alloc_and_link_vma(&mm, 0x5000, 0x7000, 5, vm_flags); + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + struct vm_area_struct *vma; + + /* + * We should allow the partial merge of a proposed new VMA if the + * surrounding VMAs have vm_ops->close() hooks (but are otherwise + * compatible), e.g.: + * + * New VMA + * A v-------v B + * |-----| |-----| + * close close + * + * Since the rule is to not DELETE a VMA with a close operation, this + * should be permitted, only rather than expanding A and deleting B, we + * should simply expand A and leave B intact, e.g.: + * + * New VMA + * A B + * |------------||-----| + * close close + */ + + /* Have prev and next have a vm_ops->close() hook. */ + vma_prev->vm_ops = &vm_ops; + vma_next->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x2000, 0x5000, 2, vm_flags); + vma = merge_new(&vmg); + ASSERT_NE(vma, NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x5000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->vm_ops, &vm_ops); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool __test_merge_existing(bool prev_is_sticky, bool middle_is_sticky, bool next_is_sticky) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t prev_flags = vm_flags; + vm_flags_t next_flags = vm_flags; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma, *vma_prev, *vma_next; + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + struct anon_vma_chain avc = {}; + + if (prev_is_sticky) + prev_flags |= VM_STICKY; + if (middle_is_sticky) + vm_flags |= VM_STICKY; + if (next_is_sticky) + next_flags |= VM_STICKY; + + /* + * Merge right case - partial span. + * + * <-> + * 0123456789 + * VVVVNNN + * -> + * 0123456789 + * VNNNNNN + */ + vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags); + vma->vm_ops = &vm_ops; /* This should have no impact. */ + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags); + vma_next->vm_ops = &vm_ops; /* This should have no impact. */ + vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma); + vmg.middle = vma; + vmg.prev = vma; + vma_set_dummy_anon_vma(vma, &avc); + ASSERT_EQ(merge_existing(&vmg), vma_next); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_next->vm_start, 0x3000); + ASSERT_EQ(vma_next->vm_end, 0x9000); + ASSERT_EQ(vma_next->vm_pgoff, 3); + ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); + ASSERT_EQ(vma->vm_start, 0x2000); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 2); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_TRUE(vma_write_started(vma_next)); + ASSERT_EQ(mm.map_count, 2); + if (middle_is_sticky || next_is_sticky) + ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY)); + + /* Clear down and reset. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Merge right case - full span. + * + * <--> + * 0123456789 + * VVVVNNN + * -> + * 0123456789 + * NNNNNNN + */ + vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags); + vma_next->vm_ops = &vm_ops; /* This should have no impact. */ + vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, vm_flags, &dummy_anon_vma); + vmg.middle = vma; + vma_set_dummy_anon_vma(vma, &avc); + ASSERT_EQ(merge_existing(&vmg), vma_next); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_next->vm_start, 0x2000); + ASSERT_EQ(vma_next->vm_end, 0x9000); + ASSERT_EQ(vma_next->vm_pgoff, 2); + ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma_next)); + ASSERT_EQ(mm.map_count, 1); + if (middle_is_sticky || next_is_sticky) + ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY)); + + /* Clear down and reset. We should have deleted vma. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); + + /* + * Merge left case - partial span. + * + * <-> + * 0123456789 + * PPPVVVV + * -> + * 0123456789 + * PPPPPPV + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); + vma->vm_ops = &vm_ops; /* This should have no impact. */ + vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma); + vmg.prev = vma_prev; + vmg.middle = vma; + vma_set_dummy_anon_vma(vma, &avc); + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x6000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_EQ(vma->vm_start, 0x6000); + ASSERT_EQ(vma->vm_end, 0x7000); + ASSERT_EQ(vma->vm_pgoff, 6); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + if (prev_is_sticky || middle_is_sticky) + ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY)); + + /* Clear down and reset. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Merge left case - full span. + * + * <--> + * 0123456789 + * PPPVVVV + * -> + * 0123456789 + * PPPPPPP + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma); + vmg.prev = vma_prev; + vmg.middle = vma; + vma_set_dummy_anon_vma(vma, &avc); + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x7000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_EQ(mm.map_count, 1); + if (prev_is_sticky || middle_is_sticky) + ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY)); + + /* Clear down and reset. We should have deleted vma. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); + + /* + * Merge both case. + * + * <--> + * 0123456789 + * PPPVVVVNNN + * -> + * 0123456789 + * PPPPPPPPPP + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, next_flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma); + vmg.prev = vma_prev; + vmg.middle = vma; + vma_set_dummy_anon_vma(vma, &avc); + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x9000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_EQ(mm.map_count, 1); + if (prev_is_sticky || middle_is_sticky || next_is_sticky) + ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY)); + + /* Clear down and reset. We should have deleted prev and next. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); + + /* + * Non-merge ranges. the modified VMA merge operation assumes that the + * caller always specifies ranges within the input VMA so we need only + * examine these cases. + * + * - + * - + * - + * <-> + * <> + * <> + * 0123456789a + * PPPVVVVVNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, next_flags); + + vmg_set_range(&vmg, 0x4000, 0x5000, 4, vm_flags); + vmg.prev = vma; + vmg.middle = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x5000, 0x6000, 5, vm_flags); + vmg.prev = vma; + vmg.middle = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x6000, 0x7000, 6, vm_flags); + vmg.prev = vma; + vmg.middle = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x4000, 0x7000, 4, vm_flags); + vmg.prev = vma; + vmg.middle = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x4000, 0x6000, 4, vm_flags); + vmg.prev = vma; + vmg.middle = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x5000, 0x6000, 5, vm_flags); + vmg.prev = vma; + vmg.middle = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 3); + + return true; +} + +static bool test_merge_existing(void) +{ + int i, j, k; + + /* Generate every possible permutation of sticky flags. */ + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + ASSERT_TRUE(__test_merge_existing(i, j, k)); + + return true; +} + +static bool test_anon_vma_non_mergeable(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma, *vma_prev, *vma_next; + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct anon_vma_chain dummy_anon_vma_chain_1 = {}; + struct anon_vma_chain dummy_anon_vma_chain_2 = {}; + struct anon_vma dummy_anon_vma_2; + + /* + * In the case of modified VMA merge, merging both left and right VMAs + * but where prev and next have incompatible anon_vma objects, we revert + * to a merge of prev and VMA: + * + * <--> + * 0123456789 + * PPPVVVVNNN + * -> + * 0123456789 + * PPPPPPPNNN + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags); + + /* + * Give both prev and next single anon_vma_chain fields, so they will + * merge with the NULL vmg->anon_vma. + * + * However, when prev is compared to next, the merge should fail. + */ + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, NULL); + vmg.prev = vma_prev; + vmg.middle = vma; + vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); + __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2); + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x7000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_FALSE(vma_write_started(vma_next)); + + /* Clear down and reset. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Now consider the new VMA case. This is equivalent, only adding a new + * VMA in a gap between prev and next. + * + * <--> + * 0123456789 + * PPP****NNN + * -> + * 0123456789 + * PPPPPPPNNN + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags); + + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, NULL); + vmg.prev = vma_prev; + vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); + __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2); + + vmg.anon_vma = NULL; + ASSERT_EQ(merge_new(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x7000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_FALSE(vma_write_started(vma_next)); + + /* Final cleanup. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + return true; +} + +static bool test_dup_anon_vma(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct anon_vma_chain dummy_anon_vma_chain = { + .anon_vma = &dummy_anon_vma, + }; + struct vm_area_struct *vma_prev, *vma_next, *vma; + + reset_dummy_anon_vma(); + + /* + * Expanding a VMA delete the next one duplicates next's anon_vma and + * assigns it to the expanded VMA. + * + * This covers new VMA merging, as these operations amount to a VMA + * expand. + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next->anon_vma = &dummy_anon_vma; + + vmg_set_range(&vmg, 0, 0x5000, 0, vm_flags); + vmg.target = vma_prev; + vmg.next = vma_next; + + ASSERT_EQ(expand_existing(&vmg), 0); + + /* Will have been cloned. */ + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + /* Cleanup ready for next run. */ + cleanup_mm(&mm, &vmi); + + /* + * next has anon_vma, we assign to prev. + * + * |<----->| + * |-------*********-------| + * prev vma next + * extend delete delete + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); + + /* Initialise avc so mergeability check passes. */ + INIT_LIST_HEAD(&vma_next->anon_vma_chain); + list_add(&dummy_anon_vma_chain.same_vma, &vma_next->anon_vma_chain); + + vma_next->anon_vma = &dummy_anon_vma; + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma_prev; + vmg.middle = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x8000); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + + /* + * vma has anon_vma, we assign to prev. + * + * |<----->| + * |-------*********-------| + * prev vma next + * extend delete delete + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); + vmg.anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma_prev; + vmg.middle = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x8000); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + + /* + * vma has anon_vma, we assign to prev. + * + * |<----->| + * |-------************* + * prev vma + * extend shrink/delete + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags); + + vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma_prev; + vmg.middle = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x5000); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + + /* + * vma has anon_vma, we assign to next. + * + * |<----->| + * *************-------| + * vma next + * shrink/delete extend + */ + + vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); + + vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma; + vmg.middle = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_next); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_next->vm_start, 0x3000); + ASSERT_EQ(vma_next->vm_end, 0x8000); + + ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_next->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_vmi_prealloc_fail(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct anon_vma_chain avc = {}; + struct vm_area_struct *vma_prev, *vma; + + /* + * We are merging vma into prev, with vma possessing an anon_vma, which + * will be duplicated. We cause the vmi preallocation to fail and assert + * the duplicated anon_vma is unlinked. + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma->anon_vma = &dummy_anon_vma; + + vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, vm_flags, &dummy_anon_vma); + vmg.prev = vma_prev; + vmg.middle = vma; + vma_set_dummy_anon_vma(vma, &avc); + + fail_prealloc = true; + + /* This will cause the merge to fail. */ + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM); + /* We will already have assigned the anon_vma. */ + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + /* And it was both cloned and unlinked. */ + ASSERT_TRUE(dummy_anon_vma.was_cloned); + ASSERT_TRUE(dummy_anon_vma.was_unlinked); + + cleanup_mm(&mm, &vmi); /* Resets fail_prealloc too. */ + + /* + * We repeat the same operation for expanding a VMA, which is what new + * VMA merging ultimately uses too. This asserts that unlinking is + * performed in this case too. + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma->anon_vma = &dummy_anon_vma; + + vmg_set_range(&vmg, 0, 0x5000, 3, vm_flags); + vmg.target = vma_prev; + vmg.next = vma; + + fail_prealloc = true; + ASSERT_EQ(expand_existing(&vmg), -ENOMEM); + ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(dummy_anon_vma.was_cloned); + ASSERT_TRUE(dummy_anon_vma.was_unlinked); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_merge_extend(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0x1000); + struct vm_area_struct *vma; + + vma = alloc_and_link_vma(&mm, 0, 0x1000, 0, vm_flags); + alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); + + /* + * Extend a VMA into the gap between itself and the following VMA. + * This should result in a merge. + * + * <-> + * * * + * + */ + + ASSERT_EQ(vma_merge_extend(&vmi, vma, 0x2000), vma); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x4000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 1); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_expand_only_mode(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma_prev, *vma; + VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, vm_flags, 5); + + /* + * Place a VMA prior to the one we're expanding so we assert that we do + * not erroneously try to traverse to the previous VMA even though we + * have, through the use of the just_expand flag, indicated we do not + * need to do so. + */ + alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); + + /* + * We will be positioned at the prev VMA, but looking to expand to + * 0x9000. + */ + vma_iter_set(&vmi, 0x3000); + vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vmg.prev = vma_prev; + vmg.just_expand = true; + + vma = vma_merge_new_range(&vmg); + ASSERT_NE(vma, NULL); + ASSERT_EQ(vma, vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma->vm_start, 0x3000); + ASSERT_EQ(vma->vm_end, 0x9000); + ASSERT_EQ(vma->vm_pgoff, 3); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(vma_iter_addr(&vmi), 0x3000); + vma_assert_attached(vma); + + cleanup_mm(&mm, &vmi); + return true; +} + +static void run_merge_tests(int *num_tests, int *num_fail) +{ + /* Very simple tests to kick the tyres. */ + TEST(simple_merge); + TEST(simple_modify); + TEST(simple_expand); + TEST(simple_shrink); + + TEST(merge_new); + TEST(vma_merge_special_flags); + TEST(vma_merge_with_close); + TEST(vma_merge_new_with_close); + TEST(merge_existing); + TEST(anon_vma_non_mergeable); + TEST(dup_anon_vma); + TEST(vmi_prealloc_fail); + TEST(merge_extend); + TEST(expand_only_mode); +} diff --git a/tools/testing/vma/tests/mmap.c b/tools/testing/vma/tests/mmap.c new file mode 100644 index 000000000000..bded4ecbe5db --- /dev/null +++ b/tools/testing/vma/tests/mmap.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +static bool test_mmap_region_basic(void) +{ + struct mm_struct mm = {}; + unsigned long addr; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, &mm, 0); + + current->mm = &mm; + + /* Map at 0x300000, length 0x3000. */ + addr = __mmap_region(NULL, 0x300000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x300, NULL); + ASSERT_EQ(addr, 0x300000); + + /* Map at 0x250000, length 0x3000. */ + addr = __mmap_region(NULL, 0x250000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x250, NULL); + ASSERT_EQ(addr, 0x250000); + + /* Map at 0x303000, merging to 0x300000 of length 0x6000. */ + addr = __mmap_region(NULL, 0x303000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x303, NULL); + ASSERT_EQ(addr, 0x303000); + + /* Map at 0x24d000, merging to 0x250000 of length 0x6000. */ + addr = __mmap_region(NULL, 0x24d000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x24d, NULL); + ASSERT_EQ(addr, 0x24d000); + + ASSERT_EQ(mm.map_count, 2); + + for_each_vma(vmi, vma) { + if (vma->vm_start == 0x300000) { + ASSERT_EQ(vma->vm_end, 0x306000); + ASSERT_EQ(vma->vm_pgoff, 0x300); + } else if (vma->vm_start == 0x24d000) { + ASSERT_EQ(vma->vm_end, 0x253000); + ASSERT_EQ(vma->vm_pgoff, 0x24d); + } else { + ASSERT_FALSE(true); + } + } + + cleanup_mm(&mm, &vmi); + return true; +} + +static void run_mmap_tests(int *num_tests, int *num_fail) +{ + TEST(mmap_region_basic); +} diff --git a/tools/testing/vma/tests/vma.c b/tools/testing/vma/tests/vma.c new file mode 100644 index 000000000000..6d9775aee243 --- /dev/null +++ b/tools/testing/vma/tests/vma.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +static bool test_copy_vma(void) +{ + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + bool need_locks = false; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma, *vma_new, *vma_next; + + /* Move backwards and do not merge. */ + + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); + ASSERT_NE(vma_new, vma); + ASSERT_EQ(vma_new->vm_start, 0); + ASSERT_EQ(vma_new->vm_end, 0x2000); + ASSERT_EQ(vma_new->vm_pgoff, 0); + vma_assert_attached(vma_new); + + cleanup_mm(&mm, &vmi); + + /* Move a VMA into position next to another and merge the two. */ + + vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags); + vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); + vma_assert_attached(vma_new); + + ASSERT_EQ(vma_new, vma_next); + + cleanup_mm(&mm, &vmi); + return true; +} + +static void run_vma_tests(int *num_tests, int *num_fail) +{ + TEST(copy_vma); +} diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c deleted file mode 100644 index 93d21bc7e112..000000000000 --- a/tools/testing/vma/vma.c +++ /dev/null @@ -1,1785 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include "generated/bit-length.h" - -#include "maple-shared.h" -#include "vma_internal.h" - -/* Include so header guard set. */ -#include "../../../mm/vma.h" - -static bool fail_prealloc; - -/* Then override vma_iter_prealloc() so we can choose to fail it. */ -#define vma_iter_prealloc(vmi, vma) \ - (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL)) - -#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536 - -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; -unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; -unsigned long stack_guard_gap = 256UL<vm_start = start; - vma->vm_end = end; - vma->vm_pgoff = pgoff; - vm_flags_reset(vma, vm_flags); - vma_assert_detached(vma); - - return vma; -} - -/* Helper function to allocate a VMA and link it to the tree. */ -static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma) -{ - int res; - - res = vma_link(mm, vma); - if (!res) - vma_assert_attached(vma); - return res; -} - -static void detach_free_vma(struct vm_area_struct *vma) -{ - vma_mark_detached(vma); - vm_area_free(vma); -} - -/* Helper function to allocate a VMA and link it to the tree. */ -static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, - unsigned long start, - unsigned long end, - pgoff_t pgoff, - vm_flags_t vm_flags) -{ - struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags); - - if (vma == NULL) - return NULL; - - if (attach_vma(mm, vma)) { - detach_free_vma(vma); - return NULL; - } - - /* - * Reset this counter which we use to track whether writes have - * begun. Linking to the tree will have caused this to be incremented, - * which means we will get a false positive otherwise. - */ - vma->vm_lock_seq = UINT_MAX; - - return vma; -} - -/* Helper function which provides a wrapper around a merge new VMA operation. */ -static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) -{ - struct vm_area_struct *vma; - /* - * For convenience, get prev and next VMAs. Which the new VMA operation - * requires. - */ - vmg->next = vma_next(vmg->vmi); - vmg->prev = vma_prev(vmg->vmi); - vma_iter_next_range(vmg->vmi); - - vma = vma_merge_new_range(vmg); - if (vma) - vma_assert_attached(vma); - - return vma; -} - -/* - * Helper function which provides a wrapper around a merge existing VMA - * operation. - */ -static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg) -{ - struct vm_area_struct *vma; - - vma = vma_merge_existing_range(vmg); - if (vma) - vma_assert_attached(vma); - return vma; -} - -/* - * Helper function which provides a wrapper around the expansion of an existing - * VMA. - */ -static int expand_existing(struct vma_merge_struct *vmg) -{ - return vma_expand(vmg); -} - -/* - * Helper function to reset merge state the associated VMA iterator to a - * specified new range. - */ -static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, - unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags) -{ - vma_iter_set(vmg->vmi, start); - - vmg->prev = NULL; - vmg->middle = NULL; - vmg->next = NULL; - vmg->target = NULL; - - vmg->start = start; - vmg->end = end; - vmg->pgoff = pgoff; - vmg->vm_flags = vm_flags; - - vmg->just_expand = false; - vmg->__remove_middle = false; - vmg->__remove_next = false; - vmg->__adjust_middle_start = false; - vmg->__adjust_next_start = false; -} - -/* Helper function to set both the VMG range and its anon_vma. */ -static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start, - unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags, - struct anon_vma *anon_vma) -{ - vmg_set_range(vmg, start, end, pgoff, vm_flags); - vmg->anon_vma = anon_vma; -} - -/* - * Helper function to try to merge a new VMA. - * - * Update vmg and the iterator for it and try to merge, otherwise allocate a new - * VMA, link it to the maple tree and return it. - */ -static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm, - struct vma_merge_struct *vmg, - unsigned long start, unsigned long end, - pgoff_t pgoff, vm_flags_t vm_flags, - bool *was_merged) -{ - struct vm_area_struct *merged; - - vmg_set_range(vmg, start, end, pgoff, vm_flags); - - merged = merge_new(vmg); - if (merged) { - *was_merged = true; - ASSERT_EQ(vmg->state, VMA_MERGE_SUCCESS); - return merged; - } - - *was_merged = false; - - ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE); - - return alloc_and_link_vma(mm, start, end, pgoff, vm_flags); -} - -/* - * Helper function to reset the dummy anon_vma to indicate it has not been - * duplicated. - */ -static void reset_dummy_anon_vma(void) -{ - dummy_anon_vma.was_cloned = false; - dummy_anon_vma.was_unlinked = false; -} - -/* - * Helper function to remove all VMAs and destroy the maple tree associated with - * a virtual address space. Returns a count of VMAs in the tree. - */ -static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi) -{ - struct vm_area_struct *vma; - int count = 0; - - fail_prealloc = false; - reset_dummy_anon_vma(); - - vma_iter_set(vmi, 0); - for_each_vma(*vmi, vma) { - detach_free_vma(vma); - count++; - } - - mtree_destroy(&mm->mm_mt); - mm->map_count = 0; - return count; -} - -/* Helper function to determine if VMA has had vma_start_write() performed. */ -static bool vma_write_started(struct vm_area_struct *vma) -{ - int seq = vma->vm_lock_seq; - - /* We reset after each check. */ - vma->vm_lock_seq = UINT_MAX; - - /* The vma_start_write() stub simply increments this value. */ - return seq > -1; -} - -/* Helper function providing a dummy vm_ops->close() method.*/ -static void dummy_close(struct vm_area_struct *) -{ -} - -static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma, - struct anon_vma_chain *avc, - struct anon_vma *anon_vma) -{ - vma->anon_vma = anon_vma; - INIT_LIST_HEAD(&vma->anon_vma_chain); - list_add(&avc->same_vma, &vma->anon_vma_chain); - avc->anon_vma = vma->anon_vma; -} - -static void vma_set_dummy_anon_vma(struct vm_area_struct *vma, - struct anon_vma_chain *avc) -{ - __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma); -} - -static bool test_simple_merge(void) -{ - struct vm_area_struct *vma; - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - struct vm_area_struct *vma_left = alloc_vma(&mm, 0, 0x1000, 0, vm_flags); - struct vm_area_struct *vma_right = alloc_vma(&mm, 0x2000, 0x3000, 2, vm_flags); - VMA_ITERATOR(vmi, &mm, 0x1000); - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - .start = 0x1000, - .end = 0x2000, - .vm_flags = vm_flags, - .pgoff = 1, - }; - - ASSERT_FALSE(attach_vma(&mm, vma_left)); - ASSERT_FALSE(attach_vma(&mm, vma_right)); - - vma = merge_new(&vmg); - ASSERT_NE(vma, NULL); - - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x3000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->vm_flags, vm_flags); - - detach_free_vma(vma); - mtree_destroy(&mm.mm_mt); - - return true; -} - -static bool test_simple_modify(void) -{ - struct vm_area_struct *vma; - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags); - VMA_ITERATOR(vmi, &mm, 0x1000); - vm_flags_t flags = VM_READ | VM_MAYREAD; - - ASSERT_FALSE(attach_vma(&mm, init_vma)); - - /* - * The flags will not be changed, the vma_modify_flags() function - * performs the merge/split only. - */ - vma = vma_modify_flags(&vmi, init_vma, init_vma, - 0x1000, 0x2000, &flags); - ASSERT_NE(vma, NULL); - /* We modify the provided VMA, and on split allocate new VMAs. */ - ASSERT_EQ(vma, init_vma); - - ASSERT_EQ(vma->vm_start, 0x1000); - ASSERT_EQ(vma->vm_end, 0x2000); - ASSERT_EQ(vma->vm_pgoff, 1); - - /* - * Now walk through the three split VMAs and make sure they are as - * expected. - */ - - vma_iter_set(&vmi, 0); - vma = vma_iter_load(&vmi); - - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x1000); - ASSERT_EQ(vma->vm_pgoff, 0); - - detach_free_vma(vma); - vma_iter_clear(&vmi); - - vma = vma_next(&vmi); - - ASSERT_EQ(vma->vm_start, 0x1000); - ASSERT_EQ(vma->vm_end, 0x2000); - ASSERT_EQ(vma->vm_pgoff, 1); - - detach_free_vma(vma); - vma_iter_clear(&vmi); - - vma = vma_next(&vmi); - - ASSERT_EQ(vma->vm_start, 0x2000); - ASSERT_EQ(vma->vm_end, 0x3000); - ASSERT_EQ(vma->vm_pgoff, 2); - - detach_free_vma(vma); - mtree_destroy(&mm.mm_mt); - - return true; -} - -static bool test_simple_expand(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x1000, 0, vm_flags); - VMA_ITERATOR(vmi, &mm, 0); - struct vma_merge_struct vmg = { - .vmi = &vmi, - .target = vma, - .start = 0, - .end = 0x3000, - .pgoff = 0, - }; - - ASSERT_FALSE(attach_vma(&mm, vma)); - - ASSERT_FALSE(expand_existing(&vmg)); - - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x3000); - ASSERT_EQ(vma->vm_pgoff, 0); - - detach_free_vma(vma); - mtree_destroy(&mm.mm_mt); - - return true; -} - -static bool test_simple_shrink(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags); - VMA_ITERATOR(vmi, &mm, 0); - - ASSERT_FALSE(attach_vma(&mm, vma)); - - ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0)); - - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x1000); - ASSERT_EQ(vma->vm_pgoff, 0); - - detach_free_vma(vma); - mtree_destroy(&mm.mm_mt); - - return true; -} - -static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky, bool c_is_sticky) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - struct anon_vma_chain dummy_anon_vma_chain_a = { - .anon_vma = &dummy_anon_vma, - }; - struct anon_vma_chain dummy_anon_vma_chain_b = { - .anon_vma = &dummy_anon_vma, - }; - struct anon_vma_chain dummy_anon_vma_chain_c = { - .anon_vma = &dummy_anon_vma, - }; - struct anon_vma_chain dummy_anon_vma_chain_d = { - .anon_vma = &dummy_anon_vma, - }; - const struct vm_operations_struct vm_ops = { - .close = dummy_close, - }; - int count; - struct vm_area_struct *vma, *vma_a, *vma_b, *vma_c, *vma_d; - bool merged; - - if (is_sticky) - vm_flags |= VM_STICKY; - - /* - * 0123456789abc - * AA B CC - */ - vma_a = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); - ASSERT_NE(vma_a, NULL); - if (a_is_sticky) - vm_flags_set(vma_a, VM_STICKY); - /* We give each VMA a single avc so we can test anon_vma duplication. */ - INIT_LIST_HEAD(&vma_a->anon_vma_chain); - list_add(&dummy_anon_vma_chain_a.same_vma, &vma_a->anon_vma_chain); - - vma_b = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); - ASSERT_NE(vma_b, NULL); - if (b_is_sticky) - vm_flags_set(vma_b, VM_STICKY); - INIT_LIST_HEAD(&vma_b->anon_vma_chain); - list_add(&dummy_anon_vma_chain_b.same_vma, &vma_b->anon_vma_chain); - - vma_c = alloc_and_link_vma(&mm, 0xb000, 0xc000, 0xb, vm_flags); - ASSERT_NE(vma_c, NULL); - if (c_is_sticky) - vm_flags_set(vma_c, VM_STICKY); - INIT_LIST_HEAD(&vma_c->anon_vma_chain); - list_add(&dummy_anon_vma_chain_c.same_vma, &vma_c->anon_vma_chain); - - /* - * NO merge. - * - * 0123456789abc - * AA B ** CC - */ - vma_d = try_merge_new_vma(&mm, &vmg, 0x7000, 0x9000, 7, vm_flags, &merged); - ASSERT_NE(vma_d, NULL); - INIT_LIST_HEAD(&vma_d->anon_vma_chain); - list_add(&dummy_anon_vma_chain_d.same_vma, &vma_d->anon_vma_chain); - ASSERT_FALSE(merged); - ASSERT_EQ(mm.map_count, 4); - - /* - * Merge BOTH sides. - * - * 0123456789abc - * AA*B DD CC - */ - vma_a->vm_ops = &vm_ops; /* This should have no impact. */ - vma_b->anon_vma = &dummy_anon_vma; - vma = try_merge_new_vma(&mm, &vmg, 0x2000, 0x3000, 2, vm_flags, &merged); - ASSERT_EQ(vma, vma_a); - /* Merge with A, delete B. */ - ASSERT_TRUE(merged); - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x4000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 3); - if (is_sticky || a_is_sticky || b_is_sticky) - ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); - - /* - * Merge to PREVIOUS VMA. - * - * 0123456789abc - * AAAA* DD CC - */ - vma = try_merge_new_vma(&mm, &vmg, 0x4000, 0x5000, 4, vm_flags, &merged); - ASSERT_EQ(vma, vma_a); - /* Extend A. */ - ASSERT_TRUE(merged); - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x5000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 3); - if (is_sticky || a_is_sticky) - ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); - - /* - * Merge to NEXT VMA. - * - * 0123456789abc - * AAAAA *DD CC - */ - vma_d->anon_vma = &dummy_anon_vma; - vma_d->vm_ops = &vm_ops; /* This should have no impact. */ - vma = try_merge_new_vma(&mm, &vmg, 0x6000, 0x7000, 6, vm_flags, &merged); - ASSERT_EQ(vma, vma_d); - /* Prepend. */ - ASSERT_TRUE(merged); - ASSERT_EQ(vma->vm_start, 0x6000); - ASSERT_EQ(vma->vm_end, 0x9000); - ASSERT_EQ(vma->vm_pgoff, 6); - ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 3); - if (is_sticky) /* D uses is_sticky. */ - ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); - - /* - * Merge BOTH sides. - * - * 0123456789abc - * AAAAA*DDD CC - */ - vma_d->vm_ops = NULL; /* This would otherwise degrade the merge. */ - vma = try_merge_new_vma(&mm, &vmg, 0x5000, 0x6000, 5, vm_flags, &merged); - ASSERT_EQ(vma, vma_a); - /* Merge with A, delete D. */ - ASSERT_TRUE(merged); - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x9000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 2); - if (is_sticky || a_is_sticky) - ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); - - /* - * Merge to NEXT VMA. - * - * 0123456789abc - * AAAAAAAAA *CC - */ - vma_c->anon_vma = &dummy_anon_vma; - vma = try_merge_new_vma(&mm, &vmg, 0xa000, 0xb000, 0xa, vm_flags, &merged); - ASSERT_EQ(vma, vma_c); - /* Prepend C. */ - ASSERT_TRUE(merged); - ASSERT_EQ(vma->vm_start, 0xa000); - ASSERT_EQ(vma->vm_end, 0xc000); - ASSERT_EQ(vma->vm_pgoff, 0xa); - ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 2); - if (is_sticky || c_is_sticky) - ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); - - /* - * Merge BOTH sides. - * - * 0123456789abc - * AAAAAAAAA*CCC - */ - vma = try_merge_new_vma(&mm, &vmg, 0x9000, 0xa000, 0x9, vm_flags, &merged); - ASSERT_EQ(vma, vma_a); - /* Extend A and delete C. */ - ASSERT_TRUE(merged); - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0xc000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 1); - if (is_sticky || a_is_sticky || c_is_sticky) - ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY)); - - /* - * Final state. - * - * 0123456789abc - * AAAAAAAAAAAAA - */ - - count = 0; - vma_iter_set(&vmi, 0); - for_each_vma(vmi, vma) { - ASSERT_NE(vma, NULL); - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0xc000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - - detach_free_vma(vma); - count++; - } - - /* Should only have one VMA left (though freed) after all is done.*/ - ASSERT_EQ(count, 1); - - mtree_destroy(&mm.mm_mt); - return true; -} - -static bool test_merge_new(void) -{ - int i, j, k, l; - - /* Generate every possible permutation of sticky flags. */ - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) - for (l = 0; l < 2; l++) - ASSERT_TRUE(__test_merge_new(i, j, k, l)); - - return true; -} - -static bool test_vma_merge_special_flags(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - vm_flags_t special_flags[] = { VM_IO, VM_DONTEXPAND, VM_PFNMAP, VM_MIXEDMAP }; - vm_flags_t all_special_flags = 0; - int i; - struct vm_area_struct *vma_left, *vma; - - /* Make sure there aren't new VM_SPECIAL flags. */ - for (i = 0; i < ARRAY_SIZE(special_flags); i++) { - all_special_flags |= special_flags[i]; - } - ASSERT_EQ(all_special_flags, VM_SPECIAL); - - /* - * 01234 - * AAA - */ - vma_left = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - ASSERT_NE(vma_left, NULL); - - /* 1. Set up new VMA with special flag that would otherwise merge. */ - - /* - * 01234 - * AAA* - * - * This should merge if not for the VM_SPECIAL flag. - */ - vmg_set_range(&vmg, 0x3000, 0x4000, 3, vm_flags); - for (i = 0; i < ARRAY_SIZE(special_flags); i++) { - vm_flags_t special_flag = special_flags[i]; - - vm_flags_reset(vma_left, vm_flags | special_flag); - vmg.vm_flags = vm_flags | special_flag; - vma = merge_new(&vmg); - ASSERT_EQ(vma, NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - } - - /* 2. Modify VMA with special flag that would otherwise merge. */ - - /* - * 01234 - * AAAB - * - * Create a VMA to modify. - */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); - ASSERT_NE(vma, NULL); - vmg.middle = vma; - - for (i = 0; i < ARRAY_SIZE(special_flags); i++) { - vm_flags_t special_flag = special_flags[i]; - - vm_flags_reset(vma_left, vm_flags | special_flag); - vmg.vm_flags = vm_flags | special_flag; - vma = merge_existing(&vmg); - ASSERT_EQ(vma, NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - } - - cleanup_mm(&mm, &vmi); - return true; -} - -static bool test_vma_merge_with_close(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - const struct vm_operations_struct vm_ops = { - .close = dummy_close, - }; - struct vm_area_struct *vma_prev, *vma_next, *vma; - - /* - * When merging VMAs we are not permitted to remove any VMA that has a - * vm_ops->close() hook. - * - * Considering the two possible adjacent VMAs to which a VMA can be - * merged: - * - * [ prev ][ vma ][ next ] - * - * In no case will we need to delete prev. If the operation is - * mergeable, then prev will be extended with one or both of vma and - * next deleted. - * - * As a result, during initial mergeability checks, only - * can_vma_merge_before() (which implies the VMA being merged with is - * 'next' as shown above) bothers to check to see whether the next VMA - * has a vm_ops->close() callback that will need to be called when - * removed. - * - * If it does, then we cannot merge as the resources that the close() - * operation potentially clears down are tied only to the existing VMA - * range and we have no way of extending those to the nearly merged one. - * - * We must consider two scenarios: - * - * A. - * - * vm_ops->close: - - !NULL - * [ prev ][ vma ][ next ] - * - * Where prev may or may not be present/mergeable. - * - * This is picked up by a specific check in can_vma_merge_before(). - * - * B. - * - * vm_ops->close: - !NULL - * [ prev ][ vma ] - * - * Where prev and vma are present and mergeable. - * - * This is picked up by a specific check in the modified VMA merge. - * - * IMPORTANT NOTE: We make the assumption that the following case: - * - * - !NULL NULL - * [ prev ][ vma ][ next ] - * - * Cannot occur, because vma->vm_ops being the same implies the same - * vma->vm_file, and therefore this would mean that next->vm_ops->close - * would be set too, and thus scenario A would pick this up. - */ - - /* - * The only case of a new VMA merge that results in a VMA being deleted - * is one where both the previous and next VMAs are merged - in this - * instance the next VMA is deleted, and the previous VMA is extended. - * - * If we are unable to do so, we reduce the operation to simply - * extending the prev VMA and not merging next. - * - * 0123456789 - * PPP**NNNN - * -> - * 0123456789 - * PPPPPPNNN - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); - vma_next->vm_ops = &vm_ops; - - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - ASSERT_EQ(merge_new(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x5000); - ASSERT_EQ(vma_prev->vm_pgoff, 0); - - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - /* - * When modifying an existing VMA there are further cases where we - * delete VMAs. - * - * <> - * 0123456789 - * PPPVV - * - * In this instance, if vma has a close hook, the merge simply cannot - * proceed. - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma->vm_ops = &vm_ops; - - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma_prev; - vmg.middle = vma; - - /* - * The VMA being modified in a way that would otherwise merge should - * also fail. - */ - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - /* - * This case is mirrored if merging with next. - * - * <> - * 0123456789 - * VVNNNN - * - * In this instance, if vma has a close hook, the merge simply cannot - * proceed. - */ - - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); - vma->vm_ops = &vm_ops; - - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.middle = vma; - ASSERT_EQ(merge_existing(&vmg), NULL); - /* - * Initially this is misapprehended as an out of memory report, as the - * close() check is handled in the same way as anon_vma duplication - * failures, however a subsequent patch resolves this. - */ - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - /* - * Finally, we consider two variants of the case where we modify a VMA - * to merge with both the previous and next VMAs. - * - * The first variant is where vma has a close hook. In this instance, no - * merge can proceed. - * - * <> - * 0123456789 - * PPPVVNNNN - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); - vma->vm_ops = &vm_ops; - - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma_prev; - vmg.middle = vma; - - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - ASSERT_EQ(cleanup_mm(&mm, &vmi), 3); - - /* - * The second variant is where next has a close hook. In this instance, - * we reduce the operation to a merge between prev and vma. - * - * <> - * 0123456789 - * PPPVVNNNN - * -> - * 0123456789 - * PPPPPNNNN - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); - vma_next->vm_ops = &vm_ops; - - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma_prev; - vmg.middle = vma; - - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x5000); - ASSERT_EQ(vma_prev->vm_pgoff, 0); - - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - return true; -} - -static bool test_vma_merge_new_with_close(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - struct vm_area_struct *vma_prev = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); - struct vm_area_struct *vma_next = alloc_and_link_vma(&mm, 0x5000, 0x7000, 5, vm_flags); - const struct vm_operations_struct vm_ops = { - .close = dummy_close, - }; - struct vm_area_struct *vma; - - /* - * We should allow the partial merge of a proposed new VMA if the - * surrounding VMAs have vm_ops->close() hooks (but are otherwise - * compatible), e.g.: - * - * New VMA - * A v-------v B - * |-----| |-----| - * close close - * - * Since the rule is to not DELETE a VMA with a close operation, this - * should be permitted, only rather than expanding A and deleting B, we - * should simply expand A and leave B intact, e.g.: - * - * New VMA - * A B - * |------------||-----| - * close close - */ - - /* Have prev and next have a vm_ops->close() hook. */ - vma_prev->vm_ops = &vm_ops; - vma_next->vm_ops = &vm_ops; - - vmg_set_range(&vmg, 0x2000, 0x5000, 2, vm_flags); - vma = merge_new(&vmg); - ASSERT_NE(vma, NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x5000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->vm_ops, &vm_ops); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 2); - - cleanup_mm(&mm, &vmi); - return true; -} - -static bool __test_merge_existing(bool prev_is_sticky, bool middle_is_sticky, bool next_is_sticky) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - vm_flags_t prev_flags = vm_flags; - vm_flags_t next_flags = vm_flags; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vm_area_struct *vma, *vma_prev, *vma_next; - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - const struct vm_operations_struct vm_ops = { - .close = dummy_close, - }; - struct anon_vma_chain avc = {}; - - if (prev_is_sticky) - prev_flags |= VM_STICKY; - if (middle_is_sticky) - vm_flags |= VM_STICKY; - if (next_is_sticky) - next_flags |= VM_STICKY; - - /* - * Merge right case - partial span. - * - * <-> - * 0123456789 - * VVVVNNN - * -> - * 0123456789 - * VNNNNNN - */ - vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags); - vma->vm_ops = &vm_ops; /* This should have no impact. */ - vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags); - vma_next->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma); - vmg.middle = vma; - vmg.prev = vma; - vma_set_dummy_anon_vma(vma, &avc); - ASSERT_EQ(merge_existing(&vmg), vma_next); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_next->vm_start, 0x3000); - ASSERT_EQ(vma_next->vm_end, 0x9000); - ASSERT_EQ(vma_next->vm_pgoff, 3); - ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); - ASSERT_EQ(vma->vm_start, 0x2000); - ASSERT_EQ(vma->vm_end, 0x3000); - ASSERT_EQ(vma->vm_pgoff, 2); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_TRUE(vma_write_started(vma_next)); - ASSERT_EQ(mm.map_count, 2); - if (middle_is_sticky || next_is_sticky) - ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY)); - - /* Clear down and reset. */ - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - /* - * Merge right case - full span. - * - * <--> - * 0123456789 - * VVVVNNN - * -> - * 0123456789 - * NNNNNNN - */ - vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags); - vma_next->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, vm_flags, &dummy_anon_vma); - vmg.middle = vma; - vma_set_dummy_anon_vma(vma, &avc); - ASSERT_EQ(merge_existing(&vmg), vma_next); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_next->vm_start, 0x2000); - ASSERT_EQ(vma_next->vm_end, 0x9000); - ASSERT_EQ(vma_next->vm_pgoff, 2); - ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma_next)); - ASSERT_EQ(mm.map_count, 1); - if (middle_is_sticky || next_is_sticky) - ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY)); - - /* Clear down and reset. We should have deleted vma. */ - ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); - - /* - * Merge left case - partial span. - * - * <-> - * 0123456789 - * PPPVVVV - * -> - * 0123456789 - * PPPPPPV - */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); - vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); - vma->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma); - vmg.prev = vma_prev; - vmg.middle = vma; - vma_set_dummy_anon_vma(vma, &avc); - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x6000); - ASSERT_EQ(vma_prev->vm_pgoff, 0); - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_EQ(vma->vm_start, 0x6000); - ASSERT_EQ(vma->vm_end, 0x7000); - ASSERT_EQ(vma->vm_pgoff, 6); - ASSERT_TRUE(vma_write_started(vma_prev)); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 2); - if (prev_is_sticky || middle_is_sticky) - ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY)); - - /* Clear down and reset. */ - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - /* - * Merge left case - full span. - * - * <--> - * 0123456789 - * PPPVVVV - * -> - * 0123456789 - * PPPPPPP - */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); - vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma); - vmg.prev = vma_prev; - vmg.middle = vma; - vma_set_dummy_anon_vma(vma, &avc); - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x7000); - ASSERT_EQ(vma_prev->vm_pgoff, 0); - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma_prev)); - ASSERT_EQ(mm.map_count, 1); - if (prev_is_sticky || middle_is_sticky) - ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY)); - - /* Clear down and reset. We should have deleted vma. */ - ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); - - /* - * Merge both case. - * - * <--> - * 0123456789 - * PPPVVVVNNN - * -> - * 0123456789 - * PPPPPPPPPP - */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); - vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, next_flags); - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma); - vmg.prev = vma_prev; - vmg.middle = vma; - vma_set_dummy_anon_vma(vma, &avc); - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x9000); - ASSERT_EQ(vma_prev->vm_pgoff, 0); - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_write_started(vma_prev)); - ASSERT_EQ(mm.map_count, 1); - if (prev_is_sticky || middle_is_sticky || next_is_sticky) - ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY)); - - /* Clear down and reset. We should have deleted prev and next. */ - ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); - - /* - * Non-merge ranges. the modified VMA merge operation assumes that the - * caller always specifies ranges within the input VMA so we need only - * examine these cases. - * - * - - * - - * - - * <-> - * <> - * <> - * 0123456789a - * PPPVVVVVNNN - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, next_flags); - - vmg_set_range(&vmg, 0x4000, 0x5000, 4, vm_flags); - vmg.prev = vma; - vmg.middle = vma; - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - vmg_set_range(&vmg, 0x5000, 0x6000, 5, vm_flags); - vmg.prev = vma; - vmg.middle = vma; - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - vmg_set_range(&vmg, 0x6000, 0x7000, 6, vm_flags); - vmg.prev = vma; - vmg.middle = vma; - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - vmg_set_range(&vmg, 0x4000, 0x7000, 4, vm_flags); - vmg.prev = vma; - vmg.middle = vma; - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - vmg_set_range(&vmg, 0x4000, 0x6000, 4, vm_flags); - vmg.prev = vma; - vmg.middle = vma; - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - vmg_set_range(&vmg, 0x5000, 0x6000, 5, vm_flags); - vmg.prev = vma; - vmg.middle = vma; - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - - ASSERT_EQ(cleanup_mm(&mm, &vmi), 3); - - return true; -} - -static bool test_merge_existing(void) -{ - int i, j, k; - - /* Generate every possible permutation of sticky flags. */ - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) - ASSERT_TRUE(__test_merge_existing(i, j, k)); - - return true; -} - -static bool test_anon_vma_non_mergeable(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vm_area_struct *vma, *vma_prev, *vma_next; - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - struct anon_vma_chain dummy_anon_vma_chain_1 = {}; - struct anon_vma_chain dummy_anon_vma_chain_2 = {}; - struct anon_vma dummy_anon_vma_2; - - /* - * In the case of modified VMA merge, merging both left and right VMAs - * but where prev and next have incompatible anon_vma objects, we revert - * to a merge of prev and VMA: - * - * <--> - * 0123456789 - * PPPVVVVNNN - * -> - * 0123456789 - * PPPPPPPNNN - */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags); - - /* - * Give both prev and next single anon_vma_chain fields, so they will - * merge with the NULL vmg->anon_vma. - * - * However, when prev is compared to next, the merge should fail. - */ - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, NULL); - vmg.prev = vma_prev; - vmg.middle = vma; - vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); - __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2); - - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x7000); - ASSERT_EQ(vma_prev->vm_pgoff, 0); - ASSERT_TRUE(vma_write_started(vma_prev)); - ASSERT_FALSE(vma_write_started(vma_next)); - - /* Clear down and reset. */ - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - /* - * Now consider the new VMA case. This is equivalent, only adding a new - * VMA in a gap between prev and next. - * - * <--> - * 0123456789 - * PPP****NNN - * -> - * 0123456789 - * PPPPPPPNNN - */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags); - - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, NULL); - vmg.prev = vma_prev; - vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); - __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2); - - vmg.anon_vma = NULL; - ASSERT_EQ(merge_new(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x7000); - ASSERT_EQ(vma_prev->vm_pgoff, 0); - ASSERT_TRUE(vma_write_started(vma_prev)); - ASSERT_FALSE(vma_write_started(vma_next)); - - /* Final cleanup. */ - ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); - - return true; -} - -static bool test_dup_anon_vma(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - struct anon_vma_chain dummy_anon_vma_chain = { - .anon_vma = &dummy_anon_vma, - }; - struct vm_area_struct *vma_prev, *vma_next, *vma; - - reset_dummy_anon_vma(); - - /* - * Expanding a VMA delete the next one duplicates next's anon_vma and - * assigns it to the expanded VMA. - * - * This covers new VMA merging, as these operations amount to a VMA - * expand. - */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma_next->anon_vma = &dummy_anon_vma; - - vmg_set_range(&vmg, 0, 0x5000, 0, vm_flags); - vmg.target = vma_prev; - vmg.next = vma_next; - - ASSERT_EQ(expand_existing(&vmg), 0); - - /* Will have been cloned. */ - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_prev->anon_vma->was_cloned); - - /* Cleanup ready for next run. */ - cleanup_mm(&mm, &vmi); - - /* - * next has anon_vma, we assign to prev. - * - * |<----->| - * |-------*********-------| - * prev vma next - * extend delete delete - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); - - /* Initialise avc so mergeability check passes. */ - INIT_LIST_HEAD(&vma_next->anon_vma_chain); - list_add(&dummy_anon_vma_chain.same_vma, &vma_next->anon_vma_chain); - - vma_next->anon_vma = &dummy_anon_vma; - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma_prev; - vmg.middle = vma; - - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x8000); - - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_prev->anon_vma->was_cloned); - - cleanup_mm(&mm, &vmi); - - /* - * vma has anon_vma, we assign to prev. - * - * |<----->| - * |-------*********-------| - * prev vma next - * extend delete delete - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); - vmg.anon_vma = &dummy_anon_vma; - vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma_prev; - vmg.middle = vma; - - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x8000); - - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_prev->anon_vma->was_cloned); - - cleanup_mm(&mm, &vmi); - - /* - * vma has anon_vma, we assign to prev. - * - * |<----->| - * |-------************* - * prev vma - * extend shrink/delete - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags); - - vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma_prev; - vmg.middle = vma; - - ASSERT_EQ(merge_existing(&vmg), vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - - ASSERT_EQ(vma_prev->vm_start, 0); - ASSERT_EQ(vma_prev->vm_end, 0x5000); - - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_prev->anon_vma->was_cloned); - - cleanup_mm(&mm, &vmi); - - /* - * vma has anon_vma, we assign to next. - * - * |<----->| - * *************-------| - * vma next - * shrink/delete extend - */ - - vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); - - vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); - vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma; - vmg.middle = vma; - - ASSERT_EQ(merge_existing(&vmg), vma_next); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - - ASSERT_EQ(vma_next->vm_start, 0x3000); - ASSERT_EQ(vma_next->vm_end, 0x8000); - - ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(vma_next->anon_vma->was_cloned); - - cleanup_mm(&mm, &vmi); - return true; -} - -static bool test_vmi_prealloc_fail(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vma_merge_struct vmg = { - .mm = &mm, - .vmi = &vmi, - }; - struct anon_vma_chain avc = {}; - struct vm_area_struct *vma_prev, *vma; - - /* - * We are merging vma into prev, with vma possessing an anon_vma, which - * will be duplicated. We cause the vmi preallocation to fail and assert - * the duplicated anon_vma is unlinked. - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma->anon_vma = &dummy_anon_vma; - - vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, vm_flags, &dummy_anon_vma); - vmg.prev = vma_prev; - vmg.middle = vma; - vma_set_dummy_anon_vma(vma, &avc); - - fail_prealloc = true; - - /* This will cause the merge to fail. */ - ASSERT_EQ(merge_existing(&vmg), NULL); - ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM); - /* We will already have assigned the anon_vma. */ - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - /* And it was both cloned and unlinked. */ - ASSERT_TRUE(dummy_anon_vma.was_cloned); - ASSERT_TRUE(dummy_anon_vma.was_unlinked); - - cleanup_mm(&mm, &vmi); /* Resets fail_prealloc too. */ - - /* - * We repeat the same operation for expanding a VMA, which is what new - * VMA merging ultimately uses too. This asserts that unlinking is - * performed in this case too. - */ - - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma->anon_vma = &dummy_anon_vma; - - vmg_set_range(&vmg, 0, 0x5000, 3, vm_flags); - vmg.target = vma_prev; - vmg.next = vma; - - fail_prealloc = true; - ASSERT_EQ(expand_existing(&vmg), -ENOMEM); - ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM); - - ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); - ASSERT_TRUE(dummy_anon_vma.was_cloned); - ASSERT_TRUE(dummy_anon_vma.was_unlinked); - - cleanup_mm(&mm, &vmi); - return true; -} - -static bool test_merge_extend(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0x1000); - struct vm_area_struct *vma; - - vma = alloc_and_link_vma(&mm, 0, 0x1000, 0, vm_flags); - alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); - - /* - * Extend a VMA into the gap between itself and the following VMA. - * This should result in a merge. - * - * <-> - * * * - * - */ - - ASSERT_EQ(vma_merge_extend(&vmi, vma, 0x2000), vma); - ASSERT_EQ(vma->vm_start, 0); - ASSERT_EQ(vma->vm_end, 0x4000); - ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(mm.map_count, 1); - - cleanup_mm(&mm, &vmi); - return true; -} - -static bool test_copy_vma(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - bool need_locks = false; - VMA_ITERATOR(vmi, &mm, 0); - struct vm_area_struct *vma, *vma_new, *vma_next; - - /* Move backwards and do not merge. */ - - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); - ASSERT_NE(vma_new, vma); - ASSERT_EQ(vma_new->vm_start, 0); - ASSERT_EQ(vma_new->vm_end, 0x2000); - ASSERT_EQ(vma_new->vm_pgoff, 0); - vma_assert_attached(vma_new); - - cleanup_mm(&mm, &vmi); - - /* Move a VMA into position next to another and merge the two. */ - - vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); - vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags); - vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); - vma_assert_attached(vma_new); - - ASSERT_EQ(vma_new, vma_next); - - cleanup_mm(&mm, &vmi); - return true; -} - -static bool test_expand_only_mode(void) -{ - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; - struct mm_struct mm = {}; - VMA_ITERATOR(vmi, &mm, 0); - struct vm_area_struct *vma_prev, *vma; - VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, vm_flags, 5); - - /* - * Place a VMA prior to the one we're expanding so we assert that we do - * not erroneously try to traverse to the previous VMA even though we - * have, through the use of the just_expand flag, indicated we do not - * need to do so. - */ - alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); - - /* - * We will be positioned at the prev VMA, but looking to expand to - * 0x9000. - */ - vma_iter_set(&vmi, 0x3000); - vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); - vmg.prev = vma_prev; - vmg.just_expand = true; - - vma = vma_merge_new_range(&vmg); - ASSERT_NE(vma, NULL); - ASSERT_EQ(vma, vma_prev); - ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); - ASSERT_EQ(vma->vm_start, 0x3000); - ASSERT_EQ(vma->vm_end, 0x9000); - ASSERT_EQ(vma->vm_pgoff, 3); - ASSERT_TRUE(vma_write_started(vma)); - ASSERT_EQ(vma_iter_addr(&vmi), 0x3000); - vma_assert_attached(vma); - - cleanup_mm(&mm, &vmi); - return true; -} - -static bool test_mmap_region_basic(void) -{ - struct mm_struct mm = {}; - unsigned long addr; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, &mm, 0); - - current->mm = &mm; - - /* Map at 0x300000, length 0x3000. */ - addr = __mmap_region(NULL, 0x300000, 0x3000, - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, - 0x300, NULL); - ASSERT_EQ(addr, 0x300000); - - /* Map at 0x250000, length 0x3000. */ - addr = __mmap_region(NULL, 0x250000, 0x3000, - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, - 0x250, NULL); - ASSERT_EQ(addr, 0x250000); - - /* Map at 0x303000, merging to 0x300000 of length 0x6000. */ - addr = __mmap_region(NULL, 0x303000, 0x3000, - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, - 0x303, NULL); - ASSERT_EQ(addr, 0x303000); - - /* Map at 0x24d000, merging to 0x250000 of length 0x6000. */ - addr = __mmap_region(NULL, 0x24d000, 0x3000, - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, - 0x24d, NULL); - ASSERT_EQ(addr, 0x24d000); - - ASSERT_EQ(mm.map_count, 2); - - for_each_vma(vmi, vma) { - if (vma->vm_start == 0x300000) { - ASSERT_EQ(vma->vm_end, 0x306000); - ASSERT_EQ(vma->vm_pgoff, 0x300); - } else if (vma->vm_start == 0x24d000) { - ASSERT_EQ(vma->vm_end, 0x253000); - ASSERT_EQ(vma->vm_pgoff, 0x24d); - } else { - ASSERT_FALSE(true); - } - } - - cleanup_mm(&mm, &vmi); - return true; -} - -int main(void) -{ - int num_tests = 0, num_fail = 0; - - maple_tree_init(); - vma_state_init(); - -#define TEST(name) \ - do { \ - num_tests++; \ - if (!test_##name()) { \ - num_fail++; \ - fprintf(stderr, "Test " #name " FAILED\n"); \ - } \ - } while (0) - - /* Very simple tests to kick the tyres. */ - TEST(simple_merge); - TEST(simple_modify); - TEST(simple_expand); - TEST(simple_shrink); - - TEST(merge_new); - TEST(vma_merge_special_flags); - TEST(vma_merge_with_close); - TEST(vma_merge_new_with_close); - TEST(merge_existing); - TEST(anon_vma_non_mergeable); - TEST(dup_anon_vma); - TEST(vmi_prealloc_fail); - TEST(merge_extend); - TEST(copy_vma); - TEST(expand_only_mode); - - TEST(mmap_region_basic); - -#undef TEST - - printf("%d tests run, %d passed, %d failed.\n", - num_tests, num_tests - num_fail, num_fail); - - return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 2743f12ecf32..b48ebae3927d 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1127,15 +1127,6 @@ static inline void mapping_allow_writable(struct address_space *mapping) atomic_inc(&mapping->i_mmap_writable); } -static inline void vma_set_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - pgoff_t pgoff) -{ - vma->vm_start = start; - vma->vm_end = end; - vma->vm_pgoff = pgoff; -} - static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { -- cgit v1.2.3 From a1f0dacaaba14c7f949f5c6ab876944034620904 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 22 Jan 2026 16:06:21 +0000 Subject: tools/testing/vma: separate out vma_internal.h into logical headers The vma_internal.h file is becoming entirely unmanageable. It combines duplicated kernel implementation logic that needs to be kept in-sync with the kernel, stubbed out declarations that we simply ignore for testing purposes and custom logic added to aid testing. If we separate each of the three things into separate headers it makes things far more manageable, so do so: * include/stubs.h contains the stubbed declarations, * include/dup.h contains the duplicated kernel declarations, and * include/custom.h contains declarations customised for testing. [lorenzo.stoakes@oracle.com: avoid a duplicate struct define] Link: https://lkml.kernel.org/r/1e032732-61c3-485c-9aa7-6a09016fefc1@lucifer.local Link: https://lkml.kernel.org/r/dd57baf5b5986cb96a167150ac712cbe804b63ee.1769097829.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jason Gunthorpe Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Cc: Damien Le Moal Cc: "Darrick J. Wong" Cc: Jarkko Sakkinen Cc: Yury Norov Cc: Chris Mason Cc: Pedro Falcato Signed-off-by: Andrew Morton --- tools/testing/vma/Makefile | 2 +- tools/testing/vma/include/custom.h | 103 ++ tools/testing/vma/include/dup.h | 1329 +++++++++++++++++++++++++ tools/testing/vma/include/stubs.h | 428 ++++++++ tools/testing/vma/vma_internal.h | 1936 +----------------------------------- 5 files changed, 1873 insertions(+), 1925 deletions(-) create mode 100644 tools/testing/vma/include/custom.h create mode 100644 tools/testing/vma/include/dup.h create mode 100644 tools/testing/vma/include/stubs.h (limited to 'tools') diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile index 94133d9d3955..50aa4301b3a6 100644 --- a/tools/testing/vma/Makefile +++ b/tools/testing/vma/Makefile @@ -9,7 +9,7 @@ include ../shared/shared.mk OFILES = $(SHARED_OFILES) main.o shared.o maple-shim.o TARGETS = vma -main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h +main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h include/custom.h include/dup.h include/stubs.h vma: $(OFILES) $(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS) diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h new file mode 100644 index 000000000000..f567127efba9 --- /dev/null +++ b/tools/testing/vma/include/custom.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#pragma once + +/* + * Contains declarations that exist in the kernel which have been CUSTOMISED for + * testing purposes to faciliate userland VMA testing. + */ + +#ifdef CONFIG_MMU +extern unsigned long mmap_min_addr; +extern unsigned long dac_mmap_min_addr; +#else +#define mmap_min_addr 0UL +#define dac_mmap_min_addr 0UL +#endif + +#define VM_WARN_ON(_expr) (WARN_ON(_expr)) +#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) +#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr)) +#define VM_BUG_ON(_expr) (BUG_ON(_expr)) +#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr)) + +/* We hardcode this for now. */ +#define sysctl_max_map_count 0x1000000UL + +#define TASK_SIZE ((1ul << 47)-PAGE_SIZE) + +/* + * The shared stubs do not implement this, it amounts to an fprintf(STDERR,...) + * either way :) + */ +#define pr_warn_once pr_err + +#define pgtable_supports_soft_dirty() 1 + +struct anon_vma { + struct anon_vma *root; + struct rb_root_cached rb_root; + + /* Test fields. */ + bool was_cloned; + bool was_unlinked; +}; + +static inline void unlink_anon_vmas(struct vm_area_struct *vma) +{ + /* For testing purposes, indicate that the anon_vma was unlinked. */ + vma->anon_vma->was_unlinked = true; +} + +static inline void vma_start_write(struct vm_area_struct *vma) +{ + /* Used to indicate to tests that a write operation has begun. */ + vma->vm_lock_seq++; +} + +static inline __must_check +int vma_start_write_killable(struct vm_area_struct *vma) +{ + /* Used to indicate to tests that a write operation has begun. */ + vma->vm_lock_seq++; + return 0; +} + +static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src, + enum vma_operation operation) +{ + /* For testing purposes. We indicate that an anon_vma has been cloned. */ + if (src->anon_vma != NULL) { + dst->anon_vma = src->anon_vma; + dst->anon_vma->was_cloned = true; + } + + return 0; +} + +static inline int __anon_vma_prepare(struct vm_area_struct *vma) +{ + struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma)); + + if (!anon_vma) + return -ENOMEM; + + anon_vma->root = anon_vma; + vma->anon_vma = anon_vma; + + return 0; +} + +static inline int anon_vma_prepare(struct vm_area_struct *vma) +{ + if (likely(vma->anon_vma)) + return 0; + + return __anon_vma_prepare(vma); +} + +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) +{ + if (reset_refcnt) + refcount_set(&vma->vm_refcnt, 0); +} diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h new file mode 100644 index 000000000000..0accfc296615 --- /dev/null +++ b/tools/testing/vma/include/dup.h @@ -0,0 +1,1329 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#pragma once + +/* Forward declarations to avoid header cycle. */ +struct vm_area_struct; +static inline void vma_start_write(struct vm_area_struct *vma); + +extern const struct vm_operations_struct vma_dummy_vm_ops; +extern unsigned long stack_guard_gap; +extern const struct vm_operations_struct vma_dummy_vm_ops; +extern unsigned long rlimit(unsigned int limit); +struct task_struct *get_current(void); + +#define MMF_HAS_MDWE 28 +#define current get_current() + +/* + * Define the task command name length as enum, then it can be visible to + * BPF programs. + */ +enum { + TASK_COMM_LEN = 16, +}; + +/* PARTIALLY implemented types. */ +struct mm_struct { + struct maple_tree mm_mt; + int map_count; /* number of VMAs */ + unsigned long total_vm; /* Total pages mapped */ + unsigned long locked_vm; /* Pages that have PG_mlocked set */ + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ + unsigned long stack_vm; /* VM_STACK */ + + unsigned long def_flags; + + mm_flags_t flags; /* Must use mm_flags_* helpers to access */ +}; +struct address_space { + struct rb_root_cached i_mmap; + unsigned long flags; + atomic_t i_mmap_writable; +}; +struct file_operations { + int (*mmap)(struct file *, struct vm_area_struct *); + int (*mmap_prepare)(struct vm_area_desc *); +}; +struct file { + struct address_space *f_mapping; + const struct file_operations *f_op; +}; +struct anon_vma_chain { + struct anon_vma *anon_vma; + struct list_head same_vma; +}; +struct task_struct { + char comm[TASK_COMM_LEN]; + pid_t pid; + struct mm_struct *mm; + + /* Used for emulating ABI behavior of previous Linux versions: */ + unsigned int personality; +}; + +struct kref { + refcount_t refcount; +}; + +struct anon_vma_name { + struct kref kref; + /* The name needs to be at the end because it is dynamically sized. */ + char name[]; +}; + +/* + * Contains declarations that are DUPLICATED from kernel source in order to + * faciliate userland VMA testing. + * + * These must be kept in sync with kernel source. + */ + +#define VMA_LOCK_OFFSET 0x40000000 + +typedef struct { unsigned long v; } freeptr_t; + +#define VM_NONE 0x00000000 + +typedef int __bitwise vma_flag_t; + +#define ACCESS_PRIVATE(p, member) ((p)->member) + +#define DECLARE_VMA_BIT(name, bitnum) \ + VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum) +#define DECLARE_VMA_BIT_ALIAS(name, aliased) \ + VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT +enum { + DECLARE_VMA_BIT(READ, 0), + DECLARE_VMA_BIT(WRITE, 1), + DECLARE_VMA_BIT(EXEC, 2), + DECLARE_VMA_BIT(SHARED, 3), + /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ + DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */ + DECLARE_VMA_BIT(MAYWRITE, 5), + DECLARE_VMA_BIT(MAYEXEC, 6), + DECLARE_VMA_BIT(MAYSHARE, 7), + DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */ +#ifdef CONFIG_MMU + DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */ +#else + /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ + DECLARE_VMA_BIT(MAYOVERLAY, 9), +#endif /* CONFIG_MMU */ + /* Page-ranges managed without "struct page", just pure PFN */ + DECLARE_VMA_BIT(PFNMAP, 10), + DECLARE_VMA_BIT(MAYBE_GUARD, 11), + DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */ + DECLARE_VMA_BIT(LOCKED, 13), + DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */ + DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */ + DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */ + DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */ + DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */ + DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */ + DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */ + DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */ + DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */ + DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */ + DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */ + DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */ + DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */ + DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */ + DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */ + DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */ + DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */ + DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */ + /* These bits are reused, we define specific uses below. */ + DECLARE_VMA_BIT(HIGH_ARCH_0, 32), + DECLARE_VMA_BIT(HIGH_ARCH_1, 33), + DECLARE_VMA_BIT(HIGH_ARCH_2, 34), + DECLARE_VMA_BIT(HIGH_ARCH_3, 35), + DECLARE_VMA_BIT(HIGH_ARCH_4, 36), + DECLARE_VMA_BIT(HIGH_ARCH_5, 37), + DECLARE_VMA_BIT(HIGH_ARCH_6, 38), + /* + * This flag is used to connect VFIO to arch specific KVM code. It + * indicates that the memory under this VMA is safe for use with any + * non-cachable memory type inside KVM. Some VFIO devices, on some + * platforms, are thought to be unsafe and can cause machine crashes + * if KVM does not lock down the memory type. + */ + DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39), +#ifdef CONFIG_PPC32 + DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1), +#else + DECLARE_VMA_BIT(DROPPABLE, 40), +#endif + DECLARE_VMA_BIT(UFFD_MINOR, 41), + DECLARE_VMA_BIT(SEALED, 42), + /* Flags that reuse flags above. */ + DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4), +#if defined(CONFIG_X86_USER_SHADOW_STACK) + /* + * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of + * support core mm. + * + * These VMAs will get a single end guard page. This helps userspace + * protect itself from attacks. A single page is enough for current + * shadow stack archs (x86). See the comments near alloc_shstk() in + * arch/x86/kernel/shstk.c for more details on the guard size. + */ + DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5), +#elif defined(CONFIG_ARM64_GCS) + /* + * arm64's Guarded Control Stack implements similar functionality and + * has similar constraints to shadow stacks. + */ + DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6), +#endif + DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */ + DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */ + DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */ + DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */ + DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */ + DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */ + DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */ + DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */ +#ifdef CONFIG_STACK_GROWSUP + DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP), + DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN), +#else + DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN), +#endif +}; + +#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT) +#define VM_READ INIT_VM_FLAG(READ) +#define VM_WRITE INIT_VM_FLAG(WRITE) +#define VM_EXEC INIT_VM_FLAG(EXEC) +#define VM_SHARED INIT_VM_FLAG(SHARED) +#define VM_MAYREAD INIT_VM_FLAG(MAYREAD) +#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE) +#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC) +#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE) +#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN) +#ifdef CONFIG_MMU +#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING) +#else +#define VM_UFFD_MISSING VM_NONE +#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY) +#endif +#define VM_PFNMAP INIT_VM_FLAG(PFNMAP) +#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD) +#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP) +#define VM_LOCKED INIT_VM_FLAG(LOCKED) +#define VM_IO INIT_VM_FLAG(IO) +#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ) +#define VM_RAND_READ INIT_VM_FLAG(RAND_READ) +#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY) +#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND) +#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT) +#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT) +#define VM_NORESERVE INIT_VM_FLAG(NORESERVE) +#define VM_HUGETLB INIT_VM_FLAG(HUGETLB) +#define VM_SYNC INIT_VM_FLAG(SYNC) +#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1) +#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK) +#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP) +#ifdef CONFIG_MEM_SOFT_DIRTY +#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY) +#else +#define VM_SOFTDIRTY VM_NONE +#endif +#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP) +#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE) +#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE) +#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE) +#define VM_STACK INIT_VM_FLAG(STACK) +#ifdef CONFIG_STACK_GROWS_UP +#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY) +#else +#define VM_STACK_EARLY VM_NONE +#endif +#ifdef CONFIG_ARCH_HAS_PKEYS +#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT) +/* Despite the naming, these are FLAGS not bits. */ +#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0) +#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1) +#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2) +#if CONFIG_ARCH_PKEY_BITS > 3 +#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3) +#else +#define VM_PKEY_BIT3 VM_NONE +#endif /* CONFIG_ARCH_PKEY_BITS > 3 */ +#if CONFIG_ARCH_PKEY_BITS > 4 +#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4) +#else +#define VM_PKEY_BIT4 VM_NONE +#endif /* CONFIG_ARCH_PKEY_BITS > 4 */ +#endif /* CONFIG_ARCH_HAS_PKEYS */ +#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) +#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK) +#else +#define VM_SHADOW_STACK VM_NONE +#endif +#if defined(CONFIG_PPC64) +#define VM_SAO INIT_VM_FLAG(SAO) +#elif defined(CONFIG_PARISC) +#define VM_GROWSUP INIT_VM_FLAG(GROWSUP) +#elif defined(CONFIG_SPARC64) +#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI) +#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) +#elif defined(CONFIG_ARM64) +#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI) +#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) +#elif !defined(CONFIG_MMU) +#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY) +#endif +#ifndef VM_GROWSUP +#define VM_GROWSUP VM_NONE +#endif +#ifdef CONFIG_ARM64_MTE +#define VM_MTE INIT_VM_FLAG(MTE) +#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED) +#else +#define VM_MTE VM_NONE +#define VM_MTE_ALLOWED VM_NONE +#endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR +#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR) +#else +#define VM_UFFD_MINOR VM_NONE +#endif +#ifdef CONFIG_64BIT +#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED) +#define VM_SEALED INIT_VM_FLAG(SEALED) +#else +#define VM_ALLOW_ANY_UNCACHED VM_NONE +#define VM_SEALED VM_NONE +#endif +#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) +#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE) +#else +#define VM_DROPPABLE VM_NONE +#endif + +/* Bits set in the VMA until the stack is in its final location */ +#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) + +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) + +/* Common data flag combinations */ +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ + VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC +#endif + +#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ +#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS +#endif + +#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) + +#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) + +/* VMA basic access permission flags */ +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) + +/* + * Special vmas that are non-mergable, non-mlock()able. + */ +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) + +#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW +#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW +#define STACK_TOP TASK_SIZE_LOW +#define STACK_TOP_MAX TASK_SIZE_MAX + +/* This mask represents all the VMA flag bits used by mlock */ +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) + +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) + +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define RLIMIT_STACK 3 /* max stack size */ +#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ + +#define CAP_IPC_LOCK 14 + +#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) + +#define VM_IGNORE_MERGE VM_STICKY + +#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#define for_each_vma(__vmi, __vma) \ + while (((__vma) = vma_next(&(__vmi))) != NULL) + +/* The MM code likes to work with exclusive end addresses */ +#define for_each_vma_range(__vmi, __vma, __end) \ + while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) + +#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) + +#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) + +#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr) +#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr) + +#define AS_MM_ALL_LOCKS 2 + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +/* + * Flags for bug emulation. + * + * These occupy the top three bytes. + */ +enum { + READ_IMPLIES_EXEC = 0x0400000, +}; + +struct vma_iterator { + struct ma_state mas; +}; + +#define VMA_ITERATOR(name, __mm, __addr) \ + struct vma_iterator name = { \ + .mas = { \ + .tree = &(__mm)->mm_mt, \ + .index = __addr, \ + .node = NULL, \ + .status = ma_start, \ + }, \ + } + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = {} + +#define DECLARE_BITMAP(name, bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +#define EMPTY_VMA_FLAGS ((vma_flags_t){ }) + +/* What action should be taken after an .mmap_prepare call is complete? */ +enum mmap_action_type { + MMAP_NOTHING, /* Mapping is complete, no further action. */ + MMAP_REMAP_PFN, /* Remap PFN range. */ + MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */ +}; + +/* + * Describes an action an mmap_prepare hook can instruct to be taken to complete + * the mapping of a VMA. Specified in vm_area_desc. + */ +struct mmap_action { + union { + /* Remap range. */ + struct { + unsigned long start; + unsigned long start_pfn; + unsigned long size; + pgprot_t pgprot; + } remap; + }; + enum mmap_action_type type; + + /* + * If specified, this hook is invoked after the selected action has been + * successfully completed. Note that the VMA write lock still held. + * + * The absolute minimum ought to be done here. + * + * Returns 0 on success, or an error code. + */ + int (*success_hook)(const struct vm_area_struct *vma); + + /* + * If specified, this hook is invoked when an error occurred when + * attempting the selection action. + * + * The hook can return an error code in order to filter the error, but + * it is not valid to clear the error here. + */ + int (*error_hook)(int err); + + /* + * This should be set in rare instances where the operation required + * that the rmap should not be able to access the VMA until + * completely set up. + */ + bool hide_from_rmap_until_complete :1; +}; + +/* Operations which modify VMAs. */ +enum vma_operation { + VMA_OP_SPLIT, + VMA_OP_MERGE_UNFAULTED, + VMA_OP_REMAP, + VMA_OP_FORK, +}; + +/* + * Describes a VMA that is about to be mmap()'ed. Drivers may choose to + * manipulate mutable fields which will cause those fields to be updated in the + * resultant VMA. + * + * Helper functions are not required for manipulating any field. + */ +struct vm_area_desc { + /* Immutable state. */ + const struct mm_struct *const mm; + struct file *const file; /* May vary from vm_file in stacked callers. */ + unsigned long start; + unsigned long end; + + /* Mutable fields. Populated with initial state. */ + pgoff_t pgoff; + struct file *vm_file; + union { + vm_flags_t vm_flags; + vma_flags_t vma_flags; + }; + pgprot_t page_prot; + + /* Write-only fields. */ + const struct vm_operations_struct *vm_ops; + void *private_data; + + /* Take further action? */ + struct mmap_action action; +}; + +struct vm_area_struct { + /* The first cache line has the info for VMA tree walking. */ + + union { + struct { + /* VMA covers [vm_start; vm_end) addresses within mm */ + unsigned long vm_start; + unsigned long vm_end; + }; + freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ + }; + + struct mm_struct *vm_mm; /* The address space we belong to. */ + pgprot_t vm_page_prot; /* Access permissions of this VMA. */ + + /* + * Flags, see mm.h. + * To modify use vm_flags_{init|reset|set|clear|mod} functions. + */ + union { + const vm_flags_t vm_flags; + vma_flags_t flags; + }; + +#ifdef CONFIG_PER_VMA_LOCK + /* + * Can only be written (using WRITE_ONCE()) while holding both: + * - mmap_lock (in write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set + * Can be read reliably while holding one of: + * - mmap_lock (in read or write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 + * Can be read unreliably (using READ_ONCE()) for pessimistic bailout + * while holding nothing (except RCU to keep the VMA struct allocated). + * + * This sequence counter is explicitly allowed to overflow; sequence + * counter reuse can only lead to occasional unnecessary use of the + * slowpath. + */ + unsigned int vm_lock_seq; +#endif + + /* + * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma + * list, after a COW of one of the file pages. A MAP_SHARED vma + * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack + * or brk vma (with NULL file) can only be in an anon_vma list. + */ + struct list_head anon_vma_chain; /* Serialized by mmap_lock & + * page_table_lock */ + struct anon_vma *anon_vma; /* Serialized by page_table_lock */ + + /* Function pointers to deal with this struct. */ + const struct vm_operations_struct *vm_ops; + + /* Information about our backing store: */ + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + units */ + struct file * vm_file; /* File we map to (can be NULL). */ + void * vm_private_data; /* was vm_pte (shared mem) */ + +#ifdef CONFIG_SWAP + atomic_long_t swap_readahead_info; +#endif +#ifndef CONFIG_MMU + struct vm_region *vm_region; /* NOMMU mapping region */ +#endif +#ifdef CONFIG_NUMA + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ +#endif +#ifdef CONFIG_NUMA_BALANCING + struct vma_numab_state *numab_state; /* NUMA Balancing state */ +#endif +#ifdef CONFIG_PER_VMA_LOCK + /* Unstable RCU readers are allowed to read this. */ + refcount_t vm_refcnt; +#endif + /* + * For areas with an address space and backing store, + * linkage into the address_space->i_mmap interval tree. + * + */ + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif + struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +} __randomize_layout; + +struct vm_operations_struct { + void (*open)(struct vm_area_struct * area); + /** + * @close: Called when the VMA is being removed from the MM. + * Context: User context. May sleep. Caller holds mmap_lock. + */ + void (*close)(struct vm_area_struct * area); + /* Called any time before splitting to check if it's allowed */ + int (*may_split)(struct vm_area_struct *area, unsigned long addr); + int (*mremap)(struct vm_area_struct *area); + /* + * Called by mprotect() to make driver-specific permission + * checks before mprotect() is finalised. The VMA must not + * be modified. Returns 0 if mprotect() can proceed. + */ + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags); + vm_fault_t (*fault)(struct vm_fault *vmf); + vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); + vm_fault_t (*map_pages)(struct vm_fault *vmf, + pgoff_t start_pgoff, pgoff_t end_pgoff); + unsigned long (*pagesize)(struct vm_area_struct * area); + + /* notification that a previously read-only page is about to become + * writable, if an error is returned it will cause a SIGBUS */ + vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); + + /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ + vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); + + /* called by access_process_vm when get_user_pages() fails, typically + * for use by special VMAs. See also generic_access_phys() for a generic + * implementation useful for any iomem mapping. + */ + int (*access)(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); + + /* Called by the /proc/PID/maps code to ask the vma whether it + * has a special name. Returning non-NULL will also cause this + * vma to be dumped unconditionally. */ + const char *(*name)(struct vm_area_struct *vma); + +#ifdef CONFIG_NUMA + /* + * set_policy() op must add a reference to any non-NULL @new mempolicy + * to hold the policy upon return. Caller should pass NULL @new to + * remove a policy and fall back to surrounding context--i.e. do not + * install a MPOL_DEFAULT policy, nor the task or system default + * mempolicy. + */ + int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); + + /* + * get_policy() op must add reference [mpol_get()] to any policy at + * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure + * in mm/mempolicy.c will do this automatically. + * get_policy() must NOT add a ref if the policy at (vma,addr) is not + * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. + * If no [shared/vma] mempolicy exists at the addr, get_policy() op + * must return NULL--i.e., do not "fallback" to task or system default + * policy. + */ + struct mempolicy *(*get_policy)(struct vm_area_struct *vma, + unsigned long addr, pgoff_t *ilx); +#endif +#ifdef CONFIG_FIND_NORMAL_PAGE + /* + * Called by vm_normal_page() for special PTEs in @vma at @addr. This + * allows for returning a "normal" page from vm_normal_page() even + * though the PTE indicates that the "struct page" either does not exist + * or should not be touched: "special". + * + * Do not add new users: this really only works when a "normal" page + * was mapped, but then the PTE got changed to something weird (+ + * marked special) that would not make pte_pfn() identify the originally + * inserted page. + */ + struct page *(*find_normal_page)(struct vm_area_struct *vma, + unsigned long addr); +#endif /* CONFIG_FIND_NORMAL_PAGE */ +}; + +struct vm_unmapped_area_info { +#define VM_UNMAPPED_AREA_TOPDOWN 1 + unsigned long flags; + unsigned long length; + unsigned long low_limit; + unsigned long high_limit; + unsigned long align_mask; + unsigned long align_offset; + unsigned long start_gap; +}; + +struct pagetable_move_control { + struct vm_area_struct *old; /* Source VMA. */ + struct vm_area_struct *new; /* Destination VMA. */ + unsigned long old_addr; /* Address from which the move begins. */ + unsigned long old_end; /* Exclusive address at which old range ends. */ + unsigned long new_addr; /* Address to move page tables to. */ + unsigned long len_in; /* Bytes to remap specified by user. */ + + bool need_rmap_locks; /* Do rmap locks need to be taken? */ + bool for_stack; /* Is this an early temp stack being moved? */ +}; + +#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ + struct pagetable_move_control name = { \ + .old = old_, \ + .new = new_, \ + .old_addr = old_addr_, \ + .old_end = (old_addr_) + (len_), \ + .new_addr = new_addr_, \ + .len_in = len_, \ + } + +static inline void vma_iter_invalidate(struct vma_iterator *vmi) +{ + mas_pause(&vmi->mas); +} + +static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) +{ + return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot)); +} + +static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) +{ + return __pgprot(vm_flags); +} + +static inline bool mm_flags_test(int flag, const struct mm_struct *mm) +{ + return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + +/* + * Copy value to the first system word of VMA flags, non-atomically. + * + * IMPORTANT: This does not overwrite bytes past the first system word. The + * caller must account for this. + */ +static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value) +{ + *ACCESS_PRIVATE(flags, __vma_flags) = value; +} + +/* + * Copy value to the first system word of VMA flags ONCE, non-atomically. + * + * IMPORTANT: This does not overwrite bytes past the first system word. The + * caller must account for this. + */ +static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); + + WRITE_ONCE(*bitmap, value); +} + +/* Update the first system word of VMA flags setting bits, non-atomically. */ +static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); + + *bitmap |= value; +} + +/* Update the first system word of VMA flags clearing bits, non-atomically. */ +static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); + + *bitmap &= ~value; +} + +static inline void vma_flags_clear_all(vma_flags_t *flags) +{ + bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS); +} + +static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit) +{ + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); + + __set_bit((__force int)bit, bitmap); +} + +/* Use when VMA is not part of the VMA tree and needs no locking */ +static inline void vm_flags_init(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_flags_clear_all(&vma->flags); + vma_flags_overwrite_word(&vma->flags, flags); +} + +/* + * Use when VMA is part of the VMA tree and modifications need coordination + * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and + * it should be locked explicitly beforehand. + */ +static inline void vm_flags_reset(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_assert_write_locked(vma); + vm_flags_init(vma, flags); +} + +static inline void vm_flags_reset_once(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_assert_write_locked(vma); + /* + * The user should only be interested in avoiding reordering of + * assignment to the first word. + */ + vma_flags_clear_all(&vma->flags); + vma_flags_overwrite_word_once(&vma->flags, flags); +} + +static inline void vm_flags_set(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma_flags_set_word(&vma->flags, flags); +} + +static inline void vm_flags_clear(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma_flags_clear_word(&vma->flags, flags); +} + +static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) +{ + vma_flags_t flags; + int i; + + vma_flags_clear_all(&flags); + for (i = 0; i < count; i++) + vma_flag_set(&flags, bits[i]); + return flags; +} + +#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \ + (const vma_flag_t []){__VA_ARGS__}) + +static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags, + vma_flags_t to_test) +{ + const unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_test = to_test.__vma_flags; + + return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_test(flags, ...) \ + vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags, + vma_flags_t to_test) +{ + const unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_test = to_test.__vma_flags; + + return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_test_all(flags, ...) \ + vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set) +{ + unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_set = to_set.__vma_flags; + + bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_set(flags, ...) \ + vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear) +{ + unsigned long *bitmap = flags->__vma_flags; + const unsigned long *bitmap_to_clear = to_clear.__vma_flags; + + bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS); +} + +#define vma_flags_clear(flags, ...) \ + vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__)) + +static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, + vma_flags_t flags) +{ + return vma_flags_test_all_mask(&vma->flags, flags); +} + +#define vma_test_all_flags(vma, ...) \ + vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) + +static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags) +{ + return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == + (VM_SHARED | VM_MAYWRITE); +} + +static inline void vma_set_flags_mask(struct vm_area_struct *vma, + vma_flags_t flags) +{ + vma_flags_set_mask(&vma->flags, flags); +} + +#define vma_set_flags(vma, ...) \ + vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) + +static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc, + vma_flags_t flags) +{ + return vma_flags_test_mask(&desc->vma_flags, flags); +} + +#define vma_desc_test_flags(desc, ...) \ + vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) + +static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, + vma_flags_t flags) +{ + vma_flags_set_mask(&desc->vma_flags, flags); +} + +#define vma_desc_set_flags(desc, ...) \ + vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) + +static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, + vma_flags_t flags) +{ + vma_flags_clear_mask(&desc->vma_flags, flags); +} + +#define vma_desc_clear_flags(desc, ...) \ + vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) + +static inline bool is_shared_maywrite(const vma_flags_t *flags) +{ + return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT); +} + +static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) +{ + return is_shared_maywrite(&vma->flags); +} + +static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) +{ + /* + * Uses mas_find() to get the first VMA when the iterator starts. + * Calling mas_next() could skip the first entry. + */ + return mas_find(&vmi->mas, ULONG_MAX); +} + +/* + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these + * assertions should be made either under mmap_write_lock or when the object + * has been isolated under mmap_write_lock, ensuring no competing writers. + */ +static inline void vma_assert_attached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); +} + +static inline void vma_assert_detached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); +} + +static inline void vma_assert_write_locked(struct vm_area_struct *); +static inline void vma_mark_attached(struct vm_area_struct *vma) +{ + vma_assert_write_locked(vma); + vma_assert_detached(vma); + refcount_set_release(&vma->vm_refcnt, 1); +} + +static inline void vma_mark_detached(struct vm_area_struct *vma) +{ + vma_assert_write_locked(vma); + vma_assert_attached(vma); + /* We are the only writer, so no need to use vma_refcount_put(). */ + if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { + /* + * Reader must have temporarily raised vm_refcnt but it will + * drop it without using the vma since vma is write-locked. + */ + } +} + +static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) +{ + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = mm; + vma->vm_ops = &vma_dummy_vm_ops; + INIT_LIST_HEAD(&vma->anon_vma_chain); + vma->vm_lock_seq = UINT_MAX; +} + +/* + * These are defined in vma.h, but sadly vm_stat_account() is referenced by + * kernel/fork.c, so we have to these broadly available there, and temporarily + * define them here to resolve the dependency cycle. + */ +#define is_exec_mapping(flags) \ + ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC) + +#define is_stack_mapping(flags) \ + (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK)) + +#define is_data_mapping(flags) \ + ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE) + +static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, + long npages) +{ + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); + + if (is_exec_mapping(flags)) + mm->exec_vm += npages; + else if (is_stack_mapping(flags)) + mm->stack_vm += npages; + else if (is_data_mapping(flags)) + mm->data_vm += npages; +} + +#undef is_exec_mapping +#undef is_stack_mapping +#undef is_data_mapping + +static inline void vm_unacct_memory(long pages) +{ + vm_acct_memory(-pages); +} + +static inline void mapping_allow_writable(struct address_space *mapping) +{ + atomic_inc(&mapping->i_mmap_writable); +} + +static inline +struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) +{ + return mas_find(&vmi->mas, max - 1); +} + +static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, + unsigned long start, unsigned long end, gfp_t gfp) +{ + __mas_set_range(&vmi->mas, start, end - 1); + mas_store_gfp(&vmi->mas, NULL, gfp); + if (unlikely(mas_is_err(&vmi->mas))) + return -ENOMEM; + + return 0; +} + +static inline void vma_set_anonymous(struct vm_area_struct *vma) +{ + vma->vm_ops = NULL; +} + +/* Declared in vma.h. */ +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); + +static inline int __compat_vma_mmap(const struct file_operations *f_op, + struct file *file, struct vm_area_struct *vma) +{ + struct vm_area_desc desc = { + .mm = vma->vm_mm, + .file = file, + .start = vma->vm_start, + .end = vma->vm_end, + + .pgoff = vma->vm_pgoff, + .vm_file = vma->vm_file, + .vm_flags = vma->vm_flags, + .page_prot = vma->vm_page_prot, + + .action.type = MMAP_NOTHING, /* Default */ + }; + int err; + + err = f_op->mmap_prepare(&desc); + if (err) + return err; + + mmap_action_prepare(&desc.action, &desc); + set_vma_from_desc(vma, &desc); + return mmap_action_complete(&desc.action, vma); +} + +static inline int compat_vma_mmap(struct file *file, + struct vm_area_struct *vma) +{ + return __compat_vma_mmap(file->f_op, file, vma); +} + + +static inline void vma_iter_init(struct vma_iterator *vmi, + struct mm_struct *mm, unsigned long addr) +{ + mas_init(&vmi->mas, &mm->mm_mt, addr); +} + +static inline unsigned long vma_pages(struct vm_area_struct *vma) +{ + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; +} + +static inline void mmap_assert_locked(struct mm_struct *); +static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, + unsigned long start_addr, + unsigned long end_addr) +{ + unsigned long index = start_addr; + + mmap_assert_locked(mm); + return mt_find(&mm->mm_mt, &index, end_addr - 1); +} + +static inline +struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) +{ + return mtree_load(&mm->mm_mt, addr); +} + +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) +{ + return mas_prev(&vmi->mas, 0); +} + +static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) +{ + mas_set(&vmi->mas, addr); +} + +static inline bool vma_is_anonymous(struct vm_area_struct *vma) +{ + return !vma->vm_ops; +} + +/* Defined in vma.h, so temporarily define here to avoid circular dependency. */ +#define vma_iter_load(vmi) \ + mas_walk(&(vmi)->mas) + +static inline struct vm_area_struct * +find_vma_prev(struct mm_struct *mm, unsigned long addr, + struct vm_area_struct **pprev) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, addr); + + vma = vma_iter_load(&vmi); + *pprev = vma_prev(&vmi); + if (!vma) + vma = vma_next(&vmi); + return vma; +} + +#undef vma_iter_load + +static inline void vma_iter_free(struct vma_iterator *vmi) +{ + mas_destroy(&vmi->mas); +} + +static inline +struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi) +{ + return mas_next_range(&vmi->mas, ULONG_MAX); +} + +bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); + +/* Update vma->vm_page_prot to reflect vma->vm_flags. */ +static inline void vma_set_page_prot(struct vm_area_struct *vma) +{ + vm_flags_t vm_flags = vma->vm_flags; + pgprot_t vm_page_prot; + + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags)); + + if (vma_wants_writenotify(vma, vm_page_prot)) { + vm_flags &= ~VM_SHARED; + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags)); + } + /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ + WRITE_ONCE(vma->vm_page_prot, vm_page_prot); +} + +static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_GROWSDOWN) + return stack_guard_gap; + + /* See reasoning around the VM_SHADOW_STACK definition */ + if (vma->vm_flags & VM_SHADOW_STACK) + return PAGE_SIZE; + + return 0; +} + +static inline unsigned long vm_start_gap(struct vm_area_struct *vma) +{ + unsigned long gap = stack_guard_start_gap(vma); + unsigned long vm_start = vma->vm_start; + + vm_start -= gap; + if (vm_start > vma->vm_start) + vm_start = 0; + return vm_start; +} + +static inline unsigned long vm_end_gap(struct vm_area_struct *vma) +{ + unsigned long vm_end = vma->vm_end; + + if (vma->vm_flags & VM_GROWSUP) { + vm_end += stack_guard_gap; + if (vm_end < vma->vm_end) + vm_end = -PAGE_SIZE; + } + return vm_end; +} + +static inline bool vma_is_accessible(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_ACCESS_FLAGS; +} + +static inline bool mlock_future_ok(const struct mm_struct *mm, + vm_flags_t vm_flags, unsigned long bytes) +{ + unsigned long locked_pages, limit_pages; + + if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) + return true; + + locked_pages = bytes >> PAGE_SHIFT; + locked_pages += mm->locked_vm; + + limit_pages = rlimit(RLIMIT_MEMLOCK); + limit_pages >>= PAGE_SHIFT; + + return locked_pages <= limit_pages; +} + +static inline bool map_deny_write_exec(unsigned long old, unsigned long new) +{ + /* If MDWE is disabled, we have nothing to deny. */ + if (mm_flags_test(MMF_HAS_MDWE, current->mm)) + return false; + + /* If the new VMA is not executable, we have nothing to deny. */ + if (!(new & VM_EXEC)) + return false; + + /* Under MDWE we do not accept newly writably executable VMAs... */ + if (new & VM_WRITE) + return true; + + /* ...nor previously non-executable VMAs becoming executable. */ + if (!(old & VM_EXEC)) + return true; + + return false; +} + +static inline int mapping_map_writable(struct address_space *mapping) +{ + return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? + 0 : -EPERM; +} + +/* Did the driver provide valid mmap hook configuration? */ +static inline bool can_mmap_file(struct file *file) +{ + bool has_mmap = file->f_op->mmap; + bool has_mmap_prepare = file->f_op->mmap_prepare; + + /* Hooks are mutually exclusive. */ + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) + return false; + if (!has_mmap && !has_mmap_prepare) + return false; + + return true; +} + +static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (file->f_op->mmap_prepare) + return compat_vma_mmap(file, vma); + + return file->f_op->mmap(file, vma); +} + +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) +{ + return file->f_op->mmap_prepare(desc); +} + +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h new file mode 100644 index 000000000000..947a3a0c2566 --- /dev/null +++ b/tools/testing/vma/include/stubs.h @@ -0,0 +1,428 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#pragma once + +/* + * Contains declarations that are STUBBED, that is that are rendered no-ops, in + * order to faciliate userland VMA testing. + */ + +/* Forward declarations. */ +struct mm_struct; +struct vm_area_struct; +struct vm_area_desc; +struct pagetable_move_control; +struct mmap_action; +struct file; +struct anon_vma; +struct anon_vma_chain; +struct address_space; +struct unmap_desc; + +#define __bitwise +#define __randomize_layout + +#define FIRST_USER_ADDRESS 0UL +#define USER_PGTABLES_CEILING 0UL + +#define vma_policy(vma) NULL + +#define down_write_nest_lock(sem, nest_lock) + +#define data_race(expr) expr + +#define ASSERT_EXCLUSIVE_WRITER(x) + +struct vm_userfaultfd_ctx {}; +struct mempolicy {}; +struct mmu_gather {}; +struct mutex {}; +struct vm_fault {}; + +static inline void userfaultfd_unmap_complete(struct mm_struct *mm, + struct list_head *uf) +{ +} + +static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) +{ + return 0; +} + +static inline void free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ +} + +static inline int ksm_execve(struct mm_struct *mm) +{ + return 0; +} + +static inline void ksm_exit(struct mm_struct *mm) +{ +} + +static inline void vma_numab_state_init(struct vm_area_struct *vma) +{ +} + +static inline void vma_numab_state_free(struct vm_area_struct *vma) +{ +} + +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) +{ +} + +static inline void free_anon_vma_name(struct vm_area_struct *vma) +{ +} + +static inline void mmap_action_prepare(struct mmap_action *action, + struct vm_area_desc *desc) +{ +} + +static inline int mmap_action_complete(struct mmap_action *action, + struct vm_area_struct *vma) +{ + return 0; +} + +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + +static inline bool shmem_file(struct file *file) +{ + return false; +} + +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, + const struct file *file, vm_flags_t vm_flags) +{ + return vm_flags; +} + +static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn) +{ +} + +static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t pgprot) +{ + return 0; +} + +static inline int do_munmap(struct mm_struct *, unsigned long, size_t, + struct list_head *uf) +{ + return 0; +} + +/* Currently stubbed but we may later wish to un-stub. */ +static inline void vm_acct_memory(long pages); + +static inline void mmap_assert_locked(struct mm_struct *mm) +{ +} + + +static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) +{ +} + +static inline void i_mmap_unlock_write(struct address_space *mapping) +{ +} + +static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + struct list_head *unmaps) +{ + return 0; +} + +static inline void mmap_write_downgrade(struct mm_struct *mm) +{ +} + +static inline void mmap_read_unlock(struct mm_struct *mm) +{ +} + +static inline void mmap_write_unlock(struct mm_struct *mm) +{ +} + +static inline int mmap_write_lock_killable(struct mm_struct *mm) +{ + return 0; +} + +static inline bool can_modify_mm(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + return true; +} + +static inline void arch_unmap(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ +} + +static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) +{ + return true; +} + +static inline void khugepaged_enter_vma(struct vm_area_struct *vma, + vm_flags_t vm_flags) +{ +} + +static inline bool mapping_can_writeback(struct address_space *mapping) +{ + return true; +} + +static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +{ + return false; +} + +static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) +{ + return false; +} + +static inline bool userfaultfd_wp(struct vm_area_struct *vma) +{ + return false; +} + +static inline void mmap_assert_write_locked(struct mm_struct *mm) +{ +} + +static inline void mutex_lock(struct mutex *lock) +{ +} + +static inline void mutex_unlock(struct mutex *lock) +{ +} + +static inline bool mutex_is_locked(struct mutex *lock) +{ + return true; +} + +static inline bool signal_pending(void *p) +{ + return false; +} + +static inline bool is_file_hugepages(struct file *file) +{ + return false; +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +{ + return 0; +} + +static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, + unsigned long npages) +{ + return true; +} + +static inline int shmem_zero_setup(struct vm_area_struct *vma) +{ + return 0; +} + + +static inline void vm_acct_memory(long pages) +{ +} + +static inline void vma_interval_tree_insert(struct vm_area_struct *vma, + struct rb_root_cached *rb) +{ +} + +static inline void vma_interval_tree_remove(struct vm_area_struct *vma, + struct rb_root_cached *rb) +{ +} + +static inline void flush_dcache_mmap_unlock(struct address_space *mapping) +{ +} + +static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc, + struct rb_root_cached *rb) +{ +} + +static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc, + struct rb_root_cached *rb) +{ +} + +static inline void uprobe_mmap(struct vm_area_struct *vma) +{ +} + +static inline void uprobe_munmap(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +static inline void i_mmap_lock_write(struct address_space *mapping) +{ +} + +static inline void anon_vma_lock_write(struct anon_vma *anon_vma) +{ +} + +static inline void vma_assert_write_locked(struct vm_area_struct *vma) +{ +} + +static inline void ksm_add_vma(struct vm_area_struct *vma) +{ +} + +static inline void perf_event_mmap(struct vm_area_struct *vma) +{ +} + +static inline bool vma_is_dax(struct vm_area_struct *vma) +{ + return false; +} + +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +static inline bool arch_validate_flags(vm_flags_t flags) +{ + return true; +} + +static inline void vma_close(struct vm_area_struct *vma) +{ +} + +static inline int mmap_file(struct file *file, struct vm_area_struct *vma) +{ + return 0; +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline bool capable(int cap) +{ + return true; +} + +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx vm_ctx) +{ + return true; +} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) +{ + return true; +} + +static inline void might_sleep(void) +{ +} + +static inline void fput(struct file *file) +{ +} + +static inline void mpol_put(struct mempolicy *pol) +{ +} + +static inline void lru_add_drain(void) +{ +} + +static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) +{ +} + +static inline void update_hiwater_rss(struct mm_struct *mm) +{ +} + +static inline void update_hiwater_vm(struct mm_struct *mm) +{ +} + +static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) +{ +} + +static inline void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap) +{ +} + +static inline void mapping_unmap_writable(struct address_space *mapping) +{ +} + +static inline void flush_dcache_mmap_lock(struct address_space *mapping) +{ +} + +static inline void tlb_finish_mmu(struct mmu_gather *tlb) +{ +} + +static inline struct file *get_file(struct file *f) +{ + return f; +} + +static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) +{ + return 0; +} + +static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + struct vm_area_struct *next) +{ +} + +static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index b48ebae3927d..e3ed05b57819 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -12,15 +12,11 @@ #ifndef __MM_VMA_INTERNAL_H #define __MM_VMA_INTERNAL_H -#define __private -#define __bitwise -#define __randomize_layout +#include #define CONFIG_MMU #define CONFIG_PER_VMA_LOCK -#include - #ifdef __CONCAT #undef __CONCAT #endif @@ -35,1936 +31,28 @@ #include #include -extern unsigned long stack_guard_gap; -#ifdef CONFIG_MMU -extern unsigned long mmap_min_addr; -extern unsigned long dac_mmap_min_addr; -#else -#define mmap_min_addr 0UL -#define dac_mmap_min_addr 0UL -#endif - -#define ACCESS_PRIVATE(p, member) ((p)->member) - -#define VM_WARN_ON(_expr) (WARN_ON(_expr)) -#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) -#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr)) -#define VM_BUG_ON(_expr) (BUG_ON(_expr)) -#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr)) - -#define MMF_HAS_MDWE 28 - -/* - * vm_flags in vm_area_struct, see mm_types.h. - * When changing, update also include/trace/events/mmflags.h - */ - -#define VM_NONE 0x00000000 - -/** - * typedef vma_flag_t - specifies an individual VMA flag by bit number. - * - * This value is made type safe by sparse to avoid passing invalid flag values - * around. - */ -typedef int __bitwise vma_flag_t; - -#define DECLARE_VMA_BIT(name, bitnum) \ - VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum) -#define DECLARE_VMA_BIT_ALIAS(name, aliased) \ - VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT -enum { - DECLARE_VMA_BIT(READ, 0), - DECLARE_VMA_BIT(WRITE, 1), - DECLARE_VMA_BIT(EXEC, 2), - DECLARE_VMA_BIT(SHARED, 3), - /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ - DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */ - DECLARE_VMA_BIT(MAYWRITE, 5), - DECLARE_VMA_BIT(MAYEXEC, 6), - DECLARE_VMA_BIT(MAYSHARE, 7), - DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */ -#ifdef CONFIG_MMU - DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */ -#else - /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ - DECLARE_VMA_BIT(MAYOVERLAY, 9), -#endif /* CONFIG_MMU */ - /* Page-ranges managed without "struct page", just pure PFN */ - DECLARE_VMA_BIT(PFNMAP, 10), - DECLARE_VMA_BIT(MAYBE_GUARD, 11), - DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */ - DECLARE_VMA_BIT(LOCKED, 13), - DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */ - DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */ - DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */ - DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */ - DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */ - DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */ - DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */ - DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */ - DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */ - DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */ - DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */ - DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */ - DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */ - DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */ - DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */ - DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */ - DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */ - DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */ - /* These bits are reused, we define specific uses below. */ - DECLARE_VMA_BIT(HIGH_ARCH_0, 32), - DECLARE_VMA_BIT(HIGH_ARCH_1, 33), - DECLARE_VMA_BIT(HIGH_ARCH_2, 34), - DECLARE_VMA_BIT(HIGH_ARCH_3, 35), - DECLARE_VMA_BIT(HIGH_ARCH_4, 36), - DECLARE_VMA_BIT(HIGH_ARCH_5, 37), - DECLARE_VMA_BIT(HIGH_ARCH_6, 38), - /* - * This flag is used to connect VFIO to arch specific KVM code. It - * indicates that the memory under this VMA is safe for use with any - * non-cachable memory type inside KVM. Some VFIO devices, on some - * platforms, are thought to be unsafe and can cause machine crashes - * if KVM does not lock down the memory type. - */ - DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39), -#ifdef CONFIG_PPC32 - DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1), -#else - DECLARE_VMA_BIT(DROPPABLE, 40), -#endif - DECLARE_VMA_BIT(UFFD_MINOR, 41), - DECLARE_VMA_BIT(SEALED, 42), - /* Flags that reuse flags above. */ - DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0), - DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1), - DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2), - DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3), - DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4), -#if defined(CONFIG_X86_USER_SHADOW_STACK) - /* - * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of - * support core mm. - * - * These VMAs will get a single end guard page. This helps userspace - * protect itself from attacks. A single page is enough for current - * shadow stack archs (x86). See the comments near alloc_shstk() in - * arch/x86/kernel/shstk.c for more details on the guard size. - */ - DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5), -#elif defined(CONFIG_ARM64_GCS) - /* - * arm64's Guarded Control Stack implements similar functionality and - * has similar constraints to shadow stacks. - */ - DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6), -#endif - DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */ - DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */ - DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */ - DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */ - DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */ - DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */ - DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */ - DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */ -#ifdef CONFIG_STACK_GROWSUP - DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP), - DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN), -#else - DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN), -#endif -}; - -#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT) -#define VM_READ INIT_VM_FLAG(READ) -#define VM_WRITE INIT_VM_FLAG(WRITE) -#define VM_EXEC INIT_VM_FLAG(EXEC) -#define VM_SHARED INIT_VM_FLAG(SHARED) -#define VM_MAYREAD INIT_VM_FLAG(MAYREAD) -#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE) -#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC) -#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE) -#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN) -#ifdef CONFIG_MMU -#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING) -#else -#define VM_UFFD_MISSING VM_NONE -#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY) -#endif -#define VM_PFNMAP INIT_VM_FLAG(PFNMAP) -#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD) -#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP) -#define VM_LOCKED INIT_VM_FLAG(LOCKED) -#define VM_IO INIT_VM_FLAG(IO) -#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ) -#define VM_RAND_READ INIT_VM_FLAG(RAND_READ) -#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY) -#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND) -#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT) -#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT) -#define VM_NORESERVE INIT_VM_FLAG(NORESERVE) -#define VM_HUGETLB INIT_VM_FLAG(HUGETLB) -#define VM_SYNC INIT_VM_FLAG(SYNC) -#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1) -#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK) -#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP) -#ifdef CONFIG_MEM_SOFT_DIRTY -#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY) -#else -#define VM_SOFTDIRTY VM_NONE -#endif -#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP) -#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE) -#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE) -#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE) -#define VM_STACK INIT_VM_FLAG(STACK) -#ifdef CONFIG_STACK_GROWS_UP -#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY) -#else -#define VM_STACK_EARLY VM_NONE -#endif -#ifdef CONFIG_ARCH_HAS_PKEYS -#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT) -/* Despite the naming, these are FLAGS not bits. */ -#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0) -#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1) -#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2) -#if CONFIG_ARCH_PKEY_BITS > 3 -#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3) -#else -#define VM_PKEY_BIT3 VM_NONE -#endif /* CONFIG_ARCH_PKEY_BITS > 3 */ -#if CONFIG_ARCH_PKEY_BITS > 4 -#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4) -#else -#define VM_PKEY_BIT4 VM_NONE -#endif /* CONFIG_ARCH_PKEY_BITS > 4 */ -#endif /* CONFIG_ARCH_HAS_PKEYS */ -#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) -#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK) -#else -#define VM_SHADOW_STACK VM_NONE -#endif -#if defined(CONFIG_PPC64) -#define VM_SAO INIT_VM_FLAG(SAO) -#elif defined(CONFIG_PARISC) -#define VM_GROWSUP INIT_VM_FLAG(GROWSUP) -#elif defined(CONFIG_SPARC64) -#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI) -#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) -#elif defined(CONFIG_ARM64) -#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI) -#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) -#elif !defined(CONFIG_MMU) -#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY) -#endif -#ifndef VM_GROWSUP -#define VM_GROWSUP VM_NONE -#endif -#ifdef CONFIG_ARM64_MTE -#define VM_MTE INIT_VM_FLAG(MTE) -#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED) -#else -#define VM_MTE VM_NONE -#define VM_MTE_ALLOWED VM_NONE -#endif -#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR) -#else -#define VM_UFFD_MINOR VM_NONE -#endif -#ifdef CONFIG_64BIT -#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED) -#define VM_SEALED INIT_VM_FLAG(SEALED) -#else -#define VM_ALLOW_ANY_UNCACHED VM_NONE -#define VM_SEALED VM_NONE -#endif -#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) -#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE) -#else -#define VM_DROPPABLE VM_NONE -#endif - -/* Bits set in the VMA until the stack is in its final location */ -#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) - -#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) - -/* Common data flag combinations */ -#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ - VM_MAYWRITE | VM_MAYEXEC) -#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ -#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC -#endif - -#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ -#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS -#endif - -#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) - -#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) - -/* VMA basic access permission flags */ -#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) - -/* - * Special vmas that are non-mergable, non-mlock()able. - */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) - -#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) -#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW -#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW -#define STACK_TOP TASK_SIZE_LOW -#define STACK_TOP_MAX TASK_SIZE_MAX - -/* This mask represents all the VMA flag bits used by mlock */ -#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) - -#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) - -#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define RLIMIT_STACK 3 /* max stack size */ -#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ - -#define CAP_IPC_LOCK 14 - -/* - * Flags which should be 'sticky' on merge - that is, flags which, when one VMA - * possesses it but the other does not, the merged VMA should nonetheless have - * applied to it: - * - * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its - * references cleared via /proc/$pid/clear_refs, any merged VMA - * should be considered soft-dirty also as it operates at a VMA - * granularity. - */ -#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) - /* - * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one - * of these flags and the other not does not preclude a merge. - * - * VM_STICKY - When merging VMAs, VMA flags must match, unless they are - * 'sticky'. If any sticky flags exist in either VMA, we simply - * set all of them on the merged VMA. + * DUPLICATE typedef definitions from kernel source that have to be declared + * ahead of all other headers. */ -#define VM_IGNORE_MERGE VM_STICKY - -/* - * Flags which should result in page tables being copied on fork. These are - * flags which indicate that the VMA maps page tables which cannot be - * reconsistuted upon page fault, so necessitate page table copying upon - * - * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be - * reasonably reconstructed on page fault. - * - * VM_UFFD_WP - Encodes metadata about an installed uffd - * write protect handler, which cannot be - * reconstructed on page fault. - * - * We always copy pgtables when dst_vma has uffd-wp - * enabled even if it's file-backed - * (e.g. shmem). Because when uffd-wp is enabled, - * pgtable contains uffd-wp protection information, - * that's something we can't retrieve from page cache, - * and skip copying will lose those info. - * - * VM_MAYBE_GUARD - Could contain page guard region markers which - * by design are a property of the page tables - * only and thus cannot be reconstructed on page - * fault. - */ -#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) - -#define FIRST_USER_ADDRESS 0UL -#define USER_PGTABLES_CEILING 0UL - -#define vma_policy(vma) NULL - -#define down_write_nest_lock(sem, nest_lock) - -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#define for_each_vma(__vmi, __vma) \ - while (((__vma) = vma_next(&(__vmi))) != NULL) - -/* The MM code likes to work with exclusive end addresses */ -#define for_each_vma_range(__vmi, __vma, __end) \ - while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) - -#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) - -#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) - -#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr) -#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr) - -#define TASK_SIZE ((1ul << 47)-PAGE_SIZE) - -#define AS_MM_ALL_LOCKS 2 - -/* We hardcode this for now. */ -#define sysctl_max_map_count 0x1000000UL - -#define pgoff_t unsigned long -typedef unsigned long pgprotval_t; -typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; -typedef unsigned long vm_flags_t; -typedef __bitwise unsigned int vm_fault_t; - -/* - * The shared stubs do not implement this, it amounts to an fprintf(STDERR,...) - * either way :) - */ -#define pr_warn_once pr_err - -#define data_race(expr) expr - -#define ASSERT_EXCLUSIVE_WRITER(x) - -#define pgtable_supports_soft_dirty() 1 - -/** - * swap - swap values of @a and @b - * @a: first value - * @b: second value - */ -#define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) - -struct kref { - refcount_t refcount; -}; - -/* - * Define the task command name length as enum, then it can be visible to - * BPF programs. - */ -enum { - TASK_COMM_LEN = 16, -}; - -/* - * Flags for bug emulation. - * - * These occupy the top three bytes. - */ -enum { - READ_IMPLIES_EXEC = 0x0400000, -}; - -struct task_struct { - char comm[TASK_COMM_LEN]; - pid_t pid; - struct mm_struct *mm; - - /* Used for emulating ABI behavior of previous Linux versions: */ - unsigned int personality; -}; - -struct task_struct *get_current(void); -#define current get_current() - -struct anon_vma { - struct anon_vma *root; - struct rb_root_cached rb_root; - - /* Test fields. */ - bool was_cloned; - bool was_unlinked; -}; - -struct anon_vma_chain { - struct anon_vma *anon_vma; - struct list_head same_vma; -}; - -struct anon_vma_name { - struct kref kref; - /* The name needs to be at the end because it is dynamically sized. */ - char name[]; -}; - -struct vma_iterator { - struct ma_state mas; -}; - -#define VMA_ITERATOR(name, __mm, __addr) \ - struct vma_iterator name = { \ - .mas = { \ - .tree = &(__mm)->mm_mt, \ - .index = __addr, \ - .node = NULL, \ - .status = ma_start, \ - }, \ - } - -struct address_space { - struct rb_root_cached i_mmap; - unsigned long flags; - atomic_t i_mmap_writable; -}; - -struct vm_userfaultfd_ctx {}; -struct mempolicy {}; -struct mmu_gather {}; -struct mutex {}; -#define DEFINE_MUTEX(mutexname) \ - struct mutex mutexname = {} - -#define DECLARE_BITMAP(name, bits) \ - unsigned long name[BITS_TO_LONGS(bits)] - +#define __private #define NUM_MM_FLAG_BITS (64) typedef struct { __private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); } mm_flags_t; - -/* - * Opaque type representing current VMA (vm_area_struct) flag state. Must be - * accessed via vma_flags_xxx() helper functions. - */ #define NUM_VMA_FLAG_BITS BITS_PER_LONG typedef struct { DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS); } __private vma_flags_t; -#define EMPTY_VMA_FLAGS ((vma_flags_t){ }) - -struct mm_struct { - struct maple_tree mm_mt; - int map_count; /* number of VMAs */ - unsigned long total_vm; /* Total pages mapped */ - unsigned long locked_vm; /* Pages that have PG_mlocked set */ - unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ - unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ - unsigned long stack_vm; /* VM_STACK */ - - unsigned long def_flags; - - mm_flags_t flags; /* Must use mm_flags_* helpers to access */ -}; - -struct vm_area_struct; - - -/* What action should be taken after an .mmap_prepare call is complete? */ -enum mmap_action_type { - MMAP_NOTHING, /* Mapping is complete, no further action. */ - MMAP_REMAP_PFN, /* Remap PFN range. */ - MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */ -}; - -/* - * Describes an action an mmap_prepare hook can instruct to be taken to complete - * the mapping of a VMA. Specified in vm_area_desc. - */ -struct mmap_action { - union { - /* Remap range. */ - struct { - unsigned long start; - unsigned long start_pfn; - unsigned long size; - pgprot_t pgprot; - } remap; - }; - enum mmap_action_type type; - - /* - * If specified, this hook is invoked after the selected action has been - * successfully completed. Note that the VMA write lock still held. - * - * The absolute minimum ought to be done here. - * - * Returns 0 on success, or an error code. - */ - int (*success_hook)(const struct vm_area_struct *vma); - - /* - * If specified, this hook is invoked when an error occurred when - * attempting the selection action. - * - * The hook can return an error code in order to filter the error, but - * it is not valid to clear the error here. - */ - int (*error_hook)(int err); - - /* - * This should be set in rare instances where the operation required - * that the rmap should not be able to access the VMA until - * completely set up. - */ - bool hide_from_rmap_until_complete :1; -}; - -/* Operations which modify VMAs. */ -enum vma_operation { - VMA_OP_SPLIT, - VMA_OP_MERGE_UNFAULTED, - VMA_OP_REMAP, - VMA_OP_FORK, -}; - -/* - * Describes a VMA that is about to be mmap()'ed. Drivers may choose to - * manipulate mutable fields which will cause those fields to be updated in the - * resultant VMA. - * - * Helper functions are not required for manipulating any field. - */ -struct vm_area_desc { - /* Immutable state. */ - const struct mm_struct *const mm; - struct file *const file; /* May vary from vm_file in stacked callers. */ - unsigned long start; - unsigned long end; - - /* Mutable fields. Populated with initial state. */ - pgoff_t pgoff; - struct file *vm_file; - union { - vm_flags_t vm_flags; - vma_flags_t vma_flags; - }; - pgprot_t page_prot; - - /* Write-only fields. */ - const struct vm_operations_struct *vm_ops; - void *private_data; - - /* Take further action? */ - struct mmap_action action; -}; - -struct file_operations { - int (*mmap)(struct file *, struct vm_area_struct *); - int (*mmap_prepare)(struct vm_area_desc *); -}; - -struct file { - struct address_space *f_mapping; - const struct file_operations *f_op; -}; - -#define VMA_LOCK_OFFSET 0x40000000 - -typedef struct { unsigned long v; } freeptr_t; - -struct vm_area_struct { - /* The first cache line has the info for VMA tree walking. */ - - union { - struct { - /* VMA covers [vm_start; vm_end) addresses within mm */ - unsigned long vm_start; - unsigned long vm_end; - }; - freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ - }; - - struct mm_struct *vm_mm; /* The address space we belong to. */ - pgprot_t vm_page_prot; /* Access permissions of this VMA. */ - - /* - * Flags, see mm.h. - * To modify use vm_flags_{init|reset|set|clear|mod} functions. - */ - union { - const vm_flags_t vm_flags; - vma_flags_t flags; - }; - -#ifdef CONFIG_PER_VMA_LOCK - /* - * Can only be written (using WRITE_ONCE()) while holding both: - * - mmap_lock (in write mode) - * - vm_refcnt bit at VMA_LOCK_OFFSET is set - * Can be read reliably while holding one of: - * - mmap_lock (in read or write mode) - * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 - * Can be read unreliably (using READ_ONCE()) for pessimistic bailout - * while holding nothing (except RCU to keep the VMA struct allocated). - * - * This sequence counter is explicitly allowed to overflow; sequence - * counter reuse can only lead to occasional unnecessary use of the - * slowpath. - */ - unsigned int vm_lock_seq; -#endif - - /* - * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma - * list, after a COW of one of the file pages. A MAP_SHARED vma - * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack - * or brk vma (with NULL file) can only be in an anon_vma list. - */ - struct list_head anon_vma_chain; /* Serialized by mmap_lock & - * page_table_lock */ - struct anon_vma *anon_vma; /* Serialized by page_table_lock */ - - /* Function pointers to deal with this struct. */ - const struct vm_operations_struct *vm_ops; - - /* Information about our backing store: */ - unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE - units */ - struct file * vm_file; /* File we map to (can be NULL). */ - void * vm_private_data; /* was vm_pte (shared mem) */ - -#ifdef CONFIG_SWAP - atomic_long_t swap_readahead_info; -#endif -#ifndef CONFIG_MMU - struct vm_region *vm_region; /* NOMMU mapping region */ -#endif -#ifdef CONFIG_NUMA - struct mempolicy *vm_policy; /* NUMA policy for the VMA */ -#endif -#ifdef CONFIG_NUMA_BALANCING - struct vma_numab_state *numab_state; /* NUMA Balancing state */ -#endif -#ifdef CONFIG_PER_VMA_LOCK - /* Unstable RCU readers are allowed to read this. */ - refcount_t vm_refcnt; -#endif - /* - * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree. - * - */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; -#ifdef CONFIG_ANON_VMA_NAME - /* - * For private and shared anonymous mappings, a pointer to a null - * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. - */ - struct anon_vma_name *anon_name; -#endif - struct vm_userfaultfd_ctx vm_userfaultfd_ctx; -} __randomize_layout; - -struct vm_fault {}; - -struct vm_operations_struct { - void (*open)(struct vm_area_struct * area); - /** - * @close: Called when the VMA is being removed from the MM. - * Context: User context. May sleep. Caller holds mmap_lock. - */ - void (*close)(struct vm_area_struct * area); - /* Called any time before splitting to check if it's allowed */ - int (*may_split)(struct vm_area_struct *area, unsigned long addr); - int (*mremap)(struct vm_area_struct *area); - /* - * Called by mprotect() to make driver-specific permission - * checks before mprotect() is finalised. The VMA must not - * be modified. Returns 0 if mprotect() can proceed. - */ - int (*mprotect)(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long newflags); - vm_fault_t (*fault)(struct vm_fault *vmf); - vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); - vm_fault_t (*map_pages)(struct vm_fault *vmf, - pgoff_t start_pgoff, pgoff_t end_pgoff); - unsigned long (*pagesize)(struct vm_area_struct * area); - - /* notification that a previously read-only page is about to become - * writable, if an error is returned it will cause a SIGBUS */ - vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); - - /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ - vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); - - /* called by access_process_vm when get_user_pages() fails, typically - * for use by special VMAs. See also generic_access_phys() for a generic - * implementation useful for any iomem mapping. - */ - int (*access)(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write); - - /* Called by the /proc/PID/maps code to ask the vma whether it - * has a special name. Returning non-NULL will also cause this - * vma to be dumped unconditionally. */ - const char *(*name)(struct vm_area_struct *vma); - -#ifdef CONFIG_NUMA - /* - * set_policy() op must add a reference to any non-NULL @new mempolicy - * to hold the policy upon return. Caller should pass NULL @new to - * remove a policy and fall back to surrounding context--i.e. do not - * install a MPOL_DEFAULT policy, nor the task or system default - * mempolicy. - */ - int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); - - /* - * get_policy() op must add reference [mpol_get()] to any policy at - * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure - * in mm/mempolicy.c will do this automatically. - * get_policy() must NOT add a ref if the policy at (vma,addr) is not - * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. - * If no [shared/vma] mempolicy exists at the addr, get_policy() op - * must return NULL--i.e., do not "fallback" to task or system default - * policy. - */ - struct mempolicy *(*get_policy)(struct vm_area_struct *vma, - unsigned long addr, pgoff_t *ilx); -#endif -#ifdef CONFIG_FIND_NORMAL_PAGE - /* - * Called by vm_normal_page() for special PTEs in @vma at @addr. This - * allows for returning a "normal" page from vm_normal_page() even - * though the PTE indicates that the "struct page" either does not exist - * or should not be touched: "special". - * - * Do not add new users: this really only works when a "normal" page - * was mapped, but then the PTE got changed to something weird (+ - * marked special) that would not make pte_pfn() identify the originally - * inserted page. - */ - struct page *(*find_normal_page)(struct vm_area_struct *vma, - unsigned long addr); -#endif /* CONFIG_FIND_NORMAL_PAGE */ -}; - -struct vm_unmapped_area_info { -#define VM_UNMAPPED_AREA_TOPDOWN 1 - unsigned long flags; - unsigned long length; - unsigned long low_limit; - unsigned long high_limit; - unsigned long align_mask; - unsigned long align_offset; - unsigned long start_gap; -}; - -struct pagetable_move_control { - struct vm_area_struct *old; /* Source VMA. */ - struct vm_area_struct *new; /* Destination VMA. */ - unsigned long old_addr; /* Address from which the move begins. */ - unsigned long old_end; /* Exclusive address at which old range ends. */ - unsigned long new_addr; /* Address to move page tables to. */ - unsigned long len_in; /* Bytes to remap specified by user. */ - - bool need_rmap_locks; /* Do rmap locks need to be taken? */ - bool for_stack; /* Is this an early temp stack being moved? */ -}; - -#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ - struct pagetable_move_control name = { \ - .old = old_, \ - .new = new_, \ - .old_addr = old_addr_, \ - .old_end = (old_addr_) + (len_), \ - .new_addr = new_addr_, \ - .len_in = len_, \ - } - -static inline void vma_iter_invalidate(struct vma_iterator *vmi) -{ - mas_pause(&vmi->mas); -} - -static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) -{ - return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot)); -} - -static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) -{ - return __pgprot(vm_flags); -} - -static inline void vma_flags_clear_all(vma_flags_t *flags) -{ - bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS); -} - -static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit) -{ - unsigned long *bitmap = flags->__vma_flags; - - __set_bit((__force int)bit, bitmap); -} - -static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) -{ - vma_flags_t flags; - int i; - - vma_flags_clear_all(&flags); - for (i = 0; i < count; i++) - vma_flag_set(&flags, bits[i]); - return flags; -} - -#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \ - (const vma_flag_t []){__VA_ARGS__}) - -static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags, - vma_flags_t to_test) -{ - const unsigned long *bitmap = flags->__vma_flags; - const unsigned long *bitmap_to_test = to_test.__vma_flags; - - return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); -} - -#define vma_flags_test(flags, ...) \ - vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__)) - -static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags, - vma_flags_t to_test) -{ - const unsigned long *bitmap = flags->__vma_flags; - const unsigned long *bitmap_to_test = to_test.__vma_flags; - - return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); -} - -#define vma_flags_test_all(flags, ...) \ - vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__)) - -static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set) -{ - unsigned long *bitmap = flags->__vma_flags; - const unsigned long *bitmap_to_set = to_set.__vma_flags; - - bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS); -} - -#define vma_flags_set(flags, ...) \ - vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__)) - -static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear) -{ - unsigned long *bitmap = flags->__vma_flags; - const unsigned long *bitmap_to_clear = to_clear.__vma_flags; - - bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS); -} - -#define vma_flags_clear(flags, ...) \ - vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__)) - -static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, - vma_flags_t flags) -{ - return vma_flags_test_all_mask(&vma->flags, flags); -} - -#define vma_test_all_flags(vma, ...) \ - vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) - -static inline void vma_set_flags_mask(struct vm_area_struct *vma, - vma_flags_t flags) -{ - vma_flags_set_mask(&vma->flags, flags); -} - -#define vma_set_flags(vma, ...) \ - vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) - -static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc, - vma_flags_t flags) -{ - return vma_flags_test_mask(&desc->vma_flags, flags); -} - -#define vma_desc_test_flags(desc, ...) \ - vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) - -static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, - vma_flags_t flags) -{ - vma_flags_set_mask(&desc->vma_flags, flags); -} - -#define vma_desc_set_flags(desc, ...) \ - vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) - -static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, - vma_flags_t flags) -{ - vma_flags_clear_mask(&desc->vma_flags, flags); -} - -#define vma_desc_clear_flags(desc, ...) \ - vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) - -static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags) -{ - return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == - (VM_SHARED | VM_MAYWRITE); -} - -static inline bool is_shared_maywrite(const vma_flags_t *flags) -{ - return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT); -} - -static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) -{ - return is_shared_maywrite(&vma->flags); -} - -static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) -{ - /* - * Uses mas_find() to get the first VMA when the iterator starts. - * Calling mas_next() could skip the first entry. - */ - return mas_find(&vmi->mas, ULONG_MAX); -} - -/* - * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these - * assertions should be made either under mmap_write_lock or when the object - * has been isolated under mmap_write_lock, ensuring no competing writers. - */ -static inline void vma_assert_attached(struct vm_area_struct *vma) -{ - WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); -} - -static inline void vma_assert_detached(struct vm_area_struct *vma) -{ - WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); -} - -static inline void vma_assert_write_locked(struct vm_area_struct *); -static inline void vma_mark_attached(struct vm_area_struct *vma) -{ - vma_assert_write_locked(vma); - vma_assert_detached(vma); - refcount_set_release(&vma->vm_refcnt, 1); -} - -static inline void vma_mark_detached(struct vm_area_struct *vma) -{ - vma_assert_write_locked(vma); - vma_assert_attached(vma); - /* We are the only writer, so no need to use vma_refcount_put(). */ - if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { - /* - * Reader must have temporarily raised vm_refcnt but it will - * drop it without using the vma since vma is write-locked. - */ - } -} - -extern const struct vm_operations_struct vma_dummy_vm_ops; - -extern unsigned long rlimit(unsigned int limit); - -static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) -{ - memset(vma, 0, sizeof(*vma)); - vma->vm_mm = mm; - vma->vm_ops = &vma_dummy_vm_ops; - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_lock_seq = UINT_MAX; -} - -/* - * These are defined in vma.h, but sadly vm_stat_account() is referenced by - * kernel/fork.c, so we have to these broadly available there, and temporarily - * define them here to resolve the dependency cycle. - */ - -#define is_exec_mapping(flags) \ - ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC) - -#define is_stack_mapping(flags) \ - (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK)) - -#define is_data_mapping(flags) \ - ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE) - -static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, - long npages) -{ - WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); - - if (is_exec_mapping(flags)) - mm->exec_vm += npages; - else if (is_stack_mapping(flags)) - mm->stack_vm += npages; - else if (is_data_mapping(flags)) - mm->data_vm += npages; -} - -#undef is_exec_mapping -#undef is_stack_mapping -#undef is_data_mapping - -/* Currently stubbed but we may later wish to un-stub. */ -static inline void vm_acct_memory(long pages); -static inline void vm_unacct_memory(long pages) -{ - vm_acct_memory(-pages); -} - -static inline void mapping_allow_writable(struct address_space *mapping) -{ - atomic_inc(&mapping->i_mmap_writable); -} - -static inline -struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) -{ - return mas_find(&vmi->mas, max - 1); -} - -static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, - unsigned long start, unsigned long end, gfp_t gfp) -{ - __mas_set_range(&vmi->mas, start, end - 1); - mas_store_gfp(&vmi->mas, NULL, gfp); - if (unlikely(mas_is_err(&vmi->mas))) - return -ENOMEM; - - return 0; -} - -static inline void mmap_assert_locked(struct mm_struct *); -static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, - unsigned long start_addr, - unsigned long end_addr) -{ - unsigned long index = start_addr; - - mmap_assert_locked(mm); - return mt_find(&mm->mm_mt, &index, end_addr - 1); -} - -static inline -struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) -{ - return mtree_load(&mm->mm_mt, addr); -} - -static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) -{ - return mas_prev(&vmi->mas, 0); -} - -static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) -{ - mas_set(&vmi->mas, addr); -} - -static inline bool vma_is_anonymous(struct vm_area_struct *vma) -{ - return !vma->vm_ops; -} - -/* Defined in vma.h, so temporarily define here to avoid circular dependency. */ -#define vma_iter_load(vmi) \ - mas_walk(&(vmi)->mas) - -static inline struct vm_area_struct * -find_vma_prev(struct mm_struct *mm, unsigned long addr, - struct vm_area_struct **pprev) -{ - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, addr); - - vma = vma_iter_load(&vmi); - *pprev = vma_prev(&vmi); - if (!vma) - vma = vma_next(&vmi); - return vma; -} - -#undef vma_iter_load - -static inline void vma_iter_init(struct vma_iterator *vmi, - struct mm_struct *mm, unsigned long addr) -{ - mas_init(&vmi->mas, &mm->mm_mt, addr); -} - -/* Stubbed functions. */ - -static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) -{ - return NULL; -} - -static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, - struct vm_userfaultfd_ctx vm_ctx) -{ - return true; -} - -static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, - struct anon_vma_name *anon_name2) -{ - return true; -} - -static inline void might_sleep(void) -{ -} - -static inline unsigned long vma_pages(struct vm_area_struct *vma) -{ - return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; -} - -static inline void fput(struct file *file) -{ -} - -static inline void mpol_put(struct mempolicy *pol) -{ -} - -static inline void lru_add_drain(void) -{ -} - -static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) -{ -} - -static inline void update_hiwater_rss(struct mm_struct *mm) -{ -} - -static inline void update_hiwater_vm(struct mm_struct *mm) -{ -} - -struct unmap_desc; - -static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) -{ -} - -static inline void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *desc) -{ - (void)tlb; - (void)desc; -} - -static inline void mapping_unmap_writable(struct address_space *mapping) -{ -} - -static inline void flush_dcache_mmap_lock(struct address_space *mapping) -{ -} - -static inline void tlb_finish_mmu(struct mmu_gather *tlb) -{ -} - -static inline struct file *get_file(struct file *f) -{ - return f; -} - -static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) -{ - return 0; -} - -static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src, - enum vma_operation operation) -{ - /* For testing purposes. We indicate that an anon_vma has been cloned. */ - if (src->anon_vma != NULL) { - dst->anon_vma = src->anon_vma; - dst->anon_vma->was_cloned = true; - } - - return 0; -} - -static inline void vma_start_write(struct vm_area_struct *vma) -{ - /* Used to indicate to tests that a write operation has begun. */ - vma->vm_lock_seq++; -} - -static inline __must_check -int vma_start_write_killable(struct vm_area_struct *vma) -{ - /* Used to indicate to tests that a write operation has begun. */ - vma->vm_lock_seq++; - return 0; -} - -static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - struct vm_area_struct *next) -{ -} - -static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} - -static inline void vma_iter_free(struct vma_iterator *vmi) -{ - mas_destroy(&vmi->mas); -} - -static inline -struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi) -{ - return mas_next_range(&vmi->mas, ULONG_MAX); -} - -static inline void vm_acct_memory(long pages) -{ -} - -static inline void vma_interval_tree_insert(struct vm_area_struct *vma, - struct rb_root_cached *rb) -{ -} - -static inline void vma_interval_tree_remove(struct vm_area_struct *vma, - struct rb_root_cached *rb) -{ -} - -static inline void flush_dcache_mmap_unlock(struct address_space *mapping) -{ -} - -static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc, - struct rb_root_cached *rb) -{ -} - -static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc, - struct rb_root_cached *rb) -{ -} - -static inline void uprobe_mmap(struct vm_area_struct *vma) -{ -} - -static inline void uprobe_munmap(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void i_mmap_lock_write(struct address_space *mapping) -{ -} - -static inline void anon_vma_lock_write(struct anon_vma *anon_vma) -{ -} - -static inline void vma_assert_write_locked(struct vm_area_struct *vma) -{ -} - -static inline void unlink_anon_vmas(struct vm_area_struct *vma) -{ - /* For testing purposes, indicate that the anon_vma was unlinked. */ - vma->anon_vma->was_unlinked = true; -} - -static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) -{ -} - -static inline void i_mmap_unlock_write(struct address_space *mapping) -{ -} - -static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - struct list_head *unmaps) -{ - return 0; -} - -static inline void mmap_write_downgrade(struct mm_struct *mm) -{ -} - -static inline void mmap_read_unlock(struct mm_struct *mm) -{ -} - -static inline void mmap_write_unlock(struct mm_struct *mm) -{ -} - -static inline int mmap_write_lock_killable(struct mm_struct *mm) -{ - return 0; -} - -static inline bool can_modify_mm(struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ - return true; -} - -static inline void arch_unmap(struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ -} - -static inline void mmap_assert_locked(struct mm_struct *mm) -{ -} - -static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) -{ - return true; -} - -static inline void khugepaged_enter_vma(struct vm_area_struct *vma, - vm_flags_t vm_flags) -{ -} - -static inline bool mapping_can_writeback(struct address_space *mapping) -{ - return true; -} - -static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) -{ - return false; -} - -static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) -{ - return false; -} - -static inline bool userfaultfd_wp(struct vm_area_struct *vma) -{ - return false; -} - -static inline void mmap_assert_write_locked(struct mm_struct *mm) -{ -} - -static inline void mutex_lock(struct mutex *lock) -{ -} - -static inline void mutex_unlock(struct mutex *lock) -{ -} - -static inline bool mutex_is_locked(struct mutex *lock) -{ - return true; -} - -static inline bool signal_pending(void *p) -{ - return false; -} - -static inline bool is_file_hugepages(struct file *file) -{ - return false; -} - -static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) -{ - return 0; -} - -static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, - unsigned long npages) -{ - return true; -} - -static inline int shmem_zero_setup(struct vm_area_struct *vma) -{ - return 0; -} - -static inline void vma_set_anonymous(struct vm_area_struct *vma) -{ - vma->vm_ops = NULL; -} - -static inline void ksm_add_vma(struct vm_area_struct *vma) -{ -} - -static inline void perf_event_mmap(struct vm_area_struct *vma) -{ -} - -static inline bool vma_is_dax(struct vm_area_struct *vma) -{ - return false; -} - -static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); - -/* Update vma->vm_page_prot to reflect vma->vm_flags. */ -static inline void vma_set_page_prot(struct vm_area_struct *vma) -{ - vm_flags_t vm_flags = vma->vm_flags; - pgprot_t vm_page_prot; - - /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ - vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags)); - - if (vma_wants_writenotify(vma, vm_page_prot)) { - vm_flags &= ~VM_SHARED; - /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ - vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags)); - } - /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ - WRITE_ONCE(vma->vm_page_prot, vm_page_prot); -} - -static inline bool arch_validate_flags(vm_flags_t flags) -{ - return true; -} - -static inline void vma_close(struct vm_area_struct *vma) -{ -} - -static inline int mmap_file(struct file *file, struct vm_area_struct *vma) -{ - return 0; -} - -static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) -{ - if (vma->vm_flags & VM_GROWSDOWN) - return stack_guard_gap; - - /* See reasoning around the VM_SHADOW_STACK definition */ - if (vma->vm_flags & VM_SHADOW_STACK) - return PAGE_SIZE; - - return 0; -} - -static inline unsigned long vm_start_gap(struct vm_area_struct *vma) -{ - unsigned long gap = stack_guard_start_gap(vma); - unsigned long vm_start = vma->vm_start; - - vm_start -= gap; - if (vm_start > vma->vm_start) - vm_start = 0; - return vm_start; -} - -static inline unsigned long vm_end_gap(struct vm_area_struct *vma) -{ - unsigned long vm_end = vma->vm_end; - - if (vma->vm_flags & VM_GROWSUP) { - vm_end += stack_guard_gap; - if (vm_end < vma->vm_end) - vm_end = -PAGE_SIZE; - } - return vm_end; -} - -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, unsigned long len) -{ - return 0; -} - -static inline bool vma_is_accessible(struct vm_area_struct *vma) -{ - return vma->vm_flags & VM_ACCESS_FLAGS; -} - -static inline bool capable(int cap) -{ - return true; -} - -static inline bool mlock_future_ok(const struct mm_struct *mm, - vm_flags_t vm_flags, unsigned long bytes) -{ - unsigned long locked_pages, limit_pages; - - if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) - return true; - - locked_pages = bytes >> PAGE_SHIFT; - locked_pages += mm->locked_vm; - - limit_pages = rlimit(RLIMIT_MEMLOCK); - limit_pages >>= PAGE_SHIFT; - - return locked_pages <= limit_pages; -} - -static inline int __anon_vma_prepare(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma)); - - if (!anon_vma) - return -ENOMEM; - - anon_vma->root = anon_vma; - vma->anon_vma = anon_vma; - - return 0; -} - -static inline int anon_vma_prepare(struct vm_area_struct *vma) -{ - if (likely(vma->anon_vma)) - return 0; - - return __anon_vma_prepare(vma); -} - -static inline void userfaultfd_unmap_complete(struct mm_struct *mm, - struct list_head *uf) -{ -} - -static inline bool mm_flags_test(int flag, const struct mm_struct *mm) -{ - return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); -} - -/* - * Copy value to the first system word of VMA flags, non-atomically. - * - * IMPORTANT: This does not overwrite bytes past the first system word. The - * caller must account for this. - */ -static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value) -{ - *ACCESS_PRIVATE(flags, __vma_flags) = value; -} - -/* - * Copy value to the first system word of VMA flags ONCE, non-atomically. - * - * IMPORTANT: This does not overwrite bytes past the first system word. The - * caller must account for this. - */ -static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value) -{ - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); - - WRITE_ONCE(*bitmap, value); -} - -/* Update the first system word of VMA flags setting bits, non-atomically. */ -static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) -{ - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); - - *bitmap |= value; -} - -/* Update the first system word of VMA flags clearing bits, non-atomically. */ -static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value) -{ - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); - - *bitmap &= ~value; -} - - -/* Use when VMA is not part of the VMA tree and needs no locking */ -static inline void vm_flags_init(struct vm_area_struct *vma, - vm_flags_t flags) -{ - vma_flags_clear_all(&vma->flags); - vma_flags_overwrite_word(&vma->flags, flags); -} - -/* - * Use when VMA is part of the VMA tree and modifications need coordination - * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and - * it should be locked explicitly beforehand. - */ -static inline void vm_flags_reset(struct vm_area_struct *vma, - vm_flags_t flags) -{ - vma_assert_write_locked(vma); - vm_flags_init(vma, flags); -} - -static inline void vm_flags_reset_once(struct vm_area_struct *vma, - vm_flags_t flags) -{ - vma_assert_write_locked(vma); - /* - * The user should only be interested in avoiding reordering of - * assignment to the first word. - */ - vma_flags_clear_all(&vma->flags); - vma_flags_overwrite_word_once(&vma->flags, flags); -} - -static inline void vm_flags_set(struct vm_area_struct *vma, - vm_flags_t flags) -{ - vma_start_write(vma); - vma_flags_set_word(&vma->flags, flags); -} - -static inline void vm_flags_clear(struct vm_area_struct *vma, - vm_flags_t flags) -{ - vma_start_write(vma); - vma_flags_clear_word(&vma->flags, flags); -} - -/* - * Denies creating a writable executable mapping or gaining executable permissions. - * - * This denies the following: - * - * a) mmap(PROT_WRITE | PROT_EXEC) - * - * b) mmap(PROT_WRITE) - * mprotect(PROT_EXEC) - * - * c) mmap(PROT_WRITE) - * mprotect(PROT_READ) - * mprotect(PROT_EXEC) - * - * But allows the following: - * - * d) mmap(PROT_READ | PROT_EXEC) - * mmap(PROT_READ | PROT_EXEC | PROT_BTI) - * - * This is only applicable if the user has set the Memory-Deny-Write-Execute - * (MDWE) protection mask for the current process. - * - * @old specifies the VMA flags the VMA originally possessed, and @new the ones - * we propose to set. - * - * Return: false if proposed change is OK, true if not ok and should be denied. - */ -static inline bool map_deny_write_exec(unsigned long old, unsigned long new) -{ - /* If MDWE is disabled, we have nothing to deny. */ - if (mm_flags_test(MMF_HAS_MDWE, current->mm)) - return false; - - /* If the new VMA is not executable, we have nothing to deny. */ - if (!(new & VM_EXEC)) - return false; - - /* Under MDWE we do not accept newly writably executable VMAs... */ - if (new & VM_WRITE) - return true; - - /* ...nor previously non-executable VMAs becoming executable. */ - if (!(old & VM_EXEC)) - return true; - - return false; -} - -static inline int mapping_map_writable(struct address_space *mapping) -{ - return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? - 0 : -EPERM; -} - -static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) -{ - return 0; -} - -static inline void free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) -{ -} - -static inline int ksm_execve(struct mm_struct *mm) -{ - return 0; -} - -static inline void ksm_exit(struct mm_struct *mm) -{ -} - -static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) -{ - if (reset_refcnt) - refcount_set(&vma->vm_refcnt, 0); -} - -static inline void vma_numab_state_init(struct vm_area_struct *vma) -{ -} - -static inline void vma_numab_state_free(struct vm_area_struct *vma) -{ -} - -static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) -{ -} - -static inline void free_anon_vma_name(struct vm_area_struct *vma) -{ -} - -/* Declared in vma.h. */ -static inline void set_vma_from_desc(struct vm_area_struct *vma, - struct vm_area_desc *desc); - -static inline void mmap_action_prepare(struct mmap_action *action, - struct vm_area_desc *desc) -{ -} - -static inline int mmap_action_complete(struct mmap_action *action, - struct vm_area_struct *vma) -{ - return 0; -} - -static inline int __compat_vma_mmap(const struct file_operations *f_op, - struct file *file, struct vm_area_struct *vma) -{ - struct vm_area_desc desc = { - .mm = vma->vm_mm, - .file = file, - .start = vma->vm_start, - .end = vma->vm_end, - - .pgoff = vma->vm_pgoff, - .vm_file = vma->vm_file, - .vm_flags = vma->vm_flags, - .page_prot = vma->vm_page_prot, - - .action.type = MMAP_NOTHING, /* Default */ - }; - int err; - - err = f_op->mmap_prepare(&desc); - if (err) - return err; - - mmap_action_prepare(&desc.action, &desc); - set_vma_from_desc(vma, &desc); - return mmap_action_complete(&desc.action, vma); -} - -static inline int compat_vma_mmap(struct file *file, - struct vm_area_struct *vma) -{ - return __compat_vma_mmap(file->f_op, file, vma); -} - -/* Did the driver provide valid mmap hook configuration? */ -static inline bool can_mmap_file(struct file *file) -{ - bool has_mmap = file->f_op->mmap; - bool has_mmap_prepare = file->f_op->mmap_prepare; - - /* Hooks are mutually exclusive. */ - if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) - return false; - if (!has_mmap && !has_mmap_prepare) - return false; - - return true; -} - -static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) -{ - if (file->f_op->mmap_prepare) - return compat_vma_mmap(file, vma); - - return file->f_op->mmap(file, vma); -} - -static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) -{ - return file->f_op->mmap_prepare(desc); -} - -static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) -{ -} - -static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) -{ - /* Changing an anonymous vma with this is illegal */ - get_file(file); - swap(vma->vm_file, file); - fput(file); -} - -static inline bool shmem_file(struct file *file) -{ - return false; -} - -static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, - const struct file *file, vm_flags_t vm_flags) -{ - return vm_flags; -} - -static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn) -{ -} - -static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, unsigned long size, pgprot_t pgprot) -{ - return 0; -} +typedef unsigned long vm_flags_t; +#define pgoff_t unsigned long +typedef unsigned long pgprotval_t; +typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; +typedef __bitwise unsigned int vm_fault_t; -static inline int do_munmap(struct mm_struct *, unsigned long, size_t, - struct list_head *uf) -{ - return 0; -} +#include "include/stubs.h" +#include "include/dup.h" +#include "include/custom.h" #endif /* __MM_VMA_INTERNAL_H */ -- cgit v1.2.3 From f615cc92641a403d354c6ee68263074a86de49c7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 22 Jan 2026 16:06:22 +0000 Subject: tools/testing/vma: add VMA userland tests for VMA flag functions Now we have the capability to test the new helpers for the bitmap VMA flags in userland, do so. We also update the Makefile such that both VMA (and while we're here) mm_struct flag sizes can be customised on build. We default to 128-bit to enable testing of flags above word size even on 64-bit systems. We add userland tests to ensure that we do not regress VMA flag behaviour with the introduction when using bitmap VMA flags, nor accidentally introduce unexpected results due to for instance higher bit values not being correctly cleared/set. As part of this change, make __mk_vma_flags() a custom function so we can handle specifying invalid VMA bits. This is purposeful so we can have the VMA tests work at lower and higher number of VMA flags without having to duplicate code too much. Link: https://lkml.kernel.org/r/7fe6afe9c8c61e4d3cfc9a2d50a5d24da8528e68.1769097829.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jason Gunthorpe Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Cc: Damien Le Moal Cc: "Darrick J. Wong" Cc: Jarkko Sakkinen Cc: Yury Norov Cc: Chris Mason Cc: Pedro Falcato Signed-off-by: Andrew Morton --- tools/testing/vma/Makefile | 3 + tools/testing/vma/include/custom.h | 16 ++ tools/testing/vma/include/dup.h | 11 +- tools/testing/vma/tests/vma.c | 300 +++++++++++++++++++++++++++++++++++++ tools/testing/vma/vma_internal.h | 4 +- 5 files changed, 322 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile index 50aa4301b3a6..e72b45dedda5 100644 --- a/tools/testing/vma/Makefile +++ b/tools/testing/vma/Makefile @@ -9,6 +9,9 @@ include ../shared/shared.mk OFILES = $(SHARED_OFILES) main.o shared.o maple-shim.o TARGETS = vma +# These can be varied to test different sizes. +CFLAGS += -DNUM_VMA_FLAG_BITS=128 -DNUM_MM_FLAG_BITS=128 + main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h include/custom.h include/dup.h include/stubs.h vma: $(OFILES) diff --git a/tools/testing/vma/include/custom.h b/tools/testing/vma/include/custom.h index f567127efba9..802a76317245 100644 --- a/tools/testing/vma/include/custom.h +++ b/tools/testing/vma/include/custom.h @@ -101,3 +101,19 @@ static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) if (reset_refcnt) refcount_set(&vma->vm_refcnt, 0); } + +static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) +{ + vma_flags_t flags; + int i; + + /* + * For testing purposes: allow invalid bit specification so we can + * easily test. + */ + vma_flags_clear_all(&flags); + for (i = 0; i < count; i++) + if (bits[i] < NUM_VMA_FLAG_BITS) + vma_flag_set(&flags, bits[i]); + return flags; +} diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h index 0accfc296615..3078ff1487d3 100644 --- a/tools/testing/vma/include/dup.h +++ b/tools/testing/vma/include/dup.h @@ -838,16 +838,7 @@ static inline void vm_flags_clear(struct vm_area_struct *vma, vma_flags_clear_word(&vma->flags, flags); } -static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) -{ - vma_flags_t flags; - int i; - - vma_flags_clear_all(&flags); - for (i = 0; i < count; i++) - vma_flag_set(&flags, bits[i]); - return flags; -} +static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits); #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \ (const vma_flag_t []){__VA_ARGS__}) diff --git a/tools/testing/vma/tests/vma.c b/tools/testing/vma/tests/vma.c index 6d9775aee243..c54ffc954f11 100644 --- a/tools/testing/vma/tests/vma.c +++ b/tools/testing/vma/tests/vma.c @@ -1,5 +1,25 @@ // SPDX-License-Identifier: GPL-2.0-or-later +static bool compare_legacy_flags(vm_flags_t legacy_flags, vma_flags_t flags) +{ + const unsigned long legacy_val = legacy_flags; + /* The lower word should contain the precise same value. */ + const unsigned long flags_lower = flags.__vma_flags[0]; +#if NUM_VMA_FLAGS > BITS_PER_LONG + int i; + + /* All bits in higher flag values should be zero. */ + for (i = 1; i < NUM_VMA_FLAGS / BITS_PER_LONG; i++) { + if (flags.__vma_flags[i] != 0) + return false; + } +#endif + + static_assert(sizeof(legacy_flags) == sizeof(unsigned long)); + + return legacy_val == flags_lower; +} + static bool test_copy_vma(void) { vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; @@ -33,7 +53,287 @@ static bool test_copy_vma(void) return true; } +static bool test_vma_flags_unchanged(void) +{ + vma_flags_t flags = EMPTY_VMA_FLAGS; + vm_flags_t legacy_flags = 0; + int bit; + struct vm_area_struct vma; + struct vm_area_desc desc; + + + vma.flags = EMPTY_VMA_FLAGS; + desc.vma_flags = EMPTY_VMA_FLAGS; + + for (bit = 0; bit < BITS_PER_LONG; bit++) { + vma_flags_t mask = mk_vma_flags(bit); + + legacy_flags |= (1UL << bit); + + /* Individual flags. */ + vma_flags_set(&flags, bit); + ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags)); + + /* Via mask. */ + vma_flags_set_mask(&flags, mask); + ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags)); + + /* Same for VMA. */ + vma_set_flags(&vma, bit); + ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags)); + vma_set_flags_mask(&vma, mask); + ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags)); + + /* Same for VMA descriptor. */ + vma_desc_set_flags(&desc, bit); + ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags)); + vma_desc_set_flags_mask(&desc, mask); + ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags)); + } + + return true; +} + +static bool test_vma_flags_cleared(void) +{ + const vma_flags_t empty = EMPTY_VMA_FLAGS; + vma_flags_t flags; + int i; + + /* Set all bits high. */ + memset(&flags, 1, sizeof(flags)); + /* Try to clear. */ + vma_flags_clear_all(&flags); + /* Equal to EMPTY_VMA_FLAGS? */ + ASSERT_EQ(memcmp(&empty, &flags, sizeof(flags)), 0); + /* Make sure every unsigned long entry in bitmap array zero. */ + for (i = 0; i < sizeof(flags) / BITS_PER_LONG; i++) { + const unsigned long val = flags.__vma_flags[i]; + + ASSERT_EQ(val, 0); + } + + return true; +} + +/* + * Assert that VMA flag functions that operate at the system word level function + * correctly. + */ +static bool test_vma_flags_word(void) +{ + vma_flags_t flags = EMPTY_VMA_FLAGS; + const vma_flags_t comparison = + mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, 64, 65); + + /* Set some custom high flags. */ + vma_flags_set(&flags, 64, 65); + /* Now overwrite the first word. */ + vma_flags_overwrite_word(&flags, VM_READ | VM_WRITE); + /* Ensure they are equal. */ + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); + + flags = EMPTY_VMA_FLAGS; + vma_flags_set(&flags, 64, 65); + + /* Do the same with the _once() equivalent. */ + vma_flags_overwrite_word_once(&flags, VM_READ | VM_WRITE); + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); + + flags = EMPTY_VMA_FLAGS; + vma_flags_set(&flags, 64, 65); + + /* Make sure we can set a word without disturbing other bits. */ + vma_flags_set(&flags, VMA_WRITE_BIT); + vma_flags_set_word(&flags, VM_READ); + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); + + flags = EMPTY_VMA_FLAGS; + vma_flags_set(&flags, 64, 65); + + /* Make sure we can clear a word without disturbing other bits. */ + vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); + vma_flags_clear_word(&flags, VM_EXEC); + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); + + return true; +} + +/* Ensure that vma_flags_test() and friends works correctly. */ +static bool test_vma_flags_test(void) +{ + const vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, + VMA_EXEC_BIT, 64, 65); + struct vm_area_struct vma; + struct vm_area_desc desc; + + vma.flags = flags; + desc.vma_flags = flags; + +#define do_test(...) \ + ASSERT_TRUE(vma_flags_test(&flags, __VA_ARGS__)); \ + ASSERT_TRUE(vma_desc_test_flags(&desc, __VA_ARGS__)) + +#define do_test_all_true(...) \ + ASSERT_TRUE(vma_flags_test_all(&flags, __VA_ARGS__)); \ + ASSERT_TRUE(vma_test_all_flags(&vma, __VA_ARGS__)) + +#define do_test_all_false(...) \ + ASSERT_FALSE(vma_flags_test_all(&flags, __VA_ARGS__)); \ + ASSERT_FALSE(vma_test_all_flags(&vma, __VA_ARGS__)) + + /* + * Testing for some flags that are present, some that are not - should + * pass. ANY flags matching should work. + */ + do_test(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT); + /* However, the ...test_all() variant should NOT pass. */ + do_test_all_false(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT); + /* But should pass for flags present. */ + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65); + /* Also subsets... */ + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64); + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT); + do_test_all_true(VMA_READ_BIT); + /* + * Check _mask variant. We don't need to test extensively as macro + * helper is the equivalent. + */ + ASSERT_TRUE(vma_flags_test_mask(&flags, flags)); + ASSERT_TRUE(vma_flags_test_all_mask(&flags, flags)); + + /* Single bits. */ + do_test(VMA_READ_BIT); + do_test(VMA_WRITE_BIT); + do_test(VMA_EXEC_BIT); +#if NUM_VMA_FLAG_BITS > 64 + do_test(64); + do_test(65); +#endif + + /* Two bits. */ + do_test(VMA_READ_BIT, VMA_WRITE_BIT); + do_test(VMA_READ_BIT, VMA_EXEC_BIT); + do_test(VMA_WRITE_BIT, VMA_EXEC_BIT); + /* Ordering shouldn't matter. */ + do_test(VMA_WRITE_BIT, VMA_READ_BIT); + do_test(VMA_EXEC_BIT, VMA_READ_BIT); + do_test(VMA_EXEC_BIT, VMA_WRITE_BIT); +#if NUM_VMA_FLAG_BITS > 64 + do_test(VMA_READ_BIT, 64); + do_test(VMA_WRITE_BIT, 64); + do_test(64, VMA_READ_BIT); + do_test(64, VMA_WRITE_BIT); + do_test(VMA_READ_BIT, 65); + do_test(VMA_WRITE_BIT, 65); + do_test(65, VMA_READ_BIT); + do_test(65, VMA_WRITE_BIT); +#endif + /* Three bits. */ + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); +#if NUM_VMA_FLAG_BITS > 64 + /* No need to consider every single permutation. */ + do_test(VMA_READ_BIT, VMA_WRITE_BIT, 64); + do_test(VMA_READ_BIT, VMA_WRITE_BIT, 65); + + /* Four bits. */ + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64); + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 65); + + /* Five bits. */ + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65); +#endif + +#undef do_test +#undef do_test_all_true +#undef do_test_all_false + + return true; +} + +/* Ensure that vma_flags_clear() and friends works correctly. */ +static bool test_vma_flags_clear(void) +{ + vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, + VMA_EXEC_BIT, 64, 65); + vma_flags_t mask = mk_vma_flags(VMA_EXEC_BIT, 64); + struct vm_area_struct vma; + struct vm_area_desc desc; + + vma.flags = flags; + desc.vma_flags = flags; + + /* Cursory check of _mask() variant, as the helper macros imply. */ + vma_flags_clear_mask(&flags, mask); + vma_flags_clear_mask(&vma.flags, mask); + vma_desc_clear_flags_mask(&desc, mask); + ASSERT_FALSE(vma_flags_test(&flags, VMA_EXEC_BIT, 64)); + ASSERT_FALSE(vma_flags_test(&vma.flags, VMA_EXEC_BIT, 64)); + ASSERT_FALSE(vma_desc_test_flags(&desc, VMA_EXEC_BIT, 64)); + /* Reset. */ + vma_flags_set(&flags, VMA_EXEC_BIT, 64); + vma_set_flags(&vma, VMA_EXEC_BIT, 64); + vma_desc_set_flags(&desc, VMA_EXEC_BIT, 64); + + /* + * Clear the flags and assert clear worked, then reset flags back to + * include specified flags. + */ +#define do_test_and_reset(...) \ + vma_flags_clear(&flags, __VA_ARGS__); \ + vma_flags_clear(&vma.flags, __VA_ARGS__); \ + vma_desc_clear_flags(&desc, __VA_ARGS__); \ + ASSERT_FALSE(vma_flags_test(&flags, __VA_ARGS__)); \ + ASSERT_FALSE(vma_flags_test(&vma.flags, __VA_ARGS__)); \ + ASSERT_FALSE(vma_desc_test_flags(&desc, __VA_ARGS__)); \ + vma_flags_set(&flags, __VA_ARGS__); \ + vma_set_flags(&vma, __VA_ARGS__); \ + vma_desc_set_flags(&desc, __VA_ARGS__) + + /* Single flags. */ + do_test_and_reset(VMA_READ_BIT); + do_test_and_reset(VMA_WRITE_BIT); + do_test_and_reset(VMA_EXEC_BIT); + do_test_and_reset(64); + do_test_and_reset(65); + + /* Two flags, in different orders. */ + do_test_and_reset(VMA_READ_BIT, VMA_WRITE_BIT); + do_test_and_reset(VMA_READ_BIT, VMA_EXEC_BIT); + do_test_and_reset(VMA_READ_BIT, 64); + do_test_and_reset(VMA_READ_BIT, 65); + do_test_and_reset(VMA_WRITE_BIT, VMA_READ_BIT); + do_test_and_reset(VMA_WRITE_BIT, VMA_EXEC_BIT); + do_test_and_reset(VMA_WRITE_BIT, 64); + do_test_and_reset(VMA_WRITE_BIT, 65); + do_test_and_reset(VMA_EXEC_BIT, VMA_READ_BIT); + do_test_and_reset(VMA_EXEC_BIT, VMA_WRITE_BIT); + do_test_and_reset(VMA_EXEC_BIT, 64); + do_test_and_reset(VMA_EXEC_BIT, 65); + do_test_and_reset(64, VMA_READ_BIT); + do_test_and_reset(64, VMA_WRITE_BIT); + do_test_and_reset(64, VMA_EXEC_BIT); + do_test_and_reset(64, 65); + do_test_and_reset(65, VMA_READ_BIT); + do_test_and_reset(65, VMA_WRITE_BIT); + do_test_and_reset(65, VMA_EXEC_BIT); + do_test_and_reset(65, 64); + + /* Three flags. */ + +#undef do_test_some_missing +#undef do_test_and_reset + + return true; +} + static void run_vma_tests(int *num_tests, int *num_fail) { TEST(copy_vma); + TEST(vma_flags_unchanged); + TEST(vma_flags_cleared); + TEST(vma_flags_word); + TEST(vma_flags_test); + TEST(vma_flags_clear); } diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index e3ed05b57819..0e1121e2ef23 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -36,11 +36,11 @@ * ahead of all other headers. */ #define __private -#define NUM_MM_FLAG_BITS (64) +/* NUM_MM_FLAG_BITS defined by test code. */ typedef struct { __private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); } mm_flags_t; -#define NUM_VMA_FLAG_BITS BITS_PER_LONG +/* NUM_VMA_FLAG_BITS defined by test code. */ typedef struct { DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS); } __private vma_flags_t; -- cgit v1.2.3 From ff4ef2fbd10192357da76fd80796b7262df21b78 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 6 Feb 2026 11:16:37 +0800 Subject: selftests/mm: add memory failure anonymous page test Patch series "selftests/mm: add memory failure selftests", v4. Introduce selftests to validate the functionality of memory failure. These tests help ensure that memory failure handling for anonymous pages, pagecaches pages works correctly, including proper SIGBUS delivery to user processes, page isolation, and recovery paths. Currently madvise syscall is used to inject memory failures. And only anonymous pages and pagecaches are tested. More test scenarios, e.g. hugetlb, shmem, thp, will be added. Also more memory failure injecting methods will be supported, e.g. APEI Error INJection, if required. This patch (of 3): This patch adds a new kselftest to validate memory failure handling for anonymous pages. The test performs the following operations: 1. Allocates anonymous pages using mmap(). 2. Injects memory failure via madvise syscall. 3. Verifies expected error handling behavior. 4. Unpoison memory. This test helps ensure that memory failure handling for anonymous pages works correctly, including proper SIGBUS delivery to user processes, page isolation and recovery paths. Link: https://lkml.kernel.org/r/20260206031639.2707102-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20260206031639.2707102-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mark Brown Cc: Michal Hocko Cc: Mike Rapoport Cc: Naoya Horiguchi Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: kernel test robot Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 2 + tools/testing/selftests/mm/config | 2 + tools/testing/selftests/mm/ksft_memory_failure.sh | 4 + tools/testing/selftests/mm/memory-failure.c | 239 ++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 21 ++ tools/testing/selftests/mm/vm_util.c | 41 ++++ tools/testing/selftests/mm/vm_util.h | 3 + 8 files changed, 313 insertions(+) create mode 100755 tools/testing/selftests/mm/ksft_memory_failure.sh create mode 100644 tools/testing/selftests/mm/memory-failure.c (limited to 'tools') diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 702e5723c35d..bfd94a79e975 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -12,6 +12,7 @@ map_hugetlb map_populate thuge-gen compaction_test +memory-failure migration mlock2-tests mrelease_test diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 905f1e034963..4847c6d6c1b0 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -75,6 +75,7 @@ TEST_GEN_FILES += map_populate ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret endif +TEST_GEN_FILES += memory-failure TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test @@ -154,6 +155,7 @@ TEST_PROGS += ksft_ksm_numa.sh TEST_PROGS += ksft_madv_guard.sh TEST_PROGS += ksft_madv_populate.sh TEST_PROGS += ksft_memfd_secret.sh +TEST_PROGS += ksft_memory_failure.sh TEST_PROGS += ksft_migration.sh TEST_PROGS += ksft_mkdirty.sh TEST_PROGS += ksft_mlock.sh diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index deba93379c80..1dbe2b4558ab 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -11,3 +11,5 @@ CONFIG_ANON_VMA_NAME=y CONFIG_FTRACE=y CONFIG_PROFILING=y CONFIG_UPROBES=y +CONFIG_MEMORY_FAILURE=y +CONFIG_HWPOISON_INJECT=m diff --git a/tools/testing/selftests/mm/ksft_memory_failure.sh b/tools/testing/selftests/mm/ksft_memory_failure.sh new file mode 100755 index 000000000000..ae1614d4d49b --- /dev/null +++ b/tools/testing/selftests/mm/ksft_memory_failure.sh @@ -0,0 +1,4 @@ +#!/bin/sh -e +# SPDX-License-Identifier: GPL-2.0 + +./run_vmtests.sh -t memory-failure diff --git a/tools/testing/selftests/mm/memory-failure.c b/tools/testing/selftests/mm/memory-failure.c new file mode 100644 index 000000000000..37806a58f4b4 --- /dev/null +++ b/tools/testing/selftests/mm/memory-failure.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Memory-failure functional tests. + * + * Author(s): Miaohe Lin + */ + +#include "../kselftest_harness.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "vm_util.h" + +enum inject_type { + MADV_HARD, + MADV_SOFT, +}; + +enum result_type { + MADV_HARD_ANON, + MADV_SOFT_ANON, +}; + +static jmp_buf signal_jmp_buf; +static siginfo_t siginfo; +const char *pagemap_proc = "/proc/self/pagemap"; +const char *kpageflags_proc = "/proc/kpageflags"; + +FIXTURE(memory_failure) +{ + unsigned long page_size; + unsigned long corrupted_size; + unsigned long pfn; + int pagemap_fd; + int kpageflags_fd; + bool triggered; +}; + +FIXTURE_VARIANT(memory_failure) +{ + enum inject_type type; + int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr); +}; + +static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr) +{ + return madvise(vaddr, self->page_size, MADV_HWPOISON); +} + +FIXTURE_VARIANT_ADD(memory_failure, madv_hard) +{ + .type = MADV_HARD, + .inject = madv_hard_inject, +}; + +static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr) +{ + return madvise(vaddr, self->page_size, MADV_SOFT_OFFLINE); +} + +FIXTURE_VARIANT_ADD(memory_failure, madv_soft) +{ + .type = MADV_SOFT, + .inject = madv_soft_inject, +}; + +static void sigbus_action(int signo, siginfo_t *si, void *args) +{ + memcpy(&siginfo, si, sizeof(siginfo_t)); + siglongjmp(signal_jmp_buf, 1); +} + +static int setup_sighandler(void) +{ + struct sigaction sa = { + .sa_sigaction = sigbus_action, + .sa_flags = SA_SIGINFO, + }; + + return sigaction(SIGBUS, &sa, NULL); +} + +FIXTURE_SETUP(memory_failure) +{ + memset(self, 0, sizeof(*self)); + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + + memset(&siginfo, 0, sizeof(siginfo)); + if (setup_sighandler()) + SKIP(return, "setup sighandler failed.\n"); + + self->pagemap_fd = open(pagemap_proc, O_RDONLY); + if (self->pagemap_fd == -1) + SKIP(return, "open %s failed.\n", pagemap_proc); + + self->kpageflags_fd = open(kpageflags_proc, O_RDONLY); + if (self->kpageflags_fd == -1) + SKIP(return, "open %s failed.\n", kpageflags_proc); +} + +static void teardown_sighandler(void) +{ + struct sigaction sa = { + .sa_handler = SIG_DFL, + .sa_flags = SA_SIGINFO, + }; + + sigaction(SIGBUS, &sa, NULL); +} + +FIXTURE_TEARDOWN(memory_failure) +{ + close(self->kpageflags_fd); + close(self->pagemap_fd); + teardown_sighandler(); +} + +static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, + void *vaddr) +{ + self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr); + ASSERT_NE(self->pfn, -1UL); + + ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0); +} + +static bool check_memory(void *vaddr, unsigned long size) +{ + char buf[64]; + + memset(buf, 0xce, sizeof(buf)); + while (size >= sizeof(buf)) { + if (memcmp(vaddr, buf, sizeof(buf))) + return false; + size -= sizeof(buf); + vaddr += sizeof(buf); + } + + return true; +} + +static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, + void *vaddr, enum result_type type, int setjmp) +{ + unsigned long size; + uint64_t pfn_flags; + + switch (type) { + case MADV_SOFT_ANON: + /* It is not expected to receive a SIGBUS signal. */ + ASSERT_EQ(setjmp, 0); + + /* The page content should remain unchanged. */ + ASSERT_TRUE(check_memory(vaddr, self->page_size)); + + /* The backing pfn of addr should have changed. */ + ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn); + break; + case MADV_HARD_ANON: + /* The SIGBUS signal should have been received. */ + ASSERT_EQ(setjmp, 1); + + /* Check if siginfo contains correct SIGBUS context. */ + ASSERT_EQ(siginfo.si_signo, SIGBUS); + ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR); + ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size); + ASSERT_EQ(siginfo.si_addr, vaddr); + + /* XXX Check backing pte is hwpoison entry when supported. */ + ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr)); + break; + default: + SKIP(return, "unexpected inject type %d.\n", type); + } + + /* Check if the value of HardwareCorrupted has increased. */ + ASSERT_EQ(get_hardware_corrupted_size(&size), 0); + ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024); + + /* Check if HWPoison flag is set. */ + ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0); + ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON); +} + +static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, + void *vaddr) +{ + unsigned long size; + uint64_t pfn_flags; + + ASSERT_EQ(unpoison_memory(self->pfn), 0); + + /* Check if HWPoison flag is cleared. */ + ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0); + ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON); + + /* Check if the value of HardwareCorrupted has decreased. */ + ASSERT_EQ(get_hardware_corrupted_size(&size), 0); + ASSERT_EQ(size, self->corrupted_size); +} + +TEST_F(memory_failure, anon) +{ + char *addr; + int ret; + + addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (addr == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + memset(addr, 0xce, self->page_size); + + prepare(_metadata, self, addr); + + ret = sigsetjmp(signal_jmp_buf, 1); + if (!self->triggered) { + self->triggered = true; + ASSERT_EQ(variant->inject(self, addr), 0); + FORCE_READ(*addr); + } + + if (variant->type == MADV_HARD) + check(_metadata, self, addr, MADV_HARD_ANON, ret); + else + check(_metadata, self, addr, MADV_SOFT_ANON, ret); + + cleanup(_metadata, self, addr); + + ASSERT_EQ(munmap(addr, self->page_size), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 29be9038bfb0..afdcfd0d7cef 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -91,6 +91,8 @@ separated by spaces: test VMA merge cases behave as expected - rmap test rmap behaves as expected +- memory-failure + test memory-failure behaves as expected example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -527,6 +529,25 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned CATEGORY="rmap" run_test ./rmap +# Try to load hwpoison_inject if not present. +HWPOISON_DIR=/sys/kernel/debug/hwpoison/ +if [ ! -d "$HWPOISON_DIR" ]; then + if ! modprobe -q -R hwpoison_inject; then + echo "Module hwpoison_inject not found, skipping..." + else + modprobe hwpoison_inject > /dev/null 2>&1 + LOADED_MOD=1 + fi +fi + +if [ -d "$HWPOISON_DIR" ]; then + CATEGORY="memory-failure" run_test ./memory-failure +fi + +if [ -n "${LOADED_MOD}" ]; then + modprobe -r hwpoison_inject > /dev/null 2>&1 +fi + if [ "${HAVE_HUGEPAGES}" = 1 ]; then echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages fi diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index d954bf91afd5..a6d4ff7dfdc0 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -723,3 +723,44 @@ int ksm_stop(void) close(ksm_fd); return ret == 1 ? 0 : -errno; } + +int get_hardware_corrupted_size(unsigned long *val) +{ + unsigned long size; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + int ret = -1; + + if (!f) + return ret; + + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "HardwareCorrupted: %12lu kB", &size) == 1) { + *val = size; + ret = 0; + break; + } + } + + free(line); + fclose(f); + return ret; +} + +int unpoison_memory(unsigned long pfn) +{ + int unpoison_fd, len; + char buf[32]; + ssize_t ret; + + unpoison_fd = open("/sys/kernel/debug/hwpoison/unpoison-pfn", O_WRONLY); + if (unpoison_fd < 0) + return -errno; + + len = sprintf(buf, "0x%lx\n", pfn); + ret = write(unpoison_fd, buf, len); + close(unpoison_fd); + + return ret > 0 ? 0 : -errno; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 522f7f9050f5..e9c4e24769c1 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -20,6 +20,7 @@ #define KPF_COMPOUND_HEAD BIT_ULL(15) #define KPF_COMPOUND_TAIL BIT_ULL(16) +#define KPF_HWPOISON BIT_ULL(19) #define KPF_THP BIT_ULL(22) /* * Ignore the checkpatch warning, we must read from x but don't want to do @@ -154,6 +155,8 @@ long ksm_get_full_scans(void); int ksm_use_zero_pages(void); int ksm_start(void); int ksm_stop(void); +int get_hardware_corrupted_size(unsigned long *val); +int unpoison_memory(unsigned long pfn); /* * On ppc64 this will only work with radix 2M hugepage size -- cgit v1.2.3 From 12e8a2fae372c55c17a410929cfa60f96b93d17a Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 6 Feb 2026 11:16:38 +0800 Subject: selftests/mm: add memory failure clean pagecache test This patch adds a new testcase to validate memory failure handling for clean pagecache. This test performs similar operations as anonymous pages except allocating memory using mmap() with a file fd. This test helps ensure that memory failure handling for clean pagecache works correctly, including unchanged page content, page isolation, and recovery paths. Link: https://lkml.kernel.org/r/20260206031639.2707102-3-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601221142.mDWA1ucw-lkp@intel.com/ Cc: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mark Brown Cc: Michal Hocko Cc: Mike Rapoport Cc: Naoya Horiguchi Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/memory-failure.c | 66 +++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/mm/memory-failure.c b/tools/testing/selftests/mm/memory-failure.c index 37806a58f4b4..3aa624db9577 100644 --- a/tools/testing/selftests/mm/memory-failure.c +++ b/tools/testing/selftests/mm/memory-failure.c @@ -10,10 +10,14 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include #include "vm_util.h" @@ -24,7 +28,9 @@ enum inject_type { enum result_type { MADV_HARD_ANON, + MADV_HARD_CLEAN_PAGECACHE, MADV_SOFT_ANON, + MADV_SOFT_CLEAN_PAGECACHE, }; static jmp_buf signal_jmp_buf; @@ -154,6 +160,8 @@ static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure switch (type) { case MADV_SOFT_ANON: + case MADV_HARD_CLEAN_PAGECACHE: + case MADV_SOFT_CLEAN_PAGECACHE: /* It is not expected to receive a SIGBUS signal. */ ASSERT_EQ(setjmp, 0); @@ -236,4 +244,62 @@ TEST_F(memory_failure, anon) ASSERT_EQ(munmap(addr, self->page_size), 0); } +/* Borrowed from mm/gup_longterm.c. */ +static int get_fs_type(int fd) +{ + struct statfs fs; + int ret; + + do { + ret = fstatfs(fd, &fs); + } while (ret && errno == EINTR); + + return ret ? 0 : (int)fs.f_type; +} + +TEST_F(memory_failure, clean_pagecache) +{ + const char *fname = "./clean-page-cache-test-file"; + int fd; + char *addr; + int ret; + int fs_type; + + fd = open(fname, O_RDWR | O_CREAT, 0664); + if (fd < 0) + SKIP(return, "failed to open test file.\n"); + unlink(fname); + ftruncate(fd, self->page_size); + fs_type = get_fs_type(fd); + if (!fs_type || fs_type == TMPFS_MAGIC) + SKIP(return, "unsupported filesystem :%x\n", fs_type); + + addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + memset(addr, 0xce, self->page_size); + fsync(fd); + + prepare(_metadata, self, addr); + + ret = sigsetjmp(signal_jmp_buf, 1); + if (!self->triggered) { + self->triggered = true; + ASSERT_EQ(variant->inject(self, addr), 0); + FORCE_READ(*addr); + } + + if (variant->type == MADV_HARD) + check(_metadata, self, addr, MADV_HARD_CLEAN_PAGECACHE, ret); + else + check(_metadata, self, addr, MADV_SOFT_CLEAN_PAGECACHE, ret); + + cleanup(_metadata, self, addr); + + ASSERT_EQ(munmap(addr, self->page_size), 0); + + ASSERT_EQ(close(fd), 0); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From d51b5076c7468fad568645caf38a6979458a5de1 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 6 Feb 2026 11:16:39 +0800 Subject: selftests/mm: add memory failure dirty pagecache test This patch adds a new testcase to validate memory failure handling for dirty pagecache. This performs similar operations as clean pagecaches except fsync() is not used to keep pages dirty. This test helps ensure that memory failure handling for dirty pagecache works correctly, including proper SIGBUS delivery, page isolation, and recovery paths. Link: https://lkml.kernel.org/r/20260206031639.2707102-4-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: David Hildenbrand Cc: kernel test robot Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mark Brown Cc: Michal Hocko Cc: Mike Rapoport Cc: Naoya Horiguchi Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/memory-failure.c | 62 +++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/memory-failure.c b/tools/testing/selftests/mm/memory-failure.c index 3aa624db9577..3d9e0b9ffb41 100644 --- a/tools/testing/selftests/mm/memory-failure.c +++ b/tools/testing/selftests/mm/memory-failure.c @@ -29,8 +29,10 @@ enum inject_type { enum result_type { MADV_HARD_ANON, MADV_HARD_CLEAN_PAGECACHE, + MADV_HARD_DIRTY_PAGECACHE, MADV_SOFT_ANON, MADV_SOFT_CLEAN_PAGECACHE, + MADV_SOFT_DIRTY_PAGECACHE, }; static jmp_buf signal_jmp_buf; @@ -162,6 +164,7 @@ static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure case MADV_SOFT_ANON: case MADV_HARD_CLEAN_PAGECACHE: case MADV_SOFT_CLEAN_PAGECACHE: + case MADV_SOFT_DIRTY_PAGECACHE: /* It is not expected to receive a SIGBUS signal. */ ASSERT_EQ(setjmp, 0); @@ -172,6 +175,7 @@ static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn); break; case MADV_HARD_ANON: + case MADV_HARD_DIRTY_PAGECACHE: /* The SIGBUS signal should have been received. */ ASSERT_EQ(setjmp, 1); @@ -244,6 +248,18 @@ TEST_F(memory_failure, anon) ASSERT_EQ(munmap(addr, self->page_size), 0); } +static int prepare_file(const char *fname, unsigned long size) +{ + int fd; + + fd = open(fname, O_RDWR | O_CREAT, 0664); + if (fd >= 0) { + unlink(fname); + ftruncate(fd, size); + } + return fd; +} + /* Borrowed from mm/gup_longterm.c. */ static int get_fs_type(int fd) { @@ -259,17 +275,14 @@ static int get_fs_type(int fd) TEST_F(memory_failure, clean_pagecache) { - const char *fname = "./clean-page-cache-test-file"; int fd; char *addr; int ret; int fs_type; - fd = open(fname, O_RDWR | O_CREAT, 0664); + fd = prepare_file("./clean-page-cache-test-file", self->page_size); if (fd < 0) SKIP(return, "failed to open test file.\n"); - unlink(fname); - ftruncate(fd, self->page_size); fs_type = get_fs_type(fd); if (!fs_type || fs_type == TMPFS_MAGIC) SKIP(return, "unsupported filesystem :%x\n", fs_type); @@ -302,4 +315,45 @@ TEST_F(memory_failure, clean_pagecache) ASSERT_EQ(close(fd), 0); } +TEST_F(memory_failure, dirty_pagecache) +{ + int fd; + char *addr; + int ret; + int fs_type; + + fd = prepare_file("./dirty-page-cache-test-file", self->page_size); + if (fd < 0) + SKIP(return, "failed to open test file.\n"); + fs_type = get_fs_type(fd); + if (!fs_type || fs_type == TMPFS_MAGIC) + SKIP(return, "unsupported filesystem :%x\n", fs_type); + + addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + memset(addr, 0xce, self->page_size); + + prepare(_metadata, self, addr); + + ret = sigsetjmp(signal_jmp_buf, 1); + if (!self->triggered) { + self->triggered = true; + ASSERT_EQ(variant->inject(self, addr), 0); + FORCE_READ(*addr); + } + + if (variant->type == MADV_HARD) + check(_metadata, self, addr, MADV_HARD_DIRTY_PAGECACHE, ret); + else + check(_metadata, self, addr, MADV_SOFT_DIRTY_PAGECACHE, ret); + + cleanup(_metadata, self, addr); + + ASSERT_EQ(munmap(addr, self->page_size), 0); + + ASSERT_EQ(close(fd), 0); +} + TEST_HARNESS_MAIN -- cgit v1.2.3