| author | Maxime Ripard <mripard@kernel.org> | 2026-02-23 10:09:45 +0100 |
|---|---|---|
| committer | Maxime Ripard <mripard@kernel.org> | 2026-02-23 10:09:45 +0100 |
| commit | c17ee635fd3a482b2ad2bf5e269755c2eae5f25e | (patch) |
| tree | e3f147462d8a9fd0cf2312c8cd3c5a94da15c3e4 | /include/linux/mm.h |
| parent | 803ec1faf7c1823e6e3b1f2aaa81be18528c9436 | (diff) |
| parent | 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f | (diff) |
Merge drm/drm-fixes into drm-misc-fixes
7.0-rc1 was just released; let's merge it to kick off the new release cycle.
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Diffstat (limited to 'include/linux/mm.h')
| -rw-r--r-- | include/linux/mm.h | 373 |
1 file changed, 315 insertions, 58 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6f959d8ca4b4..5be3d8a8f806 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_MM_H
 #define _LINUX_MM_H
 
+#include <linux/args.h>
 #include <linux/errno.h>
 #include <linux/mmdebug.h>
 #include <linux/gfp.h>
@@ -36,6 +37,7 @@
 #include <linux/rcuwait.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
+#include <linux/iommu-debug-pagealloc.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -45,6 +47,7 @@ struct pt_regs;
 struct folio_batch;
 
 void arch_mm_preinit(void);
+void mm_core_init_early(void);
 void mm_core_init(void);
 void init_mm_internals(void);
 
@@ -359,7 +362,7 @@ enum {
 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
-#if defined(CONFIG_X86_USER_SHADOW_STACK)
+#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_RISCV_USER_CFI)
 	/*
 	 * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
 	 * support core mm.
@@ -460,7 +463,8 @@ enum {
 #define VM_PKEY_BIT4 VM_NONE
 #endif /* CONFIG_ARCH_PKEY_BITS > 4 */
 #endif /* CONFIG_ARCH_HAS_PKEYS */
-#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
+#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) || \
+	defined(CONFIG_RISCV_USER_CFI)
 #define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
 #else
 #define VM_SHADOW_STACK VM_NONE
@@ -548,17 +552,18 @@ enum {
 /*
  * Physically remapped pages are special. Tell the
  * rest of the world about it:
- *   VM_IO tells people not to look at these pages
+ *   IO tells people not to look at these pages
  *	(accesses can have side effects).
- *   VM_PFNMAP tells the core MM that the base pages are just
+ *   PFNMAP tells the core MM that the base pages are just
  *	raw PFN mappings, and do not have a "struct page" associated
  *	with them.
- *   VM_DONTEXPAND
+ *   DONTEXPAND
  *	Disable vma merging and expanding with mremap().
- *   VM_DONTDUMP
+ *   DONTDUMP
  *	Omit vma from core dump, even when VM_IO turned off.
  */
-#define VM_REMAP_FLAGS (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+#define VMA_REMAP_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT, \
+				     VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)
 
 /* This mask prevents VMA from being scanned with khugepaged */
 #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)
@@ -608,7 +613,11 @@ enum {
 /*
  * Flags which should result in page tables being copied on fork. These are
  * flags which indicate that the VMA maps page tables which cannot be
- * reconsistuted upon page fault, so necessitate page table copying upon
+ * reconsistuted upon page fault, so necessitate page table copying upon fork.
+ *
+ * Note that these flags should be compared with the DESTINATION VMA not the
+ * source, as VM_UFFD_WP may not be propagated to destination, while all other
+ * flags will be.
  *
  * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
 * reasonably reconstructed on page fault.
@@ -938,7 +947,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
 	 * system word.
 	 */
 	if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) {
-		unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags);
+		unsigned long *bitmap = vma->flags.__vma_flags;
 
 		bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG);
 	}
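The DESTINATION-vma rule in the fork-copy comment above is easy to get backwards, so a sketch may help. This is illustrative only — the helper name and the exact aggregate of flags are assumptions, not part of this patch:

```c
/* Hypothetical fork-time check, for illustration only. Per the comment
 * above, test the DESTINATION vma, since VM_UFFD_WP may have been
 * dropped from it even when the source vma carries it. */
static bool needs_page_table_copy(const struct vm_area_struct *dst_vma)
{
	return dst_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP);
}
```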
@@ -982,8 +991,7 @@ static inline void vm_flags_mod(struct vm_area_struct *vma,
 	__vm_flags_mod(vma, set, clear);
 }
 
-static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma,
-					   vma_flag_t bit)
+static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_t bit)
 {
 	const vm_flags_t mask = BIT((__force int)bit);
 
@@ -998,16 +1006,12 @@
  * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific
  * valid flags are allowed to do this.
  */
-static inline void vma_flag_set_atomic(struct vm_area_struct *vma,
-				       vma_flag_t bit)
+static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
 {
-	unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags);
-
-	/* mmap read lock/VMA read lock must be held. */
-	if (!rwsem_is_locked(&vma->vm_mm->mmap_lock))
-		vma_assert_locked(vma);
+	unsigned long *bitmap = vma->flags.__vma_flags;
 
-	if (__vma_flag_atomic_valid(vma, bit))
+	vma_assert_stabilised(vma);
+	if (__vma_atomic_valid_flag(vma, bit))
 		set_bit((__force int)bit, bitmap);
 }
 
@@ -1018,15 +1022,211 @@ static inline void vma_flag_set_atomic(struct vm_area_struct *vma,
  * This is necessarily racey, so callers must ensure that serialisation is
  * achieved through some other means, or that races are permissible.
  */
-static inline bool vma_flag_test_atomic(struct vm_area_struct *vma,
-					vma_flag_t bit)
+static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
 {
-	if (__vma_flag_atomic_valid(vma, bit))
+	if (__vma_atomic_valid_flag(vma, bit))
 		return test_bit((__force int)bit, &vma->vm_flags);
 
 	return false;
 }
 
+/* Set an individual VMA flag in flags, non-atomically. */
+static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
+{
+	unsigned long *bitmap = flags->__vma_flags;
+
+	__set_bit((__force int)bit, bitmap);
+}
+
+static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
+{
+	vma_flags_t flags;
+	int i;
+
+	vma_flags_clear_all(&flags);
+	for (i = 0; i < count; i++)
+		vma_flag_set(&flags, bits[i]);
+	return flags;
+}
+
+/*
+ * Helper macro which bitwise-or combines the specified input flags into a
+ * vma_flags_t bitmap value. E.g.:
+ *
+ *   vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT,
+ *				      VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
+ *
+ * The compiler cleverly optimises away all of the work and this ends up being
+ * equivalent to aggregating the values manually.
+ */
+#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
+					 (const vma_flag_t []){__VA_ARGS__})
+
+/* Test each of to_test flags in flags, non-atomically. */
+static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
+						vma_flags_t to_test)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_to_test = to_test.__vma_flags;
+
+	return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Test whether any specified VMA flag is set, e.g.:
+ *
+ *   if (vma_flags_test(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
+ */
+#define vma_flags_test(flags, ...) \
+	vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
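A brief usage sketch of the variadic API introduced in this hunk (illustrative only, not part of the patch; it assumes a local `vma_flags_t` value and a hypothetical `do_something()`):

```c
/* Illustrative sketch: build a mask once with mk_vma_flags(). */
vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT);

/* vma_flags_test() has "any bit set" semantics: this branch is taken
 * because VMA_IO_BIT is set, even though VMA_DONTDUMP_BIT is not. */
if (vma_flags_test(&flags, VMA_IO_BIT, VMA_DONTDUMP_BIT))
	do_something();
```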
+/* Test that ALL of the to_test flags are set, non-atomically. */
+static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
+						    vma_flags_t to_test)
+{
+	const unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_to_test = to_test.__vma_flags;
+
+	return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Test whether ALL specified VMA flags are set, e.g.:
+ *
+ *   if (vma_flags_test_all(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
+ */
+#define vma_flags_test_all(flags, ...) \
+	vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/* Set each of the to_set flags in flags, non-atomically. */
+static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
+{
+	unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_to_set = to_set.__vma_flags;
+
+	bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Set all specified VMA flags, e.g.:
+ *
+ *   vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
+ */
+#define vma_flags_set(flags, ...) \
+	vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/* Clear all of the to-clear flags in flags, non-atomically. */
+static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
+{
+	unsigned long *bitmap = flags->__vma_flags;
+	const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
+
+	bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Clear all specified individual flags, e.g.:
+ *
+ *   vma_flags_clear(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
+ */
+#define vma_flags_clear(flags, ...) \
+	vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/*
+ * Helper to test that ALL specified flags are set in a VMA.
+ *
+ * Note: appropriate locks must be held, this function does not acquire them for
+ * you.
+ */
+static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
+					   vma_flags_t flags)
+{
+	return vma_flags_test_all_mask(&vma->flags, flags);
+}
+
+/*
+ * Helper macro for checking that ALL specified flags are set in a VMA, e.g.:
+ *
+ *   if (vma_test_all_flags(vma, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
+ */
+#define vma_test_all_flags(vma, ...) \
+	vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+/*
+ * Helper to set all VMA flags in a VMA.
+ *
+ * Note: appropriate locks must be held, this function does not acquire them for
+ * you.
+ */
+static inline void vma_set_flags_mask(struct vm_area_struct *vma,
+				      vma_flags_t flags)
+{
+	vma_flags_set_mask(&vma->flags, flags);
+}
+
+/*
+ * Helper macro for specifying VMA flags in a VMA, e.g.:
+ *
+ *   vma_set_flags(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
+ *		   VMA_DONTDUMP_BIT);
+ *
+ * Note: appropriate locks must be held, this function does not acquire them for
+ * you.
+ */
+#define vma_set_flags(vma, ...) \
+	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+/* Helper to test all VMA flags in a VMA descriptor. */
+static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
+					    vma_flags_t flags)
+{
+	return vma_flags_test_mask(&desc->vma_flags, flags);
+}
+
+/*
+ * Helper macro for testing VMA flags for an input pointer to a struct
+ * vm_area_desc object describing a proposed VMA, e.g.:
+ *
+ *   if (vma_desc_test_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
+ *			     VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
+ */
+#define vma_desc_test_flags(desc, ...) \
+	vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
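For context, here is how a driver might use the descriptor helpers from a hook that receives a `struct vm_area_desc`. This is a sketch under assumptions: the hook name and shape follow the recent `mmap_prepare` style, the flag choices are arbitrary, and the setter helper used is defined in the hunk just below:

```c
/* Hypothetical driver hook, for illustration only. */
static int my_mmap_prepare(struct vm_area_desc *desc)
{
	/* Refuse mappings that are, or could be made, writable. */
	if (vma_desc_test_flags(desc, VMA_WRITE_BIT, VMA_MAYWRITE_BIT))
		return -EPERM;

	/* Mark the mapping as a raw PFN remap; see vma_desc_set_flags()
	 * defined below. */
	vma_desc_set_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
			   VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
	return 0;
}
```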
+/* Helper to set all VMA flags in a VMA descriptor. */
+static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
+					   vma_flags_t flags)
+{
+	vma_flags_set_mask(&desc->vma_flags, flags);
+}
+
+/*
+ * Helper macro for specifying VMA flags for an input pointer to a struct
+ * vm_area_desc object describing a proposed VMA, e.g.:
+ *
+ *   vma_desc_set_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
+ *			VMA_DONTDUMP_BIT);
+ */
+#define vma_desc_set_flags(desc, ...) \
+	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+
+/* Helper to clear all VMA flags in a VMA descriptor. */
+static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
+					     vma_flags_t flags)
+{
+	vma_flags_clear_mask(&desc->vma_flags, flags);
+}
+
+/*
+ * Helper macro for clearing VMA flags for an input pointer to a struct
+ * vm_area_desc object describing a proposed VMA, e.g.:
+ *
+ *   vma_desc_clear_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
+ *			  VMA_DONTDUMP_BIT);
+ */
+#define vma_desc_clear_flags(desc, ...) \
+	vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+
 static inline void vma_set_anonymous(struct vm_area_struct *vma)
 {
 	vma->vm_ops = NULL;
@@ -1092,15 +1292,20 @@ static inline bool vma_is_accessible(const struct vm_area_struct *vma)
 	return vma->vm_flags & VM_ACCESS_FLAGS;
 }
 
-static inline bool is_shared_maywrite(vm_flags_t vm_flags)
+static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
 {
 	return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
 		(VM_SHARED | VM_MAYWRITE);
 }
 
+static inline bool is_shared_maywrite(const vma_flags_t *flags)
+{
+	return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
+}
+
 static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma)
 {
-	return is_shared_maywrite(vma->vm_flags);
+	return is_shared_maywrite(&vma->flags);
 }
 
 static inline
@@ -1728,6 +1933,14 @@ static inline bool is_cow_mapping(vm_flags_t flags)
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+static inline bool vma_desc_is_cow_mapping(struct vm_area_desc *desc)
+{
+	const vma_flags_t *flags = &desc->vma_flags;
+
+	return vma_flags_test(flags, VMA_MAYWRITE_BIT) &&
+	       !vma_flags_test(flags, VMA_SHARED_BIT);
+}
+
 #ifndef CONFIG_MMU
 static inline bool is_nommu_shared_mapping(vm_flags_t flags)
 {
@@ -1741,6 +1954,11 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags)
 	 */
 	return flags & (VM_MAYSHARE | VM_MAYOVERLAY);
 }
+
+static inline bool is_nommu_shared_vma_flags(const vma_flags_t *flags)
+{
+	return vma_flags_test(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT);
+}
 #endif
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -2623,10 +2841,6 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
 	zap_page_range_single(vma, vma->vm_start,
 			      vma->vm_end - vma->vm_start, NULL);
 }
-void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
-		struct vm_area_struct *start_vma, unsigned long start,
-		unsigned long end, unsigned long tree_end);
-
 struct mmu_notifier_range;
 
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
@@ -2901,6 +3115,13 @@ static inline unsigned long get_mm_rss(struct mm_struct *mm)
 		get_mm_counter(mm, MM_SHMEMPAGES);
 }
 
+static inline unsigned long get_mm_rss_sum(struct mm_struct *mm)
+{
+	return get_mm_counter_sum(mm, MM_FILEPAGES) +
+	       get_mm_counter_sum(mm, MM_ANONPAGES) +
+	       get_mm_counter_sum(mm, MM_SHMEMPAGES);
+}
+
 static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
 {
 	return max(mm->hiwater_rss, get_mm_rss(mm));
@@ -2975,15 +3196,8 @@ static inline pud_t pud_mkspecial(pud_t pud)
 }
 #endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
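For clarity, the new `vma_desc_is_cow_mapping()` above mirrors the long-standing `is_cow_mapping()` mask test bit-for-bit. A side-by-side sketch (illustrative variable names, not part of the patch):

```c
/* Equivalent formulations of "this is a COW mapping". */
bool cow_old = (vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
bool cow_new = vma_flags_test(flags, VMA_MAYWRITE_BIT) &&
	       !vma_flags_test(flags, VMA_SHARED_BIT);
/* cow_old == cow_new whenever vm_flags and *flags encode the same bits. */
```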
 
-extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
-			       spinlock_t **ptl);
-static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
-				    spinlock_t **ptl)
-{
-	pte_t *ptep;
-	__cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl));
-	return ptep;
-}
+extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
+			     spinlock_t **ptl);
 
 #ifdef __PAGETABLE_P4D_FOLDED
 static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
@@ -3337,31 +3551,15 @@ static inline bool pagetable_pte_ctor(struct mm_struct *mm,
 	return true;
 }
 
-pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
-static inline pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr,
-				      pmd_t *pmdvalp)
-{
-	pte_t *pte;
+pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
 
-	__cond_lock(RCU, pte = ___pte_offset_map(pmd, addr, pmdvalp));
-	return pte;
-}
 static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
 {
 	return __pte_offset_map(pmd, addr, NULL);
 }
 
-pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
-			     unsigned long addr, spinlock_t **ptlp);
-static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
-					 unsigned long addr, spinlock_t **ptlp)
-{
-	pte_t *pte;
-
-	__cond_lock(RCU, __cond_lock(*ptlp,
-			pte = __pte_offset_map_lock(mm, pmd, addr, ptlp)));
-	return pte;
-}
+pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
+			   unsigned long addr, spinlock_t **ptlp);
 
 pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd,
 				unsigned long addr, spinlock_t **ptlp);
@@ -3536,7 +3734,7 @@ static inline unsigned long get_num_physpages(void)
 }
 
 /*
- * Using memblock node mappings, an architecture may initialise its
+ * FIXME: Using memblock node mappings, an architecture may initialise its
  * zones, allocate the backing mem_map and account for memory holes in an
 * architecture independent manner.
 *
@@ -3551,7 +3749,7 @@ static inline unsigned long get_num_physpages(void)
 *	memblock_add_node(base, size, nid, MEMBLOCK_NONE)
 *	free_area_init(max_zone_pfns);
 */
-void free_area_init(unsigned long *max_zone_pfn);
+void arch_zone_limits_init(unsigned long *max_zone_pfn);
 unsigned long node_map_pfn_alignment(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
@@ -4133,12 +4331,16 @@ extern void __kernel_map_pages(struct page *page, int numpages, int enable);
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
 {
+	iommu_debug_check_unmapped(page, numpages);
+
 	if (debug_pagealloc_enabled_static())
 		__kernel_map_pages(page, numpages, 1);
 }
 
 static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
 {
+	iommu_debug_check_unmapped(page, numpages);
+
 	if (debug_pagealloc_enabled_static())
 		__kernel_map_pages(page, numpages, 0);
 }
@@ -4194,6 +4396,61 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
 				unsigned int order) {}
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
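The prototype cleanup above (dropping the `__cond_lock()` wrappers) does not change the caller contract for `pte_offset_map_lock()`; the usual pattern remains. A sketch, assuming the standard `pte_unmap_unlock()` pairing:

```c
/* Illustrative caller pattern, not part of the patch. */
spinlock_t *ptl;
pte_t *pte = pte_offset_map_lock(mm, pmd, addr, &ptl);

if (!pte)
	return -EAGAIN;	/* the page table vanished under us; retry */
/* ... inspect or modify *pte while ptl is held ... */
pte_unmap_unlock(pte, ptl);
```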
+#ifndef clear_pages
+/**
+ * clear_pages() - clear a page range for kernel-internal use.
+ * @addr: start address
+ * @npages: number of pages
+ *
+ * Use clear_user_pages() instead when clearing a page range to be
+ * mapped to user space.
+ *
+ * Does absolutely no exception handling.
+ *
+ * Note that even though the clearing operation is preemptible, clear_pages()
+ * does not (and on architectures where it reduces to a few long-running
+ * instructions, might not be able to) call cond_resched() to check if
+ * rescheduling is required.
+ *
+ * When running under preemptible models this is not a problem. Under
+ * cooperatively scheduled models, however, the caller is expected to
+ * limit @npages to no more than PROCESS_PAGES_NON_PREEMPT_BATCH.
+ */
+static inline void clear_pages(void *addr, unsigned int npages)
+{
+	do {
+		clear_page(addr);
+		addr += PAGE_SIZE;
+	} while (--npages);
+}
+#endif
+
+#ifndef PROCESS_PAGES_NON_PREEMPT_BATCH
+#ifdef clear_pages
+/*
+ * The architecture defines clear_pages(), and we assume that it is
+ * generally "fast". So choose a batch size large enough to allow the processor
+ * headroom for optimizing the operation and yet small enough that we see
+ * reasonable preemption latency for when this optimization is not possible
+ * (ex. slow microarchitectures, memory bandwidth saturation.)
+ *
+ * With a value of 32MB and assuming a memory bandwidth of ~10GBps, this should
+ * result in worst case preemption latency of around 3ms when clearing pages.
+ *
+ * (See comment above clear_pages() for why preemption latency is a concern
+ * here.)
+ */
+#define PROCESS_PAGES_NON_PREEMPT_BATCH (SZ_32M >> PAGE_SHIFT)
+#else /* !clear_pages */
+/*
+ * The architecture does not provide a clear_pages() implementation. Assume
+ * that clear_page() -- which clear_pages() will fallback to -- is relatively
+ * slow and choose a small value for PROCESS_PAGES_NON_PREEMPT_BATCH.
+ */
+#define PROCESS_PAGES_NON_PREEMPT_BATCH 1
+#endif
+#endif
+
 #ifdef __HAVE_ARCH_GATE_AREA
 extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
 extern int in_gate_area_no_mm(unsigned long addr);
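Per the comment above, `clear_pages()` never calls `cond_resched()`, so on cooperatively scheduled models callers are expected to batch. A sketch of such a caller (hypothetical helper name, not part of the patch):

```c
/* Hypothetical caller, for illustration: bound preemption latency by
 * clearing at most PROCESS_PAGES_NON_PREEMPT_BATCH pages per step. */
static void clear_pages_resched(void *addr, unsigned long npages)
{
	while (npages) {
		unsigned long n = min(npages,
				(unsigned long)PROCESS_PAGES_NON_PREEMPT_BATCH);

		clear_pages(addr, n);
		addr += n << PAGE_SHIFT;
		npages -= n;
		cond_resched();	/* safe point between batches */
	}
}
```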
