summaryrefslogtreecommitdiff
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--include/linux/mm.h741
1 files changed, 538 insertions, 203 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5be3d8a8f806..0b776907152e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -27,7 +27,6 @@
#include <linux/page-flags.h>
#include <linux/page_ref.h>
#include <linux/overflow.h>
-#include <linux/sizes.h>
#include <linux/sched.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
@@ -208,8 +207,6 @@ static inline void __mm_zero_struct_page(struct page *page)
#define MAPCOUNT_ELF_CORE_MARGIN (5)
#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-extern int sysctl_max_map_count;
-
extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
@@ -349,9 +346,9 @@ enum {
* if KVM does not lock down the memory type.
*/
DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
-#ifdef CONFIG_PPC32
+#if defined(CONFIG_PPC32)
DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
-#else
+#elif defined(CONFIG_64BIT)
DECLARE_VMA_BIT(DROPPABLE, 40),
#endif
DECLARE_VMA_BIT(UFFD_MINOR, 41),
@@ -466,8 +463,10 @@ enum {
#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) || \
defined(CONFIG_RISCV_USER_CFI)
#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
+#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT, VMA_SHADOW_STACK_BIT)
#else
#define VM_SHADOW_STACK VM_NONE
+#define VMA_STARTGAP_FLAGS mk_vma_flags(VMA_GROWSDOWN_BIT)
#endif
#if defined(CONFIG_PPC64)
#define VM_SAO INIT_VM_FLAG(SAO)
@@ -506,32 +505,41 @@ enum {
#endif
#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
+#define VMA_DROPPABLE mk_vma_flags(VMA_DROPPABLE_BIT)
#else
#define VM_DROPPABLE VM_NONE
+#define VMA_DROPPABLE EMPTY_VMA_FLAGS
#endif
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
-#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+#define TASK_EXEC_BIT ((current->personality & READ_IMPLIES_EXEC) ? \
+ VMA_EXEC_BIT : VMA_READ_BIT)
/* Common data flag combinations */
-#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
- VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
-#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
+#define VMA_DATA_FLAGS_TSK_EXEC mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+ TASK_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, \
+ VMA_MAYEXEC_BIT)
+#define VMA_DATA_FLAGS_NON_EXEC mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+ VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, VMA_MAYEXEC_BIT)
+#define VMA_DATA_FLAGS_EXEC mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, \
+ VMA_EXEC_BIT, VMA_MAYREAD_BIT, VMA_MAYWRITE_BIT, \
+ VMA_MAYEXEC_BIT)
+
+#ifndef VMA_DATA_DEFAULT_FLAGS /* arch can override this */
+#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_EXEC
#endif
-#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
-#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#ifndef VMA_STACK_DEFAULT_FLAGS /* arch can override this */
+#define VMA_STACK_DEFAULT_FLAGS VMA_DATA_DEFAULT_FLAGS
#endif
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+#define VMA_STACK_FLAGS append_vma_flags(VMA_STACK_DEFAULT_FLAGS, \
+ VMA_STACK_BIT, VMA_ACCOUNT_BIT)
+
+/* Temporary until VMA flags conversion complete. */
+#define VM_STACK_FLAGS vma_flags_to_legacy(VMA_STACK_FLAGS)
#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS
#define VM_SEALED_SYSMAP VM_SEALED
@@ -539,15 +547,17 @@ enum {
#define VM_SEALED_SYSMAP VM_NONE
#endif
-#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
-
/* VMA basic access permission flags */
#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+#define VMA_ACCESS_FLAGS mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT)
/*
* Special vmas that are non-mergable, non-mlock()able.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+
+#define VMA_SPECIAL_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_DONTEXPAND_BIT, \
+ VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT)
+#define VM_SPECIAL vma_flags_to_legacy(VMA_SPECIAL_FLAGS)
/*
* Physically remapped pages are special. Tell the
@@ -574,6 +584,8 @@ enum {
/* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
+#define VMA_LOCKED_MASK mk_vma_flags(VMA_LOCKED_BIT, VMA_LOCKONFAULT_BIT)
+
/* These flags can be updated atomically via VMA/mmap read lock. */
#define VM_ATOMIC_SET_ALLOWED VM_MAYBE_GUARD
@@ -588,27 +600,32 @@ enum {
* possesses it but the other does not, the merged VMA should nonetheless have
* applied to it:
*
- * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its
- * references cleared via /proc/$pid/clear_refs, any merged VMA
- * should be considered soft-dirty also as it operates at a VMA
- * granularity.
+ * VMA_SOFTDIRTY_BIT - if a VMA is marked soft-dirty, that is has not had its
+ * references cleared via /proc/$pid/clear_refs, any
+ * merged VMA should be considered soft-dirty also as it
+ * operates at a VMA granularity.
*
- * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that
- * mapped page tables may contain metadata not described by the
- * VMA and thus any merged VMA may also contain this metadata,
- * and thus we must make this flag sticky.
+ * VMA_MAYBE_GUARD_BIT - If a VMA may have guard regions in place it implies
+ * that mapped page tables may contain metadata not
+ * described by the VMA and thus any merged VMA may also
+ * contain this metadata, and thus we must make this flag
+ * sticky.
*/
-#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VMA_STICKY_FLAGS mk_vma_flags(VMA_SOFTDIRTY_BIT, VMA_MAYBE_GUARD_BIT)
+#else
+#define VMA_STICKY_FLAGS mk_vma_flags(VMA_MAYBE_GUARD_BIT)
+#endif
/*
* VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
* of these flags and the other not does not preclude a merge.
*
- * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
- * 'sticky'. If any sticky flags exist in either VMA, we simply
- * set all of them on the merged VMA.
+ * VMA_STICKY_FLAGS - When merging VMAs, VMA flags must match, unless they
+ * are 'sticky'. If any sticky flags exist in either VMA,
+ * we simply set all of them on the merged VMA.
*/
-#define VM_IGNORE_MERGE VM_STICKY
+#define VMA_IGNORE_MERGE_FLAGS VMA_STICKY_FLAGS
/*
* Flags which should result in page tables being copied on fork. These are
@@ -741,21 +758,45 @@ struct vm_fault {
*/
};
+struct vm_uffd_ops;
+
/*
* These are the virtual MM functions - opening of an area, closing and
* unmapping it (needed to keep files on disk up-to-date etc), pointer
* to the functions called when a no-page or a wp-page exception occurs.
*/
struct vm_operations_struct {
- void (*open)(struct vm_area_struct * area);
+ /**
+ * @open: Called when a VMA is remapped, split or forked. Not called
+ * upon first mapping a VMA.
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
+ void (*open)(struct vm_area_struct *vma);
/**
* @close: Called when the VMA is being removed from the MM.
* Context: User context. May sleep. Caller holds mmap_lock.
*/
- void (*close)(struct vm_area_struct * area);
+ void (*close)(struct vm_area_struct *vma);
+ /**
+ * @mapped: Called when the VMA is first mapped in the MM. Not called if
+ * the new VMA is merged with an adjacent VMA.
+ *
+ * The @vm_private_data field is an output field allowing the user to
+ * modify vma->vm_private_data as necessary.
+ *
+ * ONLY valid if set from f_op->mmap_prepare. Will result in an error if
+ * set from f_op->mmap.
+ *
+ * Returns %0 on success, or an error otherwise. On error, the VMA will
+ * be unmapped.
+ *
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
+ int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data);
/* Called any time before splitting to check if it's allowed */
- int (*may_split)(struct vm_area_struct *area, unsigned long addr);
- int (*mremap)(struct vm_area_struct *area);
+ int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
+ int (*mremap)(struct vm_area_struct *vma);
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
@@ -767,7 +808,7 @@ struct vm_operations_struct {
vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
- unsigned long (*pagesize)(struct vm_area_struct * area);
+ unsigned long (*pagesize)(struct vm_area_struct *vma);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
@@ -826,6 +867,9 @@ struct vm_operations_struct {
struct page *(*find_normal_page)(struct vm_area_struct *vma,
unsigned long addr);
#endif /* CONFIG_FIND_NORMAL_PAGE */
+#ifdef CONFIG_USERFAULTFD
+ const struct vm_uffd_ops *uffd_ops;
+#endif
};
#ifdef CONFIG_NUMA_BALANCING
@@ -937,22 +981,20 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
vm_flags_init(vma, flags);
}
-static inline void vm_flags_reset_once(struct vm_area_struct *vma,
- vm_flags_t flags)
+static inline void vma_flags_reset_once(struct vm_area_struct *vma,
+ vma_flags_t *flags)
{
- vma_assert_write_locked(vma);
- /*
- * If VMA flags exist beyond the first system word, also clear these. It
- * is assumed the write once behaviour is required only for the first
- * system word.
- */
+ const unsigned long word = flags->__vma_flags[0];
+
+ /* It is assumed only the first system word must be written once. */
+ vma_flags_overwrite_word_once(&vma->flags, word);
+ /* The remainder can be copied normally. */
if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) {
- unsigned long *bitmap = vma->flags.__vma_flags;
+ unsigned long *dst = &vma->flags.__vma_flags[1];
+ const unsigned long *src = &flags->__vma_flags[1];
- bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG);
+ bitmap_copy(dst, src, NUM_VMA_FLAG_BITS - BITS_PER_LONG);
}
-
- vma_flags_overwrite_word_once(&vma->flags, flags);
}
static inline void vm_flags_set(struct vm_area_struct *vma,
@@ -991,7 +1033,8 @@ static inline void vm_flags_mod(struct vm_area_struct *vma,
__vm_flags_mod(vma, set, clear);
}
-static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_t bit)
+static __always_inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma,
+ vma_flag_t bit)
{
const vm_flags_t mask = BIT((__force int)bit);
@@ -1006,7 +1049,8 @@ static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_
* Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific
* valid flags are allowed to do this.
*/
-static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
+static __always_inline void vma_set_atomic_flag(struct vm_area_struct *vma,
+ vma_flag_t bit)
{
unsigned long *bitmap = vma->flags.__vma_flags;
@@ -1022,7 +1066,8 @@ static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bi
* This is necessarily racey, so callers must ensure that serialisation is
* achieved through some other means, or that races are permissible.
*/
-static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
+static __always_inline bool vma_test_atomic_flag(struct vm_area_struct *vma,
+ vma_flag_t bit)
{
if (__vma_atomic_valid_flag(vma, bit))
return test_bit((__force int)bit, &vma->vm_flags);
@@ -1031,21 +1076,21 @@ static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t b
}
/* Set an individual VMA flag in flags, non-atomically. */
-static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
+static __always_inline void vma_flags_set_flag(vma_flags_t *flags,
+ vma_flag_t bit)
{
unsigned long *bitmap = flags->__vma_flags;
__set_bit((__force int)bit, bitmap);
}
-static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
+static __always_inline vma_flags_t __mk_vma_flags(vma_flags_t flags,
+ size_t count, const vma_flag_t *bits)
{
- vma_flags_t flags;
int i;
- vma_flags_clear_all(&flags);
for (i = 0; i < count; i++)
- vma_flag_set(&flags, bits[i]);
+ vma_flags_set_flag(&flags, bits[i]);
return flags;
}
@@ -1054,16 +1099,73 @@ static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
* vma_flags_t bitmap value. E.g.:
*
* vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT,
- * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
+ * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
*
* The compiler cleverly optimises away all of the work and this ends up being
* equivalent to aggregating the values manually.
*/
-#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
- (const vma_flag_t []){__VA_ARGS__})
+#define mk_vma_flags(...) __mk_vma_flags(EMPTY_VMA_FLAGS, \
+ COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
+
+/*
+ * Helper macro which acts like mk_vma_flags, only appending to a copy of the
+ * specified flags rather than establishing new flags. E.g.:
+ *
+ * vma_flags_t flags = append_vma_flags(VMA_STACK_DEFAULT_FLAGS, VMA_STACK_BIT,
+ * VMA_ACCOUNT_BIT);
+ */
+#define append_vma_flags(flags, ...) __mk_vma_flags(flags, \
+ COUNT_ARGS(__VA_ARGS__), (const vma_flag_t []){__VA_ARGS__})
+
+/* Calculates the number of set bits in the specified VMA flags. */
+static __always_inline int vma_flags_count(const vma_flags_t *flags)
+{
+ const unsigned long *bitmap = flags->__vma_flags;
+
+ return bitmap_weight(bitmap, NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Test whether a specific VMA flag is set, e.g.:
+ *
+ * if (vma_flags_test(flags, VMA_READ_BIT)) { ... }
+ */
+static __always_inline bool vma_flags_test(const vma_flags_t *flags,
+ vma_flag_t bit)
+{
+ const unsigned long *bitmap = flags->__vma_flags;
+
+ return test_bit((__force int)bit, bitmap);
+}
+
+/*
+ * Obtain a set of VMA flags which contain the overlapping flags contained
+ * within flags and to_and.
+ */
+static __always_inline vma_flags_t vma_flags_and_mask(const vma_flags_t *flags,
+ vma_flags_t to_and)
+{
+ vma_flags_t dst;
+ unsigned long *bitmap_dst = dst.__vma_flags;
+ const unsigned long *bitmap = flags->__vma_flags;
+ const unsigned long *bitmap_to_and = to_and.__vma_flags;
+
+ bitmap_and(bitmap_dst, bitmap, bitmap_to_and, NUM_VMA_FLAG_BITS);
+ return dst;
+}
+
+/*
+ * Obtain a set of VMA flags which contains the specified overlapping flags,
+ * e.g.:
+ *
+ * vma_flags_t read_flags = vma_flags_and(&flags, VMA_READ_BIT,
+ * VMA_MAY_READ_BIT);
+ */
+#define vma_flags_and(flags, ...) \
+ vma_flags_and_mask(flags, mk_vma_flags(__VA_ARGS__))
/* Test each of to_test flags in flags, non-atomically. */
-static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
+static __always_inline bool vma_flags_test_any_mask(const vma_flags_t *flags,
vma_flags_t to_test)
{
const unsigned long *bitmap = flags->__vma_flags;
@@ -1075,10 +1177,10 @@ static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
/*
* Test whether any specified VMA flag is set, e.g.:
*
- * if (vma_flags_test(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
+ * if (vma_flags_test_any(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
*/
-#define vma_flags_test(flags, ...) \
- vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
+#define vma_flags_test_any(flags, ...) \
+ vma_flags_test_any_mask(flags, mk_vma_flags(__VA_ARGS__))
/* Test that ALL of the to_test flags are set, non-atomically. */
static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
@@ -1098,8 +1200,29 @@ static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
#define vma_flags_test_all(flags, ...) \
vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
+/*
+ * Helper to test that a flag mask of type vma_flags_t has a SINGLE flag set
+ * (returning false if flagmask has no flags set).
+ *
+ * This is defined to make the semantics clearer when testing an optionally
+ * defined VMA flags mask, e.g.:
+ *
+ * if (vma_flags_test_single_mask(&flags, VMA_DROPPABLE)) { ... }
+ *
+ * When VMA_DROPPABLE is defined if available, or set to EMPTY_VMA_FLAGS
+ * otherwise.
+ */
+static __always_inline bool vma_flags_test_single_mask(const vma_flags_t *flags,
+ vma_flags_t flagmask)
+{
+ VM_WARN_ON_ONCE(vma_flags_count(&flagmask) > 1);
+
+ return vma_flags_test_any_mask(flags, flagmask);
+}
+
/* Set each of the to_set flags in flags, non-atomically. */
-static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
+static __always_inline void vma_flags_set_mask(vma_flags_t *flags,
+ vma_flags_t to_set)
{
unsigned long *bitmap = flags->__vma_flags;
const unsigned long *bitmap_to_set = to_set.__vma_flags;
@@ -1116,7 +1239,8 @@ static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t t
vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
/* Clear all of the to-clear flags in flags, non-atomically. */
-static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
+static __always_inline void vma_flags_clear_mask(vma_flags_t *flags,
+ vma_flags_t to_clear)
{
unsigned long *bitmap = flags->__vma_flags;
const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
@@ -1133,13 +1257,85 @@ static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t
vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
/*
+ * Obtain a VMA flags value containing those flags that are present in flags or
+ * flags_other but not in both.
+ */
+static __always_inline vma_flags_t vma_flags_diff_pair(const vma_flags_t *flags,
+ const vma_flags_t *flags_other)
+{
+ vma_flags_t dst;
+ const unsigned long *bitmap_other = flags_other->__vma_flags;
+ const unsigned long *bitmap = flags->__vma_flags;
+ unsigned long *bitmap_dst = dst.__vma_flags;
+
+ bitmap_xor(bitmap_dst, bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+ return dst;
+}
+
+/* Determine if flags and flags_other have precisely the same flags set. */
+static __always_inline bool vma_flags_same_pair(const vma_flags_t *flags,
+ const vma_flags_t *flags_other)
+{
+ const unsigned long *bitmap = flags->__vma_flags;
+ const unsigned long *bitmap_other = flags_other->__vma_flags;
+
+ return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+}
+
+/* Determine if flags and flags_other have precisely the same flags set. */
+static __always_inline bool vma_flags_same_mask(const vma_flags_t *flags,
+ vma_flags_t flags_other)
+{
+ const unsigned long *bitmap = flags->__vma_flags;
+ const unsigned long *bitmap_other = flags_other.__vma_flags;
+
+ return bitmap_equal(bitmap, bitmap_other, NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Helper macro to determine if only the specific flags are set, e.g.:
+ *
+ * if (vma_flags_same(&flags, VMA_WRITE_BIT) { ... }
+ */
+#define vma_flags_same(flags, ...) \
+ vma_flags_same_mask(flags, mk_vma_flags(__VA_ARGS__))
+
+/*
+ * Test whether a specific flag in the VMA is set, e.g.:
+ *
+ * if (vma_test(vma, VMA_READ_BIT)) { ... }
+ */
+static __always_inline bool vma_test(const struct vm_area_struct *vma,
+ vma_flag_t bit)
+{
+ return vma_flags_test(&vma->flags, bit);
+}
+
+/* Helper to test any VMA flags in a VMA . */
+static __always_inline bool vma_test_any_mask(const struct vm_area_struct *vma,
+ vma_flags_t flags)
+{
+ return vma_flags_test_any_mask(&vma->flags, flags);
+}
+
+/*
+ * Helper macro for testing whether any VMA flags are set in a VMA,
+ * e.g.:
+ *
+ * if (vma_test_any(vma, VMA_IO_BIT, VMA_PFNMAP_BIT,
+ * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
+ */
+#define vma_test_any(vma, ...) \
+ vma_test_any_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+/*
* Helper to test that ALL specified flags are set in a VMA.
*
* Note: appropriate locks must be held, this function does not acquire them for
* you.
*/
-static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
- vma_flags_t flags)
+static __always_inline bool vma_test_all_mask(const struct vm_area_struct *vma,
+ vma_flags_t flags)
{
return vma_flags_test_all_mask(&vma->flags, flags);
}
@@ -1147,10 +1343,28 @@ static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
/*
* Helper macro for checking that ALL specified flags are set in a VMA, e.g.:
*
- * if (vma_test_all_flags(vma, VMA_READ_BIT, VMA_MAYREAD_BIT) { ... }
+ * if (vma_test_all(vma, VMA_READ_BIT, VMA_MAYREAD_BIT) { ... }
+ */
+#define vma_test_all(vma, ...) \
+ vma_test_all_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+/*
+ * Helper to test that a flag mask of type vma_flags_t has a SINGLE flag set
+ * (returning false if flagmask has no flags set).
+ *
+ * This is useful when a flag needs to be either defined or not depending upon
+ * kernel configuration, e.g.:
+ *
+ * if (vma_test_single_mask(vma, VMA_DROPPABLE)) { ... }
+ *
+ * When VMA_DROPPABLE is defined if available, or set to EMPTY_VMA_FLAGS
+ * otherwise.
*/
-#define vma_test_all_flags(vma, ...) \
- vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+static __always_inline bool
+vma_test_single_mask(const struct vm_area_struct *vma, vma_flags_t flagmask)
+{
+ return vma_flags_test_single_mask(&vma->flags, flagmask);
+}
/*
* Helper to set all VMA flags in a VMA.
@@ -1158,8 +1372,8 @@ static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
* Note: appropriate locks must be held, this function does not acquire them for
* you.
*/
-static inline void vma_set_flags_mask(struct vm_area_struct *vma,
- vma_flags_t flags)
+static __always_inline void vma_set_flags_mask(struct vm_area_struct *vma,
+ vma_flags_t flags)
{
vma_flags_set_mask(&vma->flags, flags);
}
@@ -1176,26 +1390,69 @@ static inline void vma_set_flags_mask(struct vm_area_struct *vma,
#define vma_set_flags(vma, ...) \
vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
-/* Helper to test all VMA flags in a VMA descriptor. */
-static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
- vma_flags_t flags)
+/* Helper to clear all VMA flags in a VMA. */
+static __always_inline void vma_clear_flags_mask(struct vm_area_struct *vma,
+ vma_flags_t flags)
{
- return vma_flags_test_mask(&desc->vma_flags, flags);
+ vma_flags_clear_mask(&vma->flags, flags);
}
/*
- * Helper macro for testing VMA flags for an input pointer to a struct
- * vm_area_desc object describing a proposed VMA, e.g.:
+ * Helper macro for clearing VMA flags, e.g.:
*
- * if (vma_desc_test_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
+ * vma_clear_flags(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
+ * VMA_DONTDUMP_BIT);
+ */
+#define vma_clear_flags(vma, ...) \
+ vma_clear_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
+
+/*
+ * Test whether a specific VMA flag is set in a VMA descriptor, e.g.:
+ *
+ * if (vma_desc_test(desc, VMA_READ_BIT)) { ... }
+ */
+static __always_inline bool vma_desc_test(const struct vm_area_desc *desc,
+ vma_flag_t bit)
+{
+ return vma_flags_test(&desc->vma_flags, bit);
+}
+
+/* Helper to test any VMA flags in a VMA descriptor. */
+static __always_inline bool vma_desc_test_any_mask(const struct vm_area_desc *desc,
+ vma_flags_t flags)
+{
+ return vma_flags_test_any_mask(&desc->vma_flags, flags);
+}
+
+/*
+ * Helper macro for testing whether any VMA flags are set in a VMA descriptor,
+ * e.g.:
+ *
+ * if (vma_desc_test_any(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
* VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
*/
-#define vma_desc_test_flags(desc, ...) \
- vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
+#define vma_desc_test_any(desc, ...) \
+ vma_desc_test_any_mask(desc, mk_vma_flags(__VA_ARGS__))
+
+/* Helper to test all VMA flags in a VMA descriptor. */
+static __always_inline bool vma_desc_test_all_mask(const struct vm_area_desc *desc,
+ vma_flags_t flags)
+{
+ return vma_flags_test_all_mask(&desc->vma_flags, flags);
+}
+
+/*
+ * Helper macro for testing whether ALL VMA flags are set in a VMA descriptor,
+ * e.g.:
+ *
+ * if (vma_desc_test_all(desc, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
+ */
+#define vma_desc_test_all(desc, ...) \
+ vma_desc_test_all_mask(desc, mk_vma_flags(__VA_ARGS__))
/* Helper to set all VMA flags in a VMA descriptor. */
-static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
- vma_flags_t flags)
+static __always_inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
+ vma_flags_t flags)
{
vma_flags_set_mask(&desc->vma_flags, flags);
}
@@ -1211,8 +1468,8 @@ static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
/* Helper to clear all VMA flags in a VMA descriptor. */
-static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
- vma_flags_t flags)
+static __always_inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
+ vma_flags_t flags)
{
vma_flags_clear_mask(&desc->vma_flags, flags);
}
@@ -1292,12 +1549,6 @@ static inline bool vma_is_accessible(const struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}
-static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
-{
- return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
- (VM_SHARED | VM_MAYWRITE);
-}
-
static inline bool is_shared_maywrite(const vma_flags_t *flags)
{
return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
@@ -1308,6 +1559,28 @@ static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma)
return is_shared_maywrite(&vma->flags);
}
+/**
+ * vma_kernel_pagesize - Default page size granularity for this VMA.
+ * @vma: The user mapping.
+ *
+ * The kernel page size specifies in which granularity VMA modifications
+ * can be performed. Folios in this VMA will be aligned to, and at least
+ * the size of the number of bytes returned by this function.
+ *
+ * The default kernel page size is not affected by Transparent Huge Pages
+ * being in effect.
+ *
+ * Return: The default page size granularity for this VMA.
+ */
+static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
+{
+ if (unlikely(vma->vm_ops && vma->vm_ops->pagesize))
+ return vma->vm_ops->pagesize(vma);
+ return PAGE_SIZE;
+}
+
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
+
static inline
struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
{
@@ -1507,7 +1780,7 @@ static inline int folio_put_testzero(struct folio *folio)
*/
static inline bool get_page_unless_zero(struct page *page)
{
- return page_ref_add_unless(page, 1, 0);
+ return page_ref_add_unless_zero(page, 1);
}
static inline struct folio *folio_get_nontail_page(struct page *page)
@@ -1957,7 +2230,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags)
static inline bool is_nommu_shared_vma_flags(const vma_flags_t *flags)
{
- return vma_flags_test(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT);
+ return vma_flags_test_any(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT);
}
#endif
@@ -2479,36 +2752,6 @@ static inline unsigned long folio_nr_pages(const struct folio *folio)
return folio_large_nr_pages(folio);
}
-#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS)
-/*
- * We don't expect any folios that exceed buddy sizes (and consequently
- * memory sections).
- */
-#define MAX_FOLIO_ORDER MAX_PAGE_ORDER
-#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
-/*
- * Only pages within a single memory section are guaranteed to be
- * contiguous. By limiting folios to a single memory section, all folio
- * pages are guaranteed to be contiguous.
- */
-#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT
-#elif defined(CONFIG_HUGETLB_PAGE)
-/*
- * There is no real limit on the folio size. We limit them to the maximum we
- * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect
- * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit.
- */
-#define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G)
-#else
-/*
- * Without hugetlb, gigantic folios that are bigger than a single PUD are
- * currently impossible.
- */
-#define MAX_FOLIO_ORDER PUD_ORDER
-#endif
-
-#define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER)
-
/*
* compound_nr() returns the number of pages in this potentially compound
* page. compound_nr() can be called on a tail page, and is defined to
@@ -2667,7 +2910,7 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio)
* The caller must add any reference (e.g., from folio_try_get()) it might be
* holding itself to the result.
*
- * Returns the expected folio refcount.
+ * Returns: the expected folio refcount.
*/
static inline int folio_expected_ref_count(const struct folio *folio)
{
@@ -2798,8 +3041,9 @@ extern void pagefault_out_of_memory(void);
*/
struct zap_details {
struct folio *single_folio; /* Locked folio to be unmapped */
- bool even_cows; /* Zap COWed private pages too? */
+ bool skip_cows; /* Do not zap COWed private pages */
bool reclaim_pt; /* Need reclaim page tables? */
+ bool reaping; /* Reaping, do not block. */
zap_flags_t zap_flags; /* Extra flags for zapping */
};
@@ -2832,14 +3076,17 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr,
pud_t pud);
-void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+void zap_special_vma_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
-void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, struct zap_details *details);
-static inline void zap_vma_pages(struct vm_area_struct *vma)
+void zap_vma_range(struct vm_area_struct *vma, unsigned long address,
+ unsigned long size);
+/**
+ * zap_vma - zap all page table entries in a vma
+ * @vma: The vma to zap.
+ */
+static inline void zap_vma(struct vm_area_struct *vma)
{
- zap_page_range_single(vma, vma->vm_start,
- vma->vm_end - vma->vm_start, NULL);
+ zap_vma_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}
struct mmu_notifier_range;
@@ -3514,26 +3761,21 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
static inline void ptlock_free(struct ptdesc *ptdesc) {}
#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */
-static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc)
-{
- return compound_nr(ptdesc_page(ptdesc));
-}
-
static inline void __pagetable_ctor(struct ptdesc *ptdesc)
{
- pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+ struct folio *folio = ptdesc_folio(ptdesc);
- __SetPageTable(ptdesc_page(ptdesc));
- mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc));
+ __folio_set_pgtable(folio);
+ lruvec_stat_add_folio(folio, NR_PAGETABLE);
}
static inline void pagetable_dtor(struct ptdesc *ptdesc)
{
- pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+ struct folio *folio = ptdesc_folio(ptdesc);
ptlock_free(ptdesc);
- __ClearPageTable(ptdesc_page(ptdesc));
- mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc));
+ __folio_clear_pgtable(folio);
+ lruvec_stat_sub_folio(folio, NR_PAGETABLE);
}
static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
@@ -3691,9 +3933,6 @@ extern unsigned long free_reserved_area(void *start, void *end,
extern void adjust_managed_page_count(struct page *page, long count);
-extern void reserve_bootmem_region(phys_addr_t start,
- phys_addr_t end, int nid);
-
/* Free the reserved page into the buddy system, so it gets managed. */
void free_reserved_page(struct page *page);
@@ -3852,7 +4091,6 @@ extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
extern struct file *get_mm_exe_file(struct mm_struct *mm);
extern struct file *get_task_exe_file(struct task_struct *task);
-extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
extern bool vma_is_special_mapping(const struct vm_area_struct *vma,
@@ -3903,11 +4141,13 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {}
#endif
/* This takes the mm semaphore itself */
-extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
-extern int vm_munmap(unsigned long, size_t);
-extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
- unsigned long, unsigned long,
- unsigned long, unsigned long);
+int __must_check vm_brk_flags(unsigned long addr, unsigned long request, bool is_exec);
+int vm_munmap(unsigned long start, size_t len);
+unsigned long __must_check vm_mmap(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flag, unsigned long offset);
+unsigned long __must_check vm_mmap_shadow_stack(unsigned long addr,
+ unsigned long len, unsigned long flags);
struct vm_unmapped_area_info {
#define VM_UNMAPPED_AREA_TOPDOWN 1
@@ -4004,6 +4244,11 @@ static inline unsigned long vma_pages(const struct vm_area_struct *vma)
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
+static inline unsigned long vma_last_pgoff(struct vm_area_struct *vma)
+{
+ return vma->vm_pgoff + vma_pages(vma) - 1;
+}
+
static inline unsigned long vma_desc_size(const struct vm_area_desc *desc)
{
return desc->end - desc->start;
@@ -4078,15 +4323,75 @@ static inline void mmap_action_ioremap(struct vm_area_desc *desc,
* @start_pfn: The first PFN in the range to remap.
*/
static inline void mmap_action_ioremap_full(struct vm_area_desc *desc,
- unsigned long start_pfn)
+ unsigned long start_pfn)
{
mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc));
}
-void mmap_action_prepare(struct mmap_action *action,
- struct vm_area_desc *desc);
-int mmap_action_complete(struct mmap_action *action,
- struct vm_area_struct *vma);
+/**
+ * mmap_action_simple_ioremap - helper for mmap_prepare hook to specify that the
+ * physical range in [start_phys_addr, start_phys_addr + size) should be I/O
+ * remapped.
+ * @desc: The VMA descriptor for the VMA requiring remap.
+ * @start_phys_addr: Start of the physical memory to be mapped.
+ * @size: Size of the area to map.
+ *
+ * NOTE: Some drivers might want to tweak desc->page_prot for purposes of
+ * write-combine or similar.
+ */
+static inline void mmap_action_simple_ioremap(struct vm_area_desc *desc,
+ phys_addr_t start_phys_addr,
+ unsigned long size)
+{
+ struct mmap_action *action = &desc->action;
+
+ action->simple_ioremap.start_phys_addr = start_phys_addr;
+ action->simple_ioremap.size = size;
+ action->type = MMAP_SIMPLE_IO_REMAP;
+}
+
+/**
+ * mmap_action_map_kernel_pages - helper for mmap_prepare hook to specify that
+ * @num kernel pages contained in the @pages array should be mapped to userland
+ * starting at virtual address @start.
+ * @desc: The VMA descriptor for the VMA requiring kernel pags to be mapped.
+ * @start: The virtual address from which to map them.
+ * @pages: An array of struct page pointers describing the memory to map.
+ * @nr_pages: The number of entries in the @pages aray.
+ */
+static inline void mmap_action_map_kernel_pages(struct vm_area_desc *desc,
+ unsigned long start, struct page **pages,
+ unsigned long nr_pages)
+{
+ struct mmap_action *action = &desc->action;
+
+ action->type = MMAP_MAP_KERNEL_PAGES;
+ action->map_kernel.start = start;
+ action->map_kernel.pages = pages;
+ action->map_kernel.nr_pages = nr_pages;
+ action->map_kernel.pgoff = desc->pgoff;
+}
+
+/**
+ * mmap_action_map_kernel_pages_full - helper for mmap_prepare hook to specify that
+ * kernel pages contained in the @pages array should be mapped to userland
+ * from @desc->start to @desc->end.
+ * @desc: The VMA descriptor for the VMA requiring kernel pags to be mapped.
+ * @pages: An array of struct page pointers describing the memory to map.
+ *
+ * The caller must ensure that @pages contains sufficient entries to cover the
+ * entire range described by @desc.
+ */
+static inline void mmap_action_map_kernel_pages_full(struct vm_area_desc *desc,
+ struct page **pages)
+{
+ mmap_action_map_kernel_pages(desc, desc->start, pages,
+ vma_desc_pages(desc));
+}
+
+int mmap_action_prepare(struct vm_area_desc *desc);
+int mmap_action_complete(struct vm_area_struct *vma,
+ struct mmap_action *action);
/* Look up the first VMA which exactly match the interval vm_start ... vm_end */
static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
@@ -4100,20 +4405,81 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
return vma;
}
+/**
+ * range_is_subset - Is the specified inner range a subset of the outer range?
+ * @outer_start: The start of the outer range.
+ * @outer_end: The exclusive end of the outer range.
+ * @inner_start: The start of the inner range.
+ * @inner_end: The exclusive end of the inner range.
+ *
+ * Returns: %true if [inner_start, inner_end) is a subset of [outer_start,
+ * outer_end), otherwise %false.
+ */
+static inline bool range_is_subset(unsigned long outer_start,
+ unsigned long outer_end,
+ unsigned long inner_start,
+ unsigned long inner_end)
+{
+ return outer_start <= inner_start && inner_end <= outer_end;
+}
+
+/**
+ * range_in_vma - is the specified [@start, @end) range a subset of the VMA?
+ * @vma: The VMA against which we want to check [@start, @end).
+ * @start: The start of the range we wish to check.
+ * @end: The exclusive end of the range we wish to check.
+ *
+ * Returns: %true if [@start, @end) is a subset of [@vma->vm_start,
+ * @vma->vm_end), %false otherwise.
+ */
static inline bool range_in_vma(const struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- return (vma && vma->vm_start <= start && end <= vma->vm_end);
+ if (!vma)
+ return false;
+
+ return range_is_subset(vma->vm_start, vma->vm_end, start, end);
+}
+
+/**
+ * range_in_vma_desc - is the specified [@start, @end) range a subset of the VMA
+ * described by @desc, a VMA descriptor?
+ * @desc: The VMA descriptor against which we want to check [@start, @end).
+ * @start: The start of the range we wish to check.
+ * @end: The exclusive end of the range we wish to check.
+ *
+ * Returns: %true if [@start, @end) is a subset of [@desc->start, @desc->end),
+ * %false otherwise.
+ */
+static inline bool range_in_vma_desc(const struct vm_area_desc *desc,
+ unsigned long start, unsigned long end)
+{
+ if (!desc)
+ return false;
+
+ return range_is_subset(desc->start, desc->end, start, end);
}
#ifdef CONFIG_MMU
pgprot_t vm_get_page_prot(vm_flags_t vm_flags);
+
+static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags)
+{
+ const vm_flags_t vm_flags = vma_flags_to_legacy(vma_flags);
+
+ return vm_get_page_prot(vm_flags);
+}
+
void vma_set_page_prot(struct vm_area_struct *vma);
#else
static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
{
return __pgprot(0);
}
+static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags)
+{
+ return __pgprot(0);
+}
static inline void vma_set_page_prot(struct vm_area_struct *vma)
{
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
@@ -4135,6 +4501,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num);
+int map_kernel_pages_prepare(struct vm_area_desc *desc);
+int map_kernel_pages_complete(struct vm_area_struct *vma,
+ struct mmap_action *action);
int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
unsigned long num);
int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
@@ -4513,10 +4882,9 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
-int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node,
+int vmemmap_populate_hvo(unsigned long start, unsigned long end,
+ unsigned int order, struct zone *zone,
unsigned long headsize);
-int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node,
- unsigned long headsize);
void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
unsigned long headsize);
void vmemmap_populate_print_last(void);
@@ -4702,22 +5070,6 @@ long copy_folio_from_user(struct folio *dst_folio,
const void __user *usr_src,
bool allow_pagefault);
-/**
- * vma_is_special_huge - Are transhuge page-table entries considered special?
- * @vma: Pointer to the struct vm_area_struct to consider
- *
- * Whether transhuge page-table entries are considered "special" following
- * the definition in vm_normal_page().
- *
- * Return: true if transhuge page-table entries should be considered special,
- * false otherwise.
- */
-static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
-{
- return vma_is_dax(vma) || (vma->vm_file &&
- (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
-}
-
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
#if MAX_NUMNODES > 1
@@ -4822,10 +5174,9 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
* DMA mapping IDs for page_pool
*
* When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
- * stashes it in the upper bits of page->pp_magic. We always want to be able to
- * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
- * pages can have arbitrary kernel pointers stored in the same field as pp_magic
- * (since it overlaps with page->lru.next), so we must ensure that we cannot
+ * stashes it in the upper bits of page->pp_magic. Non-PP pages can have
+ * arbitrary kernel pointers stored in the same field as pp_magic (since
+ * it overlaps with page->lru.next), so we must ensure that we cannot
* mistake a valid kernel pointer with any of the values we write into this
* field.
*
@@ -4860,26 +5211,6 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
PP_DMA_INDEX_SHIFT)
-/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
- * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
- * the head page of compound page and bit 1 for pfmemalloc page, as well as the
- * bits used for the DMA index. page_is_pfmemalloc() is checked in
- * __page_pool_put_page() to avoid recycling the pfmemalloc page.
- */
-#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
-
-#ifdef CONFIG_PAGE_POOL
-static inline bool page_pool_page_is_pp(const struct page *page)
-{
- return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
-}
-#else
-static inline bool page_pool_page_is_pp(const struct page *page)
-{
- return false;
-}
-#endif
-
#define PAGE_SNAPSHOT_FAITHFUL (1 << 0)
#define PAGE_SNAPSHOT_PG_BUDDY (1 << 1)
#define PAGE_SNAPSHOT_PG_IDLE (1 << 2)
@@ -4899,4 +5230,8 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
void snapshot_page(struct page_snapshot *ps, const struct page *page);
+void map_anon_folio_pte_nopf(struct folio *folio, pte_t *pte,
+ struct vm_area_struct *vma, unsigned long addr,
+ bool uffd_wp);
+
#endif /* _LINUX_MM_H */