author    Lorenzo Stoakes <lorenzo.stoakes@oracle.com>    2025-10-20 13:11:27 +0100
committer Andrew Morton <akpm@linux-foundation.org>       2025-11-16 17:28:12 -0800
commit    ac0a3fc9c07df79dc8a4ce9d274df00afc7bf12d (patch)
tree      c1db9e38fd4a636423b5a1f2342ee54eceae86e2 /include
parent    db91b783290e395443151b0fe4b8dc32aceebef5 (diff)
mm: add ability to take further action in vm_area_desc
Some drivers/filesystems need to perform additional tasks after the VMA is
set up. This is typically in the form of pre-population.

The forms of pre-population most likely to be performed are a PFN remap or
the insertion of normal folios and PFNs into a mixed map.

We start by implementing the PFN remap functionality, ensuring that we
perform the appropriate actions at the appropriate time - that is, setting
flags at the point of .mmap_prepare, and performing the actual remap at the
point at which the VMA is fully established.

This prevents the driver from doing anything too crazy with a VMA at any
stage, and we retain complete control over how the mm functionality is
applied.

Unfortunately callers still do often require some kind of custom action, so
we add an optional success/error hook to allow the caller to do something
after the action has succeeded or failed. This is done at the point when the
VMA has already been established, so the harm that can be done is limited.
The error hook can be used to filter errors if necessary.

There may be cases in which the caller absolutely must hold the file rmap
lock until the operation is entirely complete. It is an edge case, but
certainly the hugetlbfs mmap hook requires it.

To accommodate this, we add the hide_from_rmap_until_complete flag to the
mmap_action type. In this case, if a new VMA is allocated, we will hold the
file rmap lock until the operation is entirely completed (including any
success/error hooks).

Note that we do not need to update __compat_vma_mmap() to accommodate this
flag, as this function will be invoked from an .mmap handler whose VMA is
not yet visible, so we implicitly hide it from the rmap.

If any error arises on these final actions, we simply unmap the VMA
altogether.

Also update the stacked filesystem compatibility layer to utilise the action
behaviour, and update the VMA tests accordingly.

While we're here, rename __compat_vma_mmap_prepare() to __compat_vma_mmap()
as we are now performing actions invoked by the mmap_prepare in addition to
just the mmap_prepare hook.

Link: https://lkml.kernel.org/r/2601199a7b2eaeadfcd8ab6e199c6d1706650c94.1760959442.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
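As an illustration of how a driver might use the interface introduced here,
the following is a minimal sketch; the foo_* names, the foo_device structure
and the lookup of the device via desc->file->private_data are illustrative
assumptions, not part of this patch. The .mmap_prepare hook sets its flags,
requests a full-VMA PFN remap via the new helper and optionally installs an
error hook; the mm core then performs the remap itself once the VMA is fully
established:

    #include <linux/fs.h>
    #include <linux/mm.h>

    /* Hypothetical device, holding the first PFN of its mappable region. */
    struct foo_device {
            unsigned long base_pfn;
    };

    /* Optionally filter the error; it must remain an error. */
    static int foo_mmap_error(int err)
    {
            return err;
    }

    static int foo_mmap_prepare(struct vm_area_desc *desc)
    {
            /* Assumed lookup of the hypothetical device from the backing file. */
            struct foo_device *foo = desc->file->private_data;

            /* Flags are set here, before the VMA is established. */
            desc->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;

            /* Ask the mm to PFN-remap the entire VMA once it is set up. */
            mmap_action_remap_full(desc, foo->base_pfn);
            desc->action.error_hook = foo_mmap_error;

            return 0;
    }

    static const struct file_operations foo_fops = {
            .mmap_prepare = foo_mmap_prepare,
    };

The driver never touches page tables itself: it only describes the desired
action in the descriptor, and the remap (or cleanup on failure) is carried
out by the core once the VMA is in place.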
Diffstat (limited to 'include')
-rw-r--r--  include/linux/fs.h        6
-rw-r--r--  include/linux/mm.h        74
-rw-r--r--  include/linux/mm_types.h  53
3 files changed, 130 insertions, 3 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..8cf9547a881c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2393,14 +2393,14 @@ static inline bool can_mmap_file(struct file *file)
return true;
}
-int __compat_vma_mmap_prepare(const struct file_operations *f_op,
+int __compat_vma_mmap(const struct file_operations *f_op,
struct file *file, struct vm_area_struct *vma);
-int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma);
+int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
if (file->f_op->mmap_prepare)
- return compat_vma_mmap_prepare(file, vma);
+ return compat_vma_mmap(file, vma);
return file->f_op->mmap(file, vma);
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4441ceec913f..2d060081caa5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3608,6 +3608,80 @@ static inline unsigned long vma_desc_pages(const struct vm_area_desc *desc)
return vma_desc_size(desc) >> PAGE_SHIFT;
}
+/**
+ * mmap_action_remap - helper for mmap_prepare hook to specify that a pure PFN
+ * remap is required.
+ * @desc: The VMA descriptor for the VMA requiring remap.
+ * @start: The virtual address to start the remap from, must be within the VMA.
+ * @start_pfn: The first PFN in the range to remap.
+ * @size: The size of the range to remap, in bytes, at most spanning to the end
+ * of the VMA.
+ */
+static inline void mmap_action_remap(struct vm_area_desc *desc,
+ unsigned long start,
+ unsigned long start_pfn,
+ unsigned long size)
+{
+ struct mmap_action *action = &desc->action;
+
+ /* [start, start + size) must be within the VMA. */
+ WARN_ON_ONCE(start < desc->start || start >= desc->end);
+ WARN_ON_ONCE(start + size > desc->end);
+
+ action->type = MMAP_REMAP_PFN;
+ action->remap.start = start;
+ action->remap.start_pfn = start_pfn;
+ action->remap.size = size;
+ action->remap.pgprot = desc->page_prot;
+}
+
+/**
+ * mmap_action_remap_full - helper for mmap_prepare hook to specify that the
+ * entirety of a VMA should be PFN remapped.
+ * @desc: The VMA descriptor for the VMA requiring remap.
+ * @start_pfn: The first PFN in the range to remap.
+ */
+static inline void mmap_action_remap_full(struct vm_area_desc *desc,
+ unsigned long start_pfn)
+{
+ mmap_action_remap(desc, desc->start, start_pfn, vma_desc_size(desc));
+}
+
+/**
+ * mmap_action_ioremap - helper for mmap_prepare hook to specify that a pure PFN
+ * I/O remap is required.
+ * @desc: The VMA descriptor for the VMA requiring remap.
+ * @start: The virtual address to start the remap from, must be within the VMA.
+ * @start_pfn: The first PFN in the range to remap.
+ * @size: The size of the range to remap, in bytes, at most spanning to the end
+ * of the VMA.
+ */
+static inline void mmap_action_ioremap(struct vm_area_desc *desc,
+ unsigned long start,
+ unsigned long start_pfn,
+ unsigned long size)
+{
+ mmap_action_remap(desc, start, start_pfn, size);
+ desc->action.type = MMAP_IO_REMAP_PFN;
+}
+
+/**
+ * mmap_action_ioremap_full - helper for mmap_prepare hook to specify that the
+ * entirety of a VMA should be PFN I/O remapped.
+ * @desc: The VMA descriptor for the VMA requiring remap.
+ * @start_pfn: The first PFN in the range to remap.
+ */
+static inline void mmap_action_ioremap_full(struct vm_area_desc *desc,
+ unsigned long start_pfn)
+{
+ mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc));
+}
+
+void mmap_action_prepare(struct mmap_action *action,
+ struct vm_area_desc *desc);
+int mmap_action_complete(struct mmap_action *action,
+ struct vm_area_struct *vma);
+
/* Look up the first VMA which exactly match the interval vm_start ... vm_end */
static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
unsigned long vm_start, unsigned long vm_end)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 90e5790c318f..5021047485a9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -773,6 +773,56 @@ struct pfnmap_track_ctx {
};
#endif
+/* What action should be taken after an .mmap_prepare call is complete? */
+enum mmap_action_type {
+ MMAP_NOTHING, /* Mapping is complete, no further action. */
+ MMAP_REMAP_PFN, /* Remap PFN range. */
+ MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
+};
+
+/*
+ * Describes an action an mmap_prepare hook can instruct to be taken to complete
+ * the mapping of a VMA. Specified in vm_area_desc.
+ */
+struct mmap_action {
+ union {
+ /* Remap range. */
+ struct {
+ unsigned long start;
+ unsigned long start_pfn;
+ unsigned long size;
+ pgprot_t pgprot;
+ } remap;
+ };
+ enum mmap_action_type type;
+
+ /*
+ * If specified, this hook is invoked after the selected action has been
+ * successfully completed. Note that the VMA write lock is still held.
+ *
+ * The absolute minimum ought to be done here.
+ *
+ * Returns 0 on success, or an error code.
+ */
+ int (*success_hook)(const struct vm_area_struct *vma);
+
+ /*
+ * If specified, this hook is invoked when an error occurs while
+ * attempting the selected action.
+ *
+ * The hook can return an error code in order to filter the error, but
+ * it is not valid to clear the error here.
+ */
+ int (*error_hook)(int err);
+
+ /*
+ * This should be set in rare instances where the operation requires
+ * that the rmap not be able to access the VMA until it is
+ * completely set up.
+ */
+ bool hide_from_rmap_until_complete :1;
+};
+
/*
* Describes a VMA that is about to be mmap()'ed. Drivers may choose to
* manipulate mutable fields which will cause those fields to be updated in the
@@ -796,6 +846,9 @@ struct vm_area_desc {
/* Write-only fields. */
const struct vm_operations_struct *vm_ops;
void *private_data;
+
+ /* Take further action? */
+ struct mmap_action action;
};
/*