From ac0a3fc9c07df79dc8a4ce9d274df00afc7bf12d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 20 Oct 2025 13:11:27 +0100 Subject: mm: add ability to take further action in vm_area_desc Some drivers/filesystems need to perform additional tasks after the VMA is set up. This is typically in the form of pre-population. The forms of pre-population most likely to be performed are a PFN remap or the insertion of normal folios and PFNs into a mixed map. We start by implementing the PFN remap functionality, ensuring that we perform the appropriate actions at the appropriate time - that is setting flags at the point of .mmap_prepare, and performing the actual remap at the point at which the VMA is fully established. This prevents the driver from doing anything too crazy with a VMA at any stage, and we retain complete control over how the mm functionality is applied. Unfortunately callers still do often require some kind of custom action, so we add an optional success/error _hook to allow the caller to do something after the action has succeeded or failed. This is done at the point when the VMA has already been established, so the harm that can be done is limited. The error hook can be used to filter errors if necessary. There may be cases in which the caller absolutely must hold the file rmap lock until the operation is entirely complete. It is an edge case, but certainly the hugetlbfs mmap hook requires it. To accommodate this, we add the hide_from_rmap_until_complete flag to the mmap_action type. In this case, if a new VMA is allocated, we will hold the file rmap lock until the operation is entirely completed (including any success/error hooks). Note that we do not need to update __compat_vma_mmap() to accommodate this flag, as this function will be invoked from an .mmap handler whose VMA is not yet visible, so we implicitly hide it from the rmap. If any error arises on these final actions, we simply unmap the VMA altogether. Also update the stacked filesystem compatibility layer to utilise the action behaviour, and update the VMA tests accordingly. While we're here, rename __compat_vma_mmap_prepare() to __compat_vma_mmap() as we are now performing actions invoked by the mmap_prepare in addition to just the mmap_prepare hook. Link: https://lkml.kernel.org/r/2601199a7b2eaeadfcd8ab6e199c6d1706650c94.1760959442.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Baolin Wang Cc: Baoquan He Cc: Chatre, Reinette Cc: Christian Borntraeger Cc: Christian Brauner Cc: Dan Williams Cc: Dave Jiang Cc: Dave Martin Cc: Dave Young Cc: David Hildenbrand Cc: David S. Miller Cc: Dmitriy Vyukov Cc: Greg Kroah-Hartman Cc: Guo Ren Cc: Heiko Carstens Cc: Hugh Dickins Cc: James Morse Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Kevin Tian Cc: Konstantin Komarov Cc: Liam Howlett Cc: "Luck, Tony" Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Nicolas Pitre Cc: Oscar Salvador Cc: Pedro Falcato Cc: Robin Murohy Cc: Sumanth Korikkar Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: "Uladzislau Rezki (Sony)" Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- tools/testing/vma/vma_internal.h | 98 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 91 insertions(+), 7 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index dc976a285ad2..d873667704e8 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -275,6 +275,57 @@ struct mm_struct { struct vm_area_struct; + +/* What action should be taken after an .mmap_prepare call is complete? */ +enum mmap_action_type { + MMAP_NOTHING, /* Mapping is complete, no further action. */ + MMAP_REMAP_PFN, /* Remap PFN range. */ + MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */ +}; + +/* + * Describes an action an mmap_prepare hook can instruct to be taken to complete + * the mapping of a VMA. Specified in vm_area_desc. + */ +struct mmap_action { + union { + /* Remap range. */ + struct { + unsigned long start; + unsigned long start_pfn; + unsigned long size; + pgprot_t pgprot; + } remap; + }; + enum mmap_action_type type; + + /* + * If specified, this hook is invoked after the selected action has been + * successfully completed. Note that the VMA write lock still held. + * + * The absolute minimum ought to be done here. + * + * Returns 0 on success, or an error code. + */ + int (*success_hook)(const struct vm_area_struct *vma); + + /* + * If specified, this hook is invoked when an error occurred when + * attempting the selection action. + * + * The hook can return an error code in order to filter the error, but + * it is not valid to clear the error here. + */ + int (*error_hook)(int err); + + /* + * This should be set in rare instances where the operation required + * that the rmap should not be able to access the VMA until + * completely set up. + */ + bool hide_from_rmap_until_complete :1; +}; + /* * Describes a VMA that is about to be mmap()'ed. Drivers may choose to * manipulate mutable fields which will cause those fields to be updated in the @@ -298,6 +349,9 @@ struct vm_area_desc { /* Write-only fields. */ const struct vm_operations_struct *vm_ops; void *private_data; + + /* Take further action? */ + struct mmap_action action; }; struct file_operations { @@ -1326,12 +1380,23 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma) static inline void set_vma_from_desc(struct vm_area_struct *vma, struct vm_area_desc *desc); -static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op, +static inline void mmap_action_prepare(struct mmap_action *action, + struct vm_area_desc *desc) +{ +} + +static inline int mmap_action_complete(struct mmap_action *action, + struct vm_area_struct *vma) +{ + return 0; +} + +static inline int __compat_vma_mmap(const struct file_operations *f_op, struct file *file, struct vm_area_struct *vma) { struct vm_area_desc desc = { .mm = vma->vm_mm, - .file = vma->vm_file, + .file = file, .start = vma->vm_start, .end = vma->vm_end, @@ -1339,21 +1404,24 @@ static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op, .vm_file = vma->vm_file, .vm_flags = vma->vm_flags, .page_prot = vma->vm_page_prot, + + .action.type = MMAP_NOTHING, /* Default */ }; int err; err = f_op->mmap_prepare(&desc); if (err) return err; - set_vma_from_desc(vma, &desc); - return 0; + mmap_action_prepare(&desc.action, &desc); + set_vma_from_desc(vma, &desc); + return mmap_action_complete(&desc.action, vma); } -static inline int compat_vma_mmap_prepare(struct file *file, +static inline int compat_vma_mmap(struct file *file, struct vm_area_struct *vma) { - return __compat_vma_mmap_prepare(file->f_op, file, vma); + return __compat_vma_mmap(file->f_op, file, vma); } /* Did the driver provide valid mmap hook configuration? */ @@ -1374,7 +1442,7 @@ static inline bool can_mmap_file(struct file *file) static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { if (file->f_op->mmap_prepare) - return compat_vma_mmap_prepare(file, vma); + return compat_vma_mmap(file, vma); return file->f_op->mmap(file, vma); } @@ -1407,4 +1475,20 @@ static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, return vm_flags; } +static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn) +{ +} + +static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t pgprot) +{ + return 0; +} + +static inline int do_munmap(struct mm_struct *, unsigned long, size_t, + struct list_head *uf) +{ + return 0; +} + #endif /* __MM_VMA_INTERNAL_H */ -- cgit v1.2.3