From 56f434f40f059eb3769d50b9c244a850096c3d6f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:18 -0400 Subject: mm/mmu_notifier: define the header pre-processor parts even if disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have KERNEL_HEADER_TEST all headers are generally compile tested, so relying on makefile tricks to avoid compiling code that depends on CONFIG_MMU_NOTIFIER is more annoying. Instead follow the usual pattern and provide most of the header with only the functions stubbed out when CONFIG_MMU_NOTIFIER is disabled. This ensures code compiles no matter what the config setting is. While here, struct mmu_notifier_mm is private to mmu_notifier.c, move it. Link: https://lore.kernel.org/r/20191112202231.3856-2-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Ralph Campbell Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/mmu_notifier.h | 46 ++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 1bd8e6a09a3c..12bd603d318c 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -7,8 +7,9 @@ #include #include +struct mmu_notifier_mm; struct mmu_notifier; -struct mmu_notifier_ops; +struct mmu_notifier_range; /** * enum mmu_notifier_event - reason for the mmu notifier callback @@ -40,36 +41,8 @@ enum mmu_notifier_event { MMU_NOTIFY_SOFT_DIRTY, }; -#ifdef CONFIG_MMU_NOTIFIER - -#ifdef CONFIG_LOCKDEP -extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; -#endif - -/* - * The mmu notifier_mm structure is allocated and installed in - * mm->mmu_notifier_mm inside the mm_take_all_locks() protected - * critical section and it's released only when mm_count reaches zero - * in mmdrop(). - */ -struct mmu_notifier_mm { - /* all mmu notifiers registerd in this mm are queued in this list */ - struct hlist_head list; - /* to serialize the list modifications and hlist_unhashed */ - spinlock_t lock; -}; - #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) -struct mmu_notifier_range { - struct vm_area_struct *vma; - struct mm_struct *mm; - unsigned long start; - unsigned long end; - unsigned flags; - enum mmu_notifier_event event; -}; - struct mmu_notifier_ops { /* * Called either by mmu_notifier_unregister or when the mm is @@ -249,6 +222,21 @@ struct mmu_notifier { unsigned int users; }; +#ifdef CONFIG_MMU_NOTIFIER + +#ifdef CONFIG_LOCKDEP +extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; +#endif + +struct mmu_notifier_range { + struct vm_area_struct *vma; + struct mm_struct *mm; + unsigned long start; + unsigned long end; + unsigned flags; + enum mmu_notifier_event event; +}; + static inline int mm_has_notifiers(struct mm_struct *mm) { return unlikely(mm->mmu_notifier_mm); -- cgit v1.2.3 From 99cb252f5e68d72afa3245a4e73d216d295cd335 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:19 -0400 Subject: mm/mmu_notifier: add an interval tree notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of the 13 users of mmu_notifiers, 8 of them use only invalidate_range_start/end() and immediately intersect the mmu_notifier_range with some kind of internal list of VAs. 4 use an interval tree (i915_gem, radeon_mn, umem_odp, hfi1). 4 use a linked list of some kind (scif_dma, vhost, gntdev, hmm) And the remaining 5 either don't use invalidate_range_start() or do some special thing with it. It turns out that building a correct scheme with an interval tree is pretty complicated, particularly if the use case is synchronizing against another thread doing get_user_pages(). Many of these implementations have various subtle and difficult to fix races. This approach puts the interval tree as common code at the top of the mmu notifier call tree and implements a shareable locking scheme. It includes: - An interval tree tracking VA ranges, with per-range callbacks - A read/write locking scheme for the interval tree that avoids sleeping in the notifier path (for OOM killer) - A sequence counter based collision-retry locking scheme to tell device page fault that a VA range is being concurrently invalidated. This is based on various ideas: - hmm accumulates invalidated VA ranges and releases them when all invalidates are done, via active_invalidate_ranges count. This approach avoids having to intersect the interval tree twice (as umem_odp does) at the potential cost of a longer device page fault. - kvm/umem_odp use a sequence counter to drive the collision retry, via invalidate_seq - a deferred work todo list on unlock scheme like RTNL, via deferred_list. This makes adding/removing interval tree members more deterministic - seqlock, except this version makes the seqlock idea multi-holder on the write side by protecting it with active_invalidate_ranges and a spinlock To minimize MM overhead when only the interval tree is being used, the entire SRCU and hlist overheads are dropped using some simple branches. Similarly the interval tree overhead is dropped when in hlist mode. The overhead from the mandatory spinlock is broadly the same as most of existing users which already had a lock (or two) of some sort on the invalidation path. Link: https://lore.kernel.org/r/20191112202231.3856-3-jgg@ziepe.ca Acked-by: Christian König Tested-by: Philip Yang Tested-by: Ralph Campbell Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/mmu_notifier.h | 101 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) (limited to 'include') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 12bd603d318c..9e6caa8ecd19 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -6,10 +6,12 @@ #include #include #include +#include struct mmu_notifier_mm; struct mmu_notifier; struct mmu_notifier_range; +struct mmu_interval_notifier; /** * enum mmu_notifier_event - reason for the mmu notifier callback @@ -32,6 +34,9 @@ struct mmu_notifier_range; * access flags). User should soft dirty the page in the end callback to make * sure that anyone relying on soft dirtyness catch pages that might be written * through non CPU mappings. + * + * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal + * that the mm refcount is zero and the range is no longer accessible. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, @@ -39,6 +44,7 @@ enum mmu_notifier_event { MMU_NOTIFY_PROTECTION_VMA, MMU_NOTIFY_PROTECTION_PAGE, MMU_NOTIFY_SOFT_DIRTY, + MMU_NOTIFY_RELEASE, }; #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) @@ -222,6 +228,26 @@ struct mmu_notifier { unsigned int users; }; +/** + * struct mmu_interval_notifier_ops + * @invalidate: Upon return the caller must stop using any SPTEs within this + * range. This function can sleep. Return false only if sleeping + * was required but mmu_notifier_range_blockable(range) is false. + */ +struct mmu_interval_notifier_ops { + bool (*invalidate)(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); +}; + +struct mmu_interval_notifier { + struct interval_tree_node interval_tree; + const struct mmu_interval_notifier_ops *ops; + struct mm_struct *mm; + struct hlist_node deferred_item; + unsigned long invalidate_seq; +}; + #ifdef CONFIG_MMU_NOTIFIER #ifdef CONFIG_LOCKDEP @@ -263,6 +289,81 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm); extern void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm); + +unsigned long mmu_interval_read_begin(struct mmu_interval_notifier *mni); +int mmu_interval_notifier_insert(struct mmu_interval_notifier *mni, + struct mm_struct *mm, unsigned long start, + unsigned long length, + const struct mmu_interval_notifier_ops *ops); +int mmu_interval_notifier_insert_locked( + struct mmu_interval_notifier *mni, struct mm_struct *mm, + unsigned long start, unsigned long length, + const struct mmu_interval_notifier_ops *ops); +void mmu_interval_notifier_remove(struct mmu_interval_notifier *mni); + +/** + * mmu_interval_set_seq - Save the invalidation sequence + * @mni - The mni passed to invalidate + * @cur_seq - The cur_seq passed to the invalidate() callback + * + * This must be called unconditionally from the invalidate callback of a + * struct mmu_interval_notifier_ops under the same lock that is used to call + * mmu_interval_read_retry(). It updates the sequence number for later use by + * mmu_interval_read_retry(). The provided cur_seq will always be odd. + * + * If the caller does not call mmu_interval_read_begin() or + * mmu_interval_read_retry() then this call is not required. + */ +static inline void mmu_interval_set_seq(struct mmu_interval_notifier *mni, + unsigned long cur_seq) +{ + WRITE_ONCE(mni->invalidate_seq, cur_seq); +} + +/** + * mmu_interval_read_retry - End a read side critical section against a VA range + * mni: The range + * seq: The return of the paired mmu_interval_read_begin() + * + * This MUST be called under a user provided lock that is also held + * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). + * + * Each call should be paired with a single mmu_interval_read_begin() and + * should be used to conclude the read side. + * + * Returns true if an invalidation collided with this critical section, and + * the caller should retry. + */ +static inline bool mmu_interval_read_retry(struct mmu_interval_notifier *mni, + unsigned long seq) +{ + return mni->invalidate_seq != seq; +} + +/** + * mmu_interval_check_retry - Test if a collision has occurred + * mni: The range + * seq: The return of the matching mmu_interval_read_begin() + * + * This can be used in the critical section between mmu_interval_read_begin() + * and mmu_interval_read_retry(). A return of true indicates an invalidation + * has collided with this critical region and a future + * mmu_interval_read_retry() will return true. + * + * False is not reliable and only suggests a collision may not have + * occured. It can be called many times and does not have to hold the user + * provided lock. + * + * This call can be used as part of loops and other expensive operations to + * expedite a retry. + */ +static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *mni, + unsigned long seq) +{ + /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ + return READ_ONCE(mni->invalidate_seq) != seq; +} + extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, -- cgit v1.2.3 From 04ec32fbc2b29a640d67872d2f88daac4c73e45b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:20 -0400 Subject: mm/hmm: allow hmm_range to be used with a mmu_interval_notifier or hmm_mirror MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hmm_mirror's handling of ranges does not use a sequence count which results in this bug: CPU0 CPU1 hmm_range_wait_until_valid(range) valid == true hmm_range_fault(range) hmm_invalidate_range_start() range->valid = false hmm_invalidate_range_end() range->valid = true hmm_range_valid(range) valid == true Where the hmm_range_valid() should not have succeeded. Adding the required sequence count would make it nearly identical to the new mmu_interval_notifier. Instead replace the hmm_mirror stuff with mmu_interval_notifier. Co-existence of the two APIs is the first step. Link: https://lore.kernel.org/r/20191112202231.3856-4-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Philip Yang Tested-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 3fec513b9c00..fbb35c78637e 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -145,6 +145,9 @@ enum hmm_pfn_value_e { /* * struct hmm_range - track invalidation lock on virtual address range * + * @notifier: an optional mmu_interval_notifier + * @notifier_seq: when notifier is used this is the result of + * mmu_interval_read_begin() * @hmm: the core HMM structure this range is active against * @vma: the vm area struct for the range * @list: all range lock are on a list @@ -159,6 +162,8 @@ enum hmm_pfn_value_e { * @valid: pfns array did not change since it has been fill by an HMM function */ struct hmm_range { + struct mmu_interval_notifier *notifier; + unsigned long notifier_seq; struct hmm *hmm; struct list_head list; unsigned long start; -- cgit v1.2.3 From 107e899874e95dcddc779142942bf285eba38bc5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:21 -0400 Subject: mm/hmm: define the pre-processor related parts of hmm.h even if disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the function calls are stubbed out with static inlines that always fail. This is the standard way to write a header for an optional component and makes it easier for drivers that only optionally need HMM_MIRROR. Link: https://lore.kernel.org/r/20191112202231.3856-5-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 59 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index fbb35c78637e..cb69bf10dc78 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -62,8 +62,6 @@ #include #include -#ifdef CONFIG_HMM_MIRROR - #include #include #include @@ -374,6 +372,15 @@ struct hmm_mirror { struct list_head list; }; +/* + * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. + */ +#define HMM_FAULT_ALLOW_RETRY (1 << 0) + +/* Don't fault in missing PTEs, just snapshot the current state. */ +#define HMM_FAULT_SNAPSHOT (1 << 1) + +#ifdef CONFIG_HMM_MIRROR int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); void hmm_mirror_unregister(struct hmm_mirror *mirror); @@ -383,14 +390,6 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror); void hmm_range_unregister(struct hmm_range *range); -/* - * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. - */ -#define HMM_FAULT_ALLOW_RETRY (1 << 0) - -/* Don't fault in missing PTEs, just snapshot the current state. */ -#define HMM_FAULT_SNAPSHOT (1 << 1) - long hmm_range_fault(struct hmm_range *range, unsigned int flags); long hmm_range_dma_map(struct hmm_range *range, @@ -401,6 +400,44 @@ long hmm_range_dma_unmap(struct hmm_range *range, struct device *device, dma_addr_t *daddrs, bool dirty); +#else +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) +{ + return -EOPNOTSUPP; +} + +void hmm_mirror_unregister(struct hmm_mirror *mirror) +{ +} + +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) +{ + return -EOPNOTSUPP; +} + +void hmm_range_unregister(struct hmm_range *range) +{ +} + +static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline long hmm_range_dma_map(struct hmm_range *range, + struct device *device, dma_addr_t *daddrs, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline long hmm_range_dma_unmap(struct hmm_range *range, + struct device *device, + dma_addr_t *daddrs, bool dirty) +{ + return -EOPNOTSUPP; +} +#endif /* * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range @@ -411,6 +448,4 @@ long hmm_range_dma_unmap(struct hmm_range *range, */ #define HMM_RANGE_DEFAULT_TIMEOUT 1000 -#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ - #endif /* LINUX_HMM_H */ -- cgit v1.2.3 From f25a546e65292b36f15cca0912450c4944fae031 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:22 -0400 Subject: RDMA/odp: Use mmu_interval_notifier_insert() Replace the internal interval tree based mmu notifier with the new common mmu_interval_notifier_insert() API. This removes a lot of code and fixes a deadlock that can be triggered in ODP: zap_page_range() mmu_notifier_invalidate_range_start() [..] ib_umem_notifier_invalidate_range_start() down_read(&per_mm->umem_rwsem) unmap_single_vma() [..] __split_huge_page_pmd() mmu_notifier_invalidate_range_start() [..] ib_umem_notifier_invalidate_range_start() down_read(&per_mm->umem_rwsem) // DEADLOCK mmu_notifier_invalidate_range_end() up_read(&per_mm->umem_rwsem) mmu_notifier_invalidate_range_end() up_read(&per_mm->umem_rwsem) The umem_rwsem is held across the range_start/end as the ODP algorithm for invalidate_range_end cannot tolerate changes to the interval tree. However, due to the nested invalidation regions the second down_read() can deadlock if there are competing writers. The new core code provides an alternative scheme to solve this problem. Fixes: ca748c39ea3f ("RDMA/umem: Get rid of per_mm->notifier_count") Link: https://lore.kernel.org/r/20191112202231.3856-6-jgg@ziepe.ca Tested-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- include/rdma/ib_umem_odp.h | 68 ++++++++++------------------------------------ include/rdma/ib_verbs.h | 2 -- 2 files changed, 14 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 09b0e4494986..81429acc8257 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -35,11 +35,11 @@ #include #include -#include struct ib_umem_odp { struct ib_umem umem; - struct ib_ucontext_per_mm *per_mm; + struct mmu_interval_notifier notifier; + struct pid *tgid; /* * An array of the pages included in the on-demand paging umem. @@ -62,13 +62,8 @@ struct ib_umem_odp { struct mutex umem_mutex; void *private; /* for the HW driver to use. */ - int notifiers_seq; - int notifiers_count; int npages; - /* Tree tracking */ - struct interval_tree_node interval_tree; - /* * An implicit odp umem cannot be DMA mapped, has 0 length, and serves * only as an anchor for the driver to hold onto the per_mm. FIXME: @@ -77,7 +72,6 @@ struct ib_umem_odp { */ bool is_implicit_odp; - struct completion notifier_completion; unsigned int page_shift; }; @@ -89,13 +83,13 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) /* Returns the first page of an ODP umem. */ static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp) { - return umem_odp->interval_tree.start; + return umem_odp->notifier.interval_tree.start; } /* Returns the address of the page after the last one of an ODP umem. */ static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp) { - return umem_odp->interval_tree.last + 1; + return umem_odp->notifier.interval_tree.last + 1; } static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) @@ -119,21 +113,15 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -struct ib_ucontext_per_mm { - struct mmu_notifier mn; - struct pid *tgid; - - struct rb_root_cached umem_tree; - /* Protects umem_tree */ - struct rw_semaphore umem_rwsem; -}; - -struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access); +struct ib_umem_odp * +ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, size_t size, + int access, const struct mmu_interval_notifier_ops *ops); struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, int access); -struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, - unsigned long addr, size_t size); +struct ib_umem_odp * +ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr, + size_t size, + const struct mmu_interval_notifier_ops *ops); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, @@ -143,39 +131,11 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bound); -typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end, - void *cookie); -/* - * Call the callback on each ib_umem in the range. Returns the logical or of - * the return values of the functions called. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 end, - umem_call_back cb, - bool blockable, void *cookie); - -static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, - unsigned long mmu_seq) -{ - /* - * This code is strongly based on the KVM code from - * mmu_notifier_retry. Should be called with - * the relevant locks taken (umem_odp->umem_mutex - * and the ucontext umem_mutex semaphore locked for read). - */ - - if (unlikely(umem_odp->notifiers_count)) - return 1; - if (umem_odp->notifiers_seq != mmu_seq) - return 1; - return 0; -} - #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, - unsigned long addr, - size_t size, int access) +static inline struct ib_umem_odp * +ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, size_t size, + int access, const struct mmu_interval_notifier_ops *ops) { return ERR_PTR(-EINVAL); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..2c30c859ae0d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2422,8 +2422,6 @@ struct ib_device_ops { u64 iova); int (*unmap_fmr)(struct list_head *fmr_list); int (*dealloc_fmr)(struct ib_fmr *fmr); - void (*invalidate_range)(struct ib_umem_odp *umem_odp, - unsigned long start, unsigned long end); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, -- cgit v1.2.3 From a22dd506400d0f4784ad596f073b9eb5ed7c6a2a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:30 -0400 Subject: mm/hmm: remove hmm_mirror and related MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only two users of this are now converted to use mmu_interval_notifier, delete all the code and update hmm.rst. Link: https://lore.kernel.org/r/20191112202231.3856-14-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 183 +--------------------------------------------------- 1 file changed, 2 insertions(+), 181 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index cb69bf10dc78..1225b3c87aba 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -68,29 +68,6 @@ #include #include - -/* - * struct hmm - HMM per mm struct - * - * @mm: mm struct this HMM struct is bound to - * @lock: lock protecting ranges list - * @ranges: list of range being snapshotted - * @mirrors: list of mirrors for this mm - * @mmu_notifier: mmu notifier to track updates to CPU page table - * @mirrors_sem: read/write semaphore protecting the mirrors list - * @wq: wait queue for user waiting on a range invalidation - * @notifiers: count of active mmu notifiers - */ -struct hmm { - struct mmu_notifier mmu_notifier; - spinlock_t ranges_lock; - struct list_head ranges; - struct list_head mirrors; - struct rw_semaphore mirrors_sem; - wait_queue_head_t wq; - long notifiers; -}; - /* * hmm_pfn_flag_e - HMM flag enums * @@ -143,9 +120,8 @@ enum hmm_pfn_value_e { /* * struct hmm_range - track invalidation lock on virtual address range * - * @notifier: an optional mmu_interval_notifier - * @notifier_seq: when notifier is used this is the result of - * mmu_interval_read_begin() + * @notifier: a mmu_interval_notifier that includes the start/end + * @notifier_seq: result of mmu_interval_read_begin() * @hmm: the core HMM structure this range is active against * @vma: the vm area struct for the range * @list: all range lock are on a list @@ -162,8 +138,6 @@ enum hmm_pfn_value_e { struct hmm_range { struct mmu_interval_notifier *notifier; unsigned long notifier_seq; - struct hmm *hmm; - struct list_head list; unsigned long start; unsigned long end; uint64_t *pfns; @@ -172,32 +146,8 @@ struct hmm_range { uint64_t default_flags; uint64_t pfn_flags_mask; uint8_t pfn_shift; - bool valid; }; -/* - * hmm_range_wait_until_valid() - wait for range to be valid - * @range: range affected by invalidation to wait on - * @timeout: time out for wait in ms (ie abort wait after that period of time) - * Return: true if the range is valid, false otherwise. - */ -static inline bool hmm_range_wait_until_valid(struct hmm_range *range, - unsigned long timeout) -{ - return wait_event_timeout(range->hmm->wq, range->valid, - msecs_to_jiffies(timeout)) != 0; -} - -/* - * hmm_range_valid() - test if a range is valid or not - * @range: range - * Return: true if the range is valid, false otherwise. - */ -static inline bool hmm_range_valid(struct hmm_range *range) -{ - return range->valid; -} - /* * hmm_device_entry_to_page() - return struct page pointed to by a device entry * @range: range use to decode device entry value @@ -267,111 +217,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, range->flags[HMM_PFN_VALID]; } -/* - * Mirroring: how to synchronize device page table with CPU page table. - * - * A device driver that is participating in HMM mirroring must always - * synchronize with CPU page table updates. For this, device drivers can either - * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device - * drivers can decide to register one mirror per device per process, or just - * one mirror per process for a group of devices. The pattern is: - * - * int device_bind_address_space(..., struct mm_struct *mm, ...) - * { - * struct device_address_space *das; - * - * // Device driver specific initialization, and allocation of das - * // which contains an hmm_mirror struct as one of its fields. - * ... - * - * ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops); - * if (ret) { - * // Cleanup on error - * return ret; - * } - * - * // Other device driver specific initialization - * ... - * } - * - * Once an hmm_mirror is registered for an address space, the device driver - * will get callbacks through sync_cpu_device_pagetables() operation (see - * hmm_mirror_ops struct). - * - * Device driver must not free the struct containing the hmm_mirror struct - * before calling hmm_mirror_unregister(). The expected usage is to do that when - * the device driver is unbinding from an address space. - * - * - * void device_unbind_address_space(struct device_address_space *das) - * { - * // Device driver specific cleanup - * ... - * - * hmm_mirror_unregister(&das->mirror); - * - * // Other device driver specific cleanup, and now das can be freed - * ... - * } - */ - -struct hmm_mirror; - -/* - * struct hmm_mirror_ops - HMM mirror device operations callback - * - * @update: callback to update range on a device - */ -struct hmm_mirror_ops { - /* release() - release hmm_mirror - * - * @mirror: pointer to struct hmm_mirror - * - * This is called when the mm_struct is being released. The callback - * must ensure that all access to any pages obtained from this mirror - * is halted before the callback returns. All future access should - * fault. - */ - void (*release)(struct hmm_mirror *mirror); - - /* sync_cpu_device_pagetables() - synchronize page tables - * - * @mirror: pointer to struct hmm_mirror - * @update: update information (see struct mmu_notifier_range) - * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false - * and callback needs to block, 0 otherwise. - * - * This callback ultimately originates from mmu_notifiers when the CPU - * page table is updated. The device driver must update its page table - * in response to this callback. The update argument tells what action - * to perform. - * - * The device driver must not return from this callback until the device - * page tables are completely updated (TLBs flushed, etc); this is a - * synchronous call. - */ - int (*sync_cpu_device_pagetables)( - struct hmm_mirror *mirror, - const struct mmu_notifier_range *update); -}; - -/* - * struct hmm_mirror - mirror struct for a device driver - * - * @hmm: pointer to struct hmm (which is unique per mm_struct) - * @ops: device driver callback for HMM mirror operations - * @list: for list of mirrors of a given mm - * - * Each address space (mm_struct) being mirrored by a device must register one - * instance of an hmm_mirror struct with HMM. HMM will track the list of all - * mirrors for each mm_struct. - */ -struct hmm_mirror { - struct hmm *hmm; - const struct hmm_mirror_ops *ops; - struct list_head list; -}; - /* * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. */ @@ -381,15 +226,9 @@ struct hmm_mirror { #define HMM_FAULT_SNAPSHOT (1 << 1) #ifdef CONFIG_HMM_MIRROR -int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); -void hmm_mirror_unregister(struct hmm_mirror *mirror); - /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ -int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror); -void hmm_range_unregister(struct hmm_range *range); - long hmm_range_fault(struct hmm_range *range, unsigned int flags); long hmm_range_dma_map(struct hmm_range *range, @@ -401,24 +240,6 @@ long hmm_range_dma_unmap(struct hmm_range *range, dma_addr_t *daddrs, bool dirty); #else -int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) -{ - return -EOPNOTSUPP; -} - -void hmm_mirror_unregister(struct hmm_mirror *mirror) -{ -} - -int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) -{ - return -EOPNOTSUPP; -} - -void hmm_range_unregister(struct hmm_range *range) -{ -} - static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) { return -EOPNOTSUPP; -- cgit v1.2.3 From 93f4e735b6d98ee4b7a1252d81e815a983e359f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2019 14:45:28 +0100 Subject: mm/hmm: remove hmm_range_dma_map and hmm_range_dma_unmap These two functions have never been used since they were added. Link: https://lore.kernel.org/r/20191113134528.21187-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 1225b3c87aba..ddf9f7144c43 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -230,34 +230,11 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, * Please see Documentation/vm/hmm.rst for how to use the range API. */ long hmm_range_fault(struct hmm_range *range, unsigned int flags); - -long hmm_range_dma_map(struct hmm_range *range, - struct device *device, - dma_addr_t *daddrs, - unsigned int flags); -long hmm_range_dma_unmap(struct hmm_range *range, - struct device *device, - dma_addr_t *daddrs, - bool dirty); #else static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) { return -EOPNOTSUPP; } - -static inline long hmm_range_dma_map(struct hmm_range *range, - struct device *device, dma_addr_t *daddrs, - unsigned int flags) -{ - return -EOPNOTSUPP; -} - -static inline long hmm_range_dma_unmap(struct hmm_range *range, - struct device *device, - dma_addr_t *daddrs, bool dirty) -{ - return -EOPNOTSUPP; -} #endif /* -- cgit v1.2.3