From b944afc9d64ddf1b6a152c23ff86bf26e1fd430c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 17 Oct 2020 16:15:06 -0700 Subject: mm: add a VM_MAP_PUT_PAGES flag for vmap Add a flag so that vmap takes ownership of the passed in page array. When vfree is called on such an allocation it will put one reference on each page, and free the page array itself. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Cc: Boris Ostrovsky Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Matthew Auld Cc: "Matthew Wilcox (Oracle)" Cc: Minchan Kim Cc: Nitin Gupta Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Stefano Stabellini Cc: Tvrtko Ursulin Cc: Uladzislau Rezki (Sony) Link: https://lkml.kernel.org/r/20201002122204.1534411-3-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0221f852a7e1..b899681e3ff9 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -24,6 +24,7 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ +#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */ /* * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. -- cgit v1.2.3 From 3e9a9e256b1e1e6e8f19faf76fa9c37578ae35ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 17 Oct 2020 16:15:10 -0700 Subject: mm: add a vmap_pfn function Add a proper helper to remap PFNs into kernel virtual space so that drivers don't have to abuse alloc_vm_area and open coded PTE manipulation for it. 
Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Cc: Boris Ostrovsky Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Matthew Auld Cc: "Matthew Wilcox (Oracle)" Cc: Minchan Kim Cc: Nitin Gupta Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Stefano Stabellini Cc: Tvrtko Ursulin Cc: Uladzislau Rezki (Sony) Link: https://lkml.kernel.org/r/20201002122204.1534411-4-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b899681e3ff9..c77efeac2425 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -122,6 +122,7 @@ extern void vfree_atomic(const void *addr); extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); +void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot); extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, -- cgit v1.2.3 From 301fa9f2ddf7fb248c188af292c9cc04f8283dff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 17 Oct 2020 16:15:39 -0700 Subject: mm: remove alloc_vm_area All users are gone now. 
Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Cc: Boris Ostrovsky Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Matthew Auld Cc: "Matthew Wilcox (Oracle)" Cc: Minchan Kim Cc: Nitin Gupta Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Stefano Stabellini Cc: Tvrtko Ursulin Cc: Uladzislau Rezki (Sony) Link: https://lkml.kernel.org/r/20201002122204.1534411-12-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c77efeac2425..938eaf9517e2 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -169,6 +169,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller); +void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); @@ -204,10 +205,6 @@ static inline void set_vm_flush_reset_perms(void *addr) } #endif -/* Allocate/destroy a 'vmalloc' VM area. */ -extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes); -extern void free_vm_area(struct vm_struct *area); - /* for /dev/kmem */ extern long vread(char *buf, char *addr, unsigned long count); extern long vwrite(char *buf, char *addr, unsigned long count); -- cgit v1.2.3 From 96e2db456135db0cf2476b6890f1e8b2fdcf21eb Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 14 Dec 2020 19:08:49 -0800 Subject: mm/vmalloc: rework the drain logic The current "lazy drain" model suffers from at least two issues. The first one is related to the unsorted list of vmap areas: in order to identify the [min:max] range of areas to be drained, it requires a full list scan, which is time consuming if the list is too long.
The second one, as a next step, is about merging all fragments with the free space. This is also time consuming because it has to iterate over the entire list which holds outstanding lazy areas. See below the "preemptirqsoff" tracer that illustrates a high latency. It is ~24676us. Our workloads like audio and video are affected by such long latency: tracer: preemptirqsoff preemptirqsoff latency trace v1.1.5 on 4.9.186-perf+ -------------------------------------------------------------------- latency: 24676 us, #4/4, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 P:8) ----------------- | task: crtc_commit:112-261 (uid:0 nice:0 policy:1 rt_prio:16) ----------------- => started at: __purge_vmap_area_lazy => ended at: __purge_vmap_area_lazy _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / delay cmd pid ||||| time | caller \ / ||||| \ | / crtc_com-261 1...1 1us*: _raw_spin_lock <-__purge_vmap_area_lazy [...] crtc_com-261 1...1 24675us : _raw_spin_unlock <-__purge_vmap_area_lazy crtc_com-261 1...1 24677us : trace_preempt_on <-__purge_vmap_area_lazy crtc_com-261 1...1 24683us : => free_vmap_area_noflush => remove_vm_area => __vunmap => vfree => drm_property_free_blob => drm_mode_object_unreference => drm_property_unreference_blob => __drm_atomic_helper_crtc_destroy_state => sde_crtc_destroy_state => drm_atomic_state_default_clear => drm_atomic_state_clear => drm_atomic_state_free => complete_commit => _msm_drm_commit_work_cb => kthread_worker_fn => kthread => ret_from_fork To address those two issues we can redesign the purging of the outstanding lazy areas. Instead of queuing vmap areas to the list, we replace it by a separate rb-tree. In that case an area is located in the tree/list in ascending order. It will give us the advantages below: a) Outstanding vmap areas are merged creating bigger coalesced blocks, thus it becomes less fragmented.
b) It is possible to calculate a flush range [min:max] without scanning all elements. It is O(1) access time or complexity; c) The final merge of areas with the rb-tree that represents a free space is faster because of (a). As a result the lock contention is also reduced. Link: https://lkml.kernel.org/r/20201116220033.1837-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Cc: Hillf Danton Cc: Michal Hocko Cc: Matthew Wilcox Cc: Oleksiy Avramchenko Cc: Steven Rostedt Cc: Minchan Kim Cc: huang ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 938eaf9517e2..80c0181c411d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -72,16 +72,14 @@ struct vmap_area { struct list_head list; /* address sorted list */ /* - * The following three variables can be packed, because - * a vmap_area object is always one of the three states: + * The following two variables can be packed, because + * a vmap_area object can be either: * 1) in "free" tree (root is vmap_area_root) - * 2) in "busy" tree (root is free_vmap_area_root) - * 3) in purge list (head is vmap_purge_list) + * 2) or "busy" tree (root is free_vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ - struct llist_node purge_list; /* in purge list */ }; }; -- cgit v1.2.3 From 4f6ec8602341e97b364e4e0d41a1ed08148f5e98 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 4 Feb 2021 18:32:24 -0800 Subject: mm/vmalloc: separate put pages and flush VM flags When VM_MAP_PUT_PAGES was added, it was defined with the same value as VM_FLUSH_RESET_PERMS. This doesn't seem like it will cause any big functional problems other than some excess flushing for VM_MAP_PUT_PAGES allocations. Redefine VM_MAP_PUT_PAGES to have its own value. 
Also, rearrange things so flags are less likely to be missed in the future. Link: https://lkml.kernel.org/r/20210122233706.9304-1-rick.p.edgecombe@intel.com Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap") Signed-off-by: Rick Edgecombe Suggested-by: Matthew Wilcox Cc: Miaohe Lin Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 80c0181c411d..cedcda6593f6 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -24,7 +24,8 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ -#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */ +#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ +#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ /* * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. @@ -37,12 +38,6 @@ struct notifier_block; /* in notifier.h */ * determine which allocations need the module shadow freed. */ -/* - * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with - * vfree_atomic(). - */ -#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ - /* bits [20..32] reserved for arch specific ioremap internals */ /* -- cgit v1.2.3