From 54473e0ef849f44e5ee43e6d6746c27030c3825b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Aug 2025 22:22:09 +0200 Subject: perf/core: Preserve AUX buffer allocation failure result A recent overhaul sets the return value to 0 unconditionally after the allocations, which causes reference count leaks and corrupts the user->vm accounting. Preserve the AUX buffer allocation failure return value, so that the subsequent code works correctly. Fixes: 0983593f32c4 ("perf/core: Lift event->mmap_mutex in perf_mmap()") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 22fdf0c187cd..c05262e15b7d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7115,6 +7115,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) perf_event_update_time(event); perf_event_init_userpage(event); perf_event_update_userpage(event); + ret = 0; } else { ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, flags); @@ -7122,8 +7123,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) rb->aux_mmap_locked = extra; } - ret = 0; - unlock: if (!ret) { atomic_long_add(user_extra, &user->locked_vm); -- cgit v1.2.3 From 5468c0fbccbb9d156522c50832244a8b722374fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:39:39 +0200 Subject: perf/core: Don't leak AUX buffer refcount on allocation failure Failure of the AUX buffer allocation leaks the reference count. Set the reference count to 1 only when the allocation succeeds. Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index c05262e15b7d..e89e77228591 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7051,8 +7051,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; goto unlock; } - - atomic_set(&rb->aux_mmap_count, 1); } user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); @@ -7119,8 +7117,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) } else { ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, flags); - if (!ret) + if (!ret) { + atomic_set(&rb->aux_mmap_count, 1); rb->aux_mmap_locked = extra; + } } unlock: @@ -7130,6 +7130,7 @@ unlock: atomic_inc(&event->mmap_count); } else if (rb) { + /* AUX allocation failed */ atomic_dec(&rb->mmap_count); } aux_unlock: -- cgit v1.2.3 From 07091aade394f690e7b655578140ef84d0e8d7b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:49:48 +0200 Subject: perf/core: Exit early on perf_mmap() fail When perf_mmap() fails to allocate a buffer, it still invokes the event_mapped() callback of the related event. On X86 this might increase the perf_rdpmc_allowed reference counter. But nothing undoes this as perf_mmap_close() is never called in this case, which causes another reference count leak. Return early on failure to prevent that. Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index e89e77228591..a2e3591175c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7138,6 +7138,9 @@ aux_unlock: mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex); + if (ret) + return ret; + /* * Since pinned accounting is per vm we cannot allow fork() to copy our * vma. @@ -7145,8 +7148,7 @@ aux_unlock: vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &perf_mmap_vmops; - if (!ret) - ret = map_range(rb, vma); + ret = map_range(rb, vma); mapped = get_mapped(event, event_mapped); if (mapped) -- cgit v1.2.3 From f74b9f4ba63ffdf597aaaa6cad7e284cb8e04820 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:48:55 +0200 Subject: perf/core: Handle buffer mapping fail correctly in perf_mmap() After successful allocation of a buffer or a successful attachment to an existing buffer perf_mmap() tries to map the buffer read only into the page table. If that fails, the already set up page table entries are zapped, but the other perf specific side effects of that failure are not handled. The calling code just cleans up the VMA and does not invoke perf_mmap_close(). This leaks reference counts, corrupts user->vm accounting and also results in an unbalanced invocation of event::event_mapped(). Cure this by moving the event::event_mapped() invocation before the map_range() call so that on map_range() failure perf_mmap_close() can be invoked without causing an unbalanced event::event_unmapped() call. perf_mmap_close() undoes the reference counts and eventually frees buffers. Fixes: b709eb872e19 ("perf: map pages in advance") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index a2e3591175c6..4563bd864bbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7148,12 +7148,20 @@ aux_unlock: vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &perf_mmap_vmops; - ret = map_range(rb, vma); - mapped = get_mapped(event, event_mapped); if (mapped) mapped(event, vma->vm_mm); + /* + * Try to map it into the page table. On fail, invoke + * perf_mmap_close() to undo the above, as the callsite expects + * full cleanup in this case and therefore does not invoke + * vmops::close(). + */ + ret = map_range(rb, vma); + if (ret) + perf_mmap_close(vma); + return ret; } -- cgit v1.2.3 From b024d7b56c77191cde544f838debb7f8451cd0d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jul 2025 23:01:21 +0200 Subject: perf/core: Prevent VMA split of buffer mappings The perf mmap code is careful about mmap()'ing the user page with the ringbuffer and additionally the auxiliary buffer, when the event supports it. Once the first mapping is established, subsequent mapping have to use the same offset and the same size in both cases. The reference counting for the ringbuffer and the auxiliary buffer depends on this being correct. Though perf does not prevent that a related mapping is split via mmap(2), munmap(2) or mremap(2). A split of a VMA results in perf_mmap_open() calls, which take reference counts, but then the subsequent perf_mmap_close() calls are not longer fulfilling the offset and size checks. This leads to reference count leaks. As perf already has the requirement for subsequent mappings to match the initial mapping, the obvious consequence is that VMA splits, caused by resizing of a mapping or partial unmapping, have to be prevented. Implement the vm_operations_struct::may_split() callback and return unconditionally -EINVAL. That ensures that the mapping offsets and sizes cannot be changed after the fact. Remapping to a different fixed address with the same size is still possible as it takes the references for the new mapping and drops those of the old mapping. Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-27504 Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Acked-by: Arnaldo Carvalho de Melo Acked-by: Vlastimil Babka Cc: stable@vger.kernel.org --- kernel/events/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 4563bd864bbc..8060c2857bb2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6842,10 +6842,20 @@ static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf) return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS; } +static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr) +{ + /* + * Forbid splitting perf mappings to prevent refcount leaks due to + * the resulting non-matching offsets and sizes. See open()/close(). + */ + return -EINVAL; +} + static const struct vm_operations_struct perf_mmap_vmops = { .open = perf_mmap_open, .close = perf_mmap_close, /* non mergeable */ .pfn_mkwrite = perf_mmap_pfn_mkwrite, + .may_split = perf_mmap_may_split, }; static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma) -- cgit v1.2.3