From 2ab346cfb0decf01523949e29f5cf542f2304611 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Aug 2017 17:18:16 +0100 Subject: perf/aux: Make aux_{head,wakeup} ring_buffer members long The aux_head and aux_wakeup members of struct ring_buffer are defined using the local_t type, despite the fact that they are only accessed via the perf_aux_output_*() functions, which cannot race with each other for a given ring buffer. This patch changes the type of the members to long, so we can avoid using the local_*() API where it isn't needed. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1502900297-21839-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'kernel/events/ring_buffer.c') diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ee97196bb151..25437fda56e3 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -367,7 +367,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1))) goto err_put; - aux_head = local_read(&rb->aux_head); + aux_head = rb->aux_head; handle->rb = rb; handle->event = event; @@ -382,7 +382,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, */ if (!rb->aux_overwrite) { aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); - handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark; + handle->wakeup = rb->aux_wakeup + rb->aux_watermark; if (aux_head - aux_tail < perf_aux_size(rb)) handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); @@ -433,12 +433,12 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; aux_head = handle->head; - local_set(&rb->aux_head, aux_head); + rb->aux_head = aux_head; } else { handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; - aux_head = local_read(&rb->aux_head); - local_add(size, &rb->aux_head); + aux_head = rb->aux_head; + rb->aux_head += size; } if (size || handle->aux_flags) { @@ -450,11 +450,10 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) handle->aux_flags); } - aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); - - if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + rb->user_page->aux_head = rb->aux_head; + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { wakeup = true; - local_add(rb->aux_watermark, &rb->aux_wakeup); + rb->aux_wakeup += rb->aux_watermark; } if (wakeup) { @@ -478,22 +477,20 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { struct ring_buffer *rb = handle->rb; - unsigned long aux_head; if (size > handle->size) return -ENOSPC; - local_add(size, &rb->aux_head); + rb->aux_head += size; - aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); - if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + rb->user_page->aux_head = rb->aux_head; + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { perf_output_wakeup(handle); - local_add(rb->aux_watermark, &rb->aux_wakeup); - handle->wakeup = local_read(&rb->aux_wakeup) + - rb->aux_watermark; + rb->aux_wakeup += rb->aux_watermark; + handle->wakeup = rb->aux_wakeup + rb->aux_watermark; } - handle->head = aux_head; + handle->head = rb->aux_head; handle->size -= size; return 0; -- cgit v1.2.3 From d9a50b0256f06bd39a1bed1ba40baec37c356b11 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Aug 2017 17:18:17 +0100 Subject: perf/aux: Ensure aux_wakeup represents most recent wakeup index The aux_watermark member of struct ring_buffer represents the period (in terms of bytes) at which wakeup events should be generated when data is written to the aux buffer in non-snapshot mode. On hardware that cannot generate an interrupt when the aux_head reaches an arbitrary wakeup index (such as ARM SPE), the aux_head sampled from handle->head in perf_aux_output_{skip,end} may in fact be past the wakeup index. This can lead to wakeup slowly falling behind the head. For example, consider the case where hardware can only generate an interrupt on a page-boundary and the aux buffer is initialised as follows: // Buffer size is 2 * PAGE_SIZE rb->aux_head = rb->aux_wakeup = 0 rb->aux_watermark = PAGE_SIZE / 2 following the first perf_aux_output_begin call, the handle is initialised with: handle->head = 0 handle->size = 2 * PAGE_SIZE handle->wakeup = PAGE_SIZE / 2 and the hardware will be programmed to generate an interrupt at PAGE_SIZE. When the interrupt is raised, the hardware head will be at PAGE_SIZE, so calling perf_aux_output_end(handle, PAGE_SIZE) puts the ring buffer into the following state: rb->aux_head = PAGE_SIZE rb->aux_wakeup = PAGE_SIZE / 2 rb->aux_watermark = PAGE_SIZE / 2 and then the next call to perf_aux_output_begin will result in: handle->head = handle->wakeup = PAGE_SIZE for which the semantics are unclear and, for a smaller aux_watermark (e.g. PAGE_SIZE / 4), then the wakeup would in fact be behind head at this point. This patch fixes the problem by rounding down the aux_head (as sampled from the handle) to the nearest aux_watermark boundary when updating rb->aux_wakeup, therefore taking into account any overruns by the hardware. Reported-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1502900297-21839-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/events/ring_buffer.c') diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 25437fda56e3..af71a84e12ee 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -453,7 +453,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb->user_page->aux_head = rb->aux_head; if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { wakeup = true; - rb->aux_wakeup += rb->aux_watermark; + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); } if (wakeup) { @@ -486,7 +486,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) rb->user_page->aux_head = rb->aux_head; if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { perf_output_wakeup(handle); - rb->aux_wakeup += rb->aux_watermark; + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); handle->wakeup = rb->aux_wakeup + rb->aux_watermark; } -- cgit v1.2.3 From 441430eb54a00586f95f1aefc48e0801bbd6a923 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 6 Sep 2017 19:08:11 +0300 Subject: perf/aux: Only update ->aux_wakeup in non-overwrite mode The following commit: d9a50b0256 ("perf/aux: Ensure aux_wakeup represents most recent wakeup index") changed the AUX wakeup position calculation to rounddown(), which causes a division-by-zero in AUX overwrite mode (aka "snapshot mode"). The zero denominator results from the fact that perf record doesn't set aux_watermark to anything, in which case the kernel will set it to half the AUX buffer size, but only for non-overwrite mode. In the overwrite mode aux_watermark stays zero. The good news is that, AUX overwrite mode, wakeups don't happen and related bookkeeping is not relevant, so we can simply forego the whole wakeup updates. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/20170906160811.16510-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'kernel/events/ring_buffer.c') diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index af71a84e12ee..f684d8e5fa2b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -412,6 +412,19 @@ err: return NULL; } +static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb) +{ + if (rb->aux_overwrite) + return false; + + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); + return true; + } + + return false; +} + /* * Commit the data written by hardware into the ring buffer by adjusting * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the @@ -451,10 +464,8 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) } rb->user_page->aux_head = rb->aux_head; - if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { + if (rb_need_aux_wakeup(rb)) wakeup = true; - rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); - } if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) @@ -484,9 +495,8 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) rb->aux_head += size; rb->user_page->aux_head = rb->aux_head; - if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { + if (rb_need_aux_wakeup(rb)) { perf_output_wakeup(handle); - rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); handle->wakeup = rb->aux_wakeup + rb->aux_watermark; } -- cgit v1.2.3 From bc1d202023eb66f088f736ba423bee1cf135c720 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Aug 2016 16:53:15 +0100 Subject: perf/core: Export AUX buffer helpers to modules Perf PMU drivers using AUX buffers cannot be built as modules unless the AUX helpers are exported. This patch exports perf_aux_output_{begin,end,skip} and perf_get_aux to modules. Cc: Peter Zijlstra Signed-off-by: Will Deacon --- kernel/events/ring_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/events/ring_buffer.c') diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f684d8e5fa2b..6d9bffe4d6cc 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -411,6 +411,7 @@ err: return NULL; } +EXPORT_SYMBOL_GPL(perf_aux_output_begin); static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb) { @@ -480,6 +481,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb_free_aux(rb); ring_buffer_put(rb); } +EXPORT_SYMBOL_GPL(perf_aux_output_end); /* * Skip over a given number of bytes in the AUX buffer, due to, for example, @@ -505,6 +507,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) return 0; } +EXPORT_SYMBOL_GPL(perf_aux_output_skip); void *perf_get_aux(struct perf_output_handle *handle) { @@ -514,6 +517,7 @@ void *perf_get_aux(struct perf_output_handle *handle) return handle->rb->aux_priv; } +EXPORT_SYMBOL_GPL(perf_get_aux); #define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY) -- cgit v1.2.3 From 6aa7de059173a986114ac43b8f50b297a86f09a8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:29 -0700 Subject: locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE() Please do not apply this to mainline directly, instead please re-run the coccinelle script shown below and apply its output. For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't harmful, and changing them results in churn. However, for some features, the read/write distinction is critical to correct operation. To distinguish these cases, separate read/write accessors must be used. This patch migrates (most) remaining ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following coccinelle script: ---- // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and // WRITE_ONCE() // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/events/ring_buffer.c') diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f684d8e5fa2b..f3e37971c842 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -381,7 +381,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * (B) <-> (C) ordering is still observed by the pmu driver. */ if (!rb->aux_overwrite) { - aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); + aux_tail = READ_ONCE(rb->user_page->aux_tail); handle->wakeup = rb->aux_wakeup + rb->aux_watermark; if (aux_head - aux_tail < perf_aux_size(rb)) handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); -- cgit v1.2.3