diff options
Diffstat (limited to 'tools/include')
| -rw-r--r-- | tools/include/asm/barrier.h | 35 | ||||
| -rw-r--r-- | tools/include/linux/ring_buffer.h | 73 |
2 files changed, 108 insertions, 0 deletions
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h index 391d942536e5..8d378c57cb01 100644 --- a/tools/include/asm/barrier.h +++ b/tools/include/asm/barrier.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/compiler.h> #if defined(__i386__) || defined(__x86_64__) #include "../../arch/x86/include/asm/barrier.h" #elif defined(__arm__) @@ -26,3 +27,37 @@ #else #include <asm-generic/barrier.h> #endif + +/* + * Generic fallback smp_*() definitions for archs that haven't + * been updated yet. + */ + +#ifndef smp_rmb +# define smp_rmb() rmb() +#endif + +#ifndef smp_wmb +# define smp_wmb() wmb() +#endif + +#ifndef smp_mb +# define smp_mb() mb() +#endif + +#ifndef smp_store_release +# define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_mb(); \ + ___p1; \ +}) +#endif diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h new file mode 100644 index 000000000000..9a083ae60473 --- /dev/null +++ b/tools/include/linux/ring_buffer.h @@ -0,0 +1,73 @@ +#ifndef _TOOLS_LINUX_RING_BUFFER_H_ +#define _TOOLS_LINUX_RING_BUFFER_H_ + +#include <asm/barrier.h> + +/* + * Contract with kernel for walking the perf ring buffer from + * user space requires the following barrier pairing (quote + * from kernel/events/ring_buffer.c): + * + * Since the mmap() consumer (userspace) can run on a + * different CPU: + * + * kernel user + * + * if (LOAD ->data_tail) { LOAD ->data_head + * (A) smp_rmb() (C) + * STORE $data LOAD $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head STORE ->data_tail + * } + * + * Where A pairs with D, and B pairs with C. + * + * In our case A is a control dependency that separates the + * load of the ->data_tail and the stores of $data. In case + * ->data_tail indicates there is no room in the buffer to + * store $data we do not. + * + * D needs to be a full barrier since it separates the data + * READ from the tail WRITE. + * + * For B a WMB is sufficient since it separates two WRITEs, + * and for C an RMB is sufficient since it separates two READs. + * + * Note, instead of B, C, D we could also use smp_store_release() + * in B and D as well as smp_load_acquire() in C. + * + * However, this optimization does not make sense for all kernel + * supported architectures since for a fair number it would + * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), + * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). + * + * Thus for those smp_wmb() in B and smp_rmb() in C would still + * be less expensive. For the case of D this has either the same + * cost or is less expensive, for example, due to TSO x86 can + * avoid the CPU barrier entirely. + */ + +static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) +{ +/* + * Architectures where smp_load_acquire() does not fallback to + * READ_ONCE() + smp_mb() pair. + */ +#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ + defined(__ia64__) || defined(__sparc__) && defined(__arch64__) + return smp_load_acquire(&base->data_head); +#else + u64 head = READ_ONCE(base->data_head); + + smp_rmb(); + return head; +#endif +} + +static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, + u64 tail) +{ + smp_store_release(&base->data_tail, tail); +} + +#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ |
