From a043971141f163f9845324a2f83502d15011485d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 14 Dec 2013 20:31:55 -0800 Subject: perf bench: Add futex-hash microbenchmark Introduce futexes to perf-bench and add a program that stresses and measures the kernel's implementation of the hash table. This is a multi-threaded program that simply measures the amount of failed futex wait calls - we only want to deal with the hashing overhead, so a negative return of futex_wait_setup() is enough to do the trick. An example run: $ perf bench futex hash -t 32 Run summary [PID 10989]: 32 threads, each operating on 1024 [private] futexes for 10 secs. [thread 0] futexes: 0x19d9b10 ... 0x19dab0c [ 418713 ops/sec ] [thread 1] futexes: 0x19daca0 ... 0x19dbc9c [ 469913 ops/sec ] [thread 2] futexes: 0x19dbe30 ... 0x19dce2c [ 479744 ops/sec ] ... [thread 31] futexes: 0x19fbb80 ... 0x19fcb7c [ 464179 ops/sec ] Averaged 454310 operations/sec (+- 0.84%), total secs = 10 Signed-off-by: Davidlohr Bueso Acked-by: Darren Hart Cc: Aswin Chandramouleeswaran Cc: Darren Hart Cc: Ingo Molnar Cc: Jason Low Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/1387081917-9102-2-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/bench.h | 1 + tools/perf/bench/futex-hash.c | 212 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/bench/futex.h | 48 ++++++++++ 3 files changed, 261 insertions(+) create mode 100644 tools/perf/bench/futex-hash.c create mode 100644 tools/perf/bench/futex.h (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 0fdc85269c4d..34edb5c34db3 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -31,6 +31,7 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused); extern int bench_mem_memset(int argc, const char **argv, const char *prefix); +extern int bench_futex_hash(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c new file mode 100644 index 000000000000..a84206e9c4aa --- /dev/null +++ b/tools/perf/bench/futex-hash.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing. + * + * This program is particularly useful for measuring the kernel's futex hash + * table/function implementation. In order for it to make sense, use with as + * many threads and futexes as possible. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include +#include +#include +#include + +static unsigned int nthreads = 0; +static unsigned int nsecs = 10; +/* amount of futexes per thread */ +static unsigned int nfutexes = 1024; +static bool fshared = false, done = false, silent = false; + +struct timeval start, end, runtime; +static pthread_mutex_t thread_lock; +static unsigned int threads_starting; +static struct stats throughput_stats; +static pthread_cond_t thread_parent, thread_worker; + +struct worker { + int tid; + u_int32_t *futex; + pthread_t thread; + unsigned long ops; +}; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), + OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_hash_usage[] = { + "perf bench futex hash ", + NULL +}; + +static void *workerfn(void *arg) +{ + int ret; + unsigned int i; + struct worker *w = (struct worker *) arg; + + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + do { + for (i = 0; i < nfutexes; i++, w->ops++) { + /* + * We want the futex calls to fail in order to stress + * the hashing of uaddr and not measure other steps, + * such as internal waitqueue handling, thus enlarging + * the critical region protected by hb->lock. + */ + ret = futex_wait(&w->futex[i], 1234, NULL, + fshared ? 0 : FUTEX_PRIVATE_FLAG); + if (!silent && + (!ret || errno != EAGAIN || errno != EWOULDBLOCK)) + warn("Non-expected futex return call"); + } + } while (!done); + + return NULL; +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + /* inform all threads that we're done for the day */ + done = true; + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); +} + +static void print_summary(void) +{ + unsigned long avg = avg_stats(&throughput_stats); + double stddev = stddev_stats(&throughput_stats); + + printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", + !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), + (int) runtime.tv_sec); +} + +int bench_futex_hash(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + cpu_set_t cpu; + struct sigaction act; + unsigned int i, ncpus; + pthread_attr_t thread_attr; + struct worker *worker = NULL; + + argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); + if (argc) { + usage_with_options(bench_futex_hash_usage, options); + exit(EXIT_FAILURE); + } + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) /* default to the number of CPUs */ + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + goto errmem; + + printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", + getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs); + + init_stats(&throughput_stats); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + threads_starting = nthreads; + pthread_attr_init(&thread_attr); + gettimeofday(&start, NULL); + for (i = 0; i < nthreads; i++) { + worker[i].tid = i; + worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); + if (!worker[i].futex) + goto errmem; + + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); + if (ret) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, + (void *)(struct worker *) &worker[i]); + if (ret) + err(EXIT_FAILURE, "pthread_create"); + + } + pthread_attr_destroy(&thread_attr); + + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + sleep(nsecs); + toggle_done(0, NULL, NULL); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i].thread, NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + + for (i = 0; i < nthreads; i++) { + unsigned long t = worker[i].ops/runtime.tv_sec; + update_stats(&throughput_stats, t); + if (!silent) { + if (nfutexes == 1) + printf("[thread %2d] futex: %p [ %ld ops/sec ]\n", + worker[i].tid, &worker[i].futex[0], t); + else + printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n", + worker[i].tid, &worker[i].futex[0], + &worker[i].futex[nfutexes-1], t); + } + + free(worker[i].futex); + } + + print_summary(); + + free(worker); + return ret; +errmem: + err(EXIT_FAILURE, "calloc"); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h new file mode 100644 index 000000000000..7d0bda543e3d --- /dev/null +++ b/tools/perf/bench/futex.h @@ -0,0 +1,48 @@ +/* + * Glibc independent futex library for testing kernel functionality. + * Shamelessly stolen from Darren Hart + * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/ + */ + +#ifndef _FUTEX_H +#define _FUTEX_H + +#include +#include +#include +#include + +/** + * futex() - SYS_futex syscall wrapper + * @uaddr: address of first futex + * @op: futex op code + * @val: typically expected value of uaddr, but varies by op + * @timeout: typically an absolute struct timespec (except where noted + * otherwise). Overloaded by some ops + * @uaddr2: address of second futex for some ops\ + * @val3: varies by op + * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG + * + * futex() is used by all the following futex op wrappers. It can also be + * used for misuse and abuse testing. Generally, the specific op wrappers + * should be used instead. It is a macro instead of an static inline function as + * some of the types over overloaded (timeout is used for nr_requeue for + * example). + * + * These argument descriptions are the defaults for all + * like-named arguments in the following wrappers except where noted below. + */ +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) + +/** + * futex_wait() - block on uaddr with optional timeout + * @timeout: relative timeout + */ +static inline int +futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); +} + +#endif /* _FUTEX_H */ -- cgit v1.2.3 From 27db78307481dbba68c5f3563c6cb694b25521d9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 14 Dec 2013 20:31:56 -0800 Subject: perf bench: Add futex-wake microbenchmark Block a bunch of threads on a futex and wake them up, N at a time. This program is particularly useful to measure the latency of nthread wakeups in non-error situations: all waiters are queued and all wake calls wakeup one or more tasks. An example run: $ perf bench futex wake -t 512 -r 100 Run summary [PID 27823]: blocking on 512 threads (at futex 0x7e10d4), waking up 1 at a time. [Run 1]: Wokeup 512 of 512 threads in 6.0080 ms [Run 2]: Wokeup 512 of 512 threads in 5.2280 ms [Run 3]: Wokeup 512 of 512 threads in 4.8300 ms ... [Run 100]: Wokeup 512 of 512 threads in 5.0100 ms Wokeup 512 of 512 threads in 5.0109 ms (+-2.25%) Signed-off-by: Davidlohr Bueso Acked-by: Darren Hart Cc: Aswin Chandramouleeswaran Cc: Darren Hart Cc: Ingo Molnar Cc: Jason Low Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/1387081917-9102-3-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/bench.h | 1 + tools/perf/bench/futex-wake.c | 201 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/bench/futex.h | 10 +++ 3 files changed, 212 insertions(+) create mode 100644 tools/perf/bench/futex-wake.c (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 34edb5c34db3..6ac3f1d083cc 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -32,6 +32,7 @@ extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused); extern int bench_mem_memset(int argc, const char **argv, const char *prefix); extern int bench_futex_hash(int argc, const char **argv, const char *prefix); +extern int bench_futex_wake(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c new file mode 100644 index 000000000000..d096169b161e --- /dev/null +++ b/tools/perf/bench/futex-wake.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time. + * + * This program is particularly useful to measure the latency of nthread wakeups + * in non-error situations: all waiters are queued and all wake calls wakeup + * one or more tasks, and thus the waitqueue is never empty. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include +#include +#include +#include + +/* all threads will block on the same futex */ +static u_int32_t futex1 = 0; + +/* + * How many wakeups to do at a time. + * Default to 1 in order to make the kernel work more. + */ +static unsigned int nwakes = 1; + +/* + * There can be significant variance from run to run, + * the more repeats, the more exact the overall avg and + * the better idea of the futex latency. + */ +static unsigned int repeat = 10; + +pthread_t *worker; +static bool done = 0, silent = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats waketime_stats, wakeup_stats; +static unsigned int ncpus, threads_starting, nthreads = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"), + OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_END() +}; + +static const char * const bench_futex_wake_usage[] = { + "perf bench futex wake ", + NULL +}; + +static void *workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG); + return NULL; +} + +static void print_summary(void) +{ + double waketime_avg = avg_stats(&waketime_stats); + double waketime_stddev = stddev_stats(&waketime_stats); + unsigned int wakeup_avg = avg_stats(&wakeup_stats); + + printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n", + wakeup_avg, + nthreads, + waketime_avg/1e3, + rel_stddev_stats(waketime_stddev, waketime_avg)); +} + +static void block_threads(pthread_t *w, + pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + /* create and block all threads */ + for (i = 0; i < nthreads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + err(EXIT_FAILURE, "pthread_create"); + } +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_wake(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act; + pthread_attr_t thread_attr; + + argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0); + if (argc) { + usage_with_options(bench_futex_wake_usage, options); + exit(EXIT_FAILURE); + } + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + printf("Run summary [PID %d]: blocking on %d threads (at futex %p), " + "waking up %d at a time.\n\n", + getpid(), nthreads, &futex1, nwakes); + + init_stats(&wakeup_stats); + init_stats(&waketime_stats); + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < repeat && !done; j++) { + unsigned int nwoken = 0; + struct timeval start, end, runtime; + + /* create, launch & block all threads */ + block_threads(worker, thread_attr); + + /* make sure all threads are already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start waking folks up */ + gettimeofday(&start, NULL); + while (nwoken != nthreads) + nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG); + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); + + update_stats(&wakeup_stats, nwoken); + update_stats(&waketime_stats, runtime.tv_usec); + + if (!silent) { + printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n", + j + 1, nwoken, nthreads, runtime.tv_usec/1e3); + } + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(worker); + return ret; +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 7d0bda543e3d..6ac45093fac4 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -45,4 +45,14 @@ futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflag return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); } +/** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks + */ +static inline int +futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) +{ + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); +} + #endif /* _FUTEX_H */ -- cgit v1.2.3 From 0fb298cf95c0d8119557b7d4657724a146e0622e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 14 Dec 2013 20:31:57 -0800 Subject: perf bench: Add futex-requeue microbenchmark Block a bunch of threads on a futex and requeue them on another, N at a time. This program is particularly useful to measure the latency of nthread requeues without waking up any tasks -- thus mimicking a regular futex_wait. An example run: $ perf bench futex requeue -r 100 -t 64 Run summary [PID 151011]: Requeuing 64 threads (from 0x7d15c4 to 0x7d15c8), 1 at a time. [Run 1]: Requeued 64 of 64 threads in 0.0400 ms [Run 2]: Requeued 64 of 64 threads in 0.0390 ms [Run 3]: Requeued 64 of 64 threads in 0.0400 ms ... [Run 100]: Requeued 64 of 64 threads in 0.0390 ms Requeued 64 of 64 threads in 0.0399 ms (+-0.37%) Signed-off-by: Davidlohr Bueso Acked-by: Darren Hart Cc: Aswin Chandramouleeswaran Cc: Darren Hart Cc: Ingo Molnar Cc: Jason Low Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/1387081917-9102-4-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/bench.h | 1 + tools/perf/bench/futex-requeue.c | 211 +++++++++++++++++++++++++++++++++++++++ tools/perf/bench/futex.h | 13 +++ 3 files changed, 225 insertions(+) create mode 100644 tools/perf/bench/futex-requeue.c (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 6ac3f1d083cc..eba46709b279 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -33,6 +33,7 @@ extern int bench_mem_memcpy(int argc, const char **argv, extern int bench_mem_memset(int argc, const char **argv, const char *prefix); extern int bench_futex_hash(int argc, const char **argv, const char *prefix); extern int bench_futex_wake(int argc, const char **argv, const char *prefix); +extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c new file mode 100644 index 000000000000..a16255876f1d --- /dev/null +++ b/tools/perf/bench/futex-requeue.c @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * futex-requeue: Block a bunch of threads on futex1 and requeue them + * on futex2, N at a time. + * + * This program is particularly useful to measure the latency of nthread + * requeues without waking up any tasks -- thus mimicking a regular futex_wait. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include +#include +#include +#include + +static u_int32_t futex1 = 0, futex2 = 0; + +/* + * How many tasks to requeue at a time. + * Default to 1 in order to make the kernel work more. + */ +static unsigned int nrequeue = 1; + +/* + * There can be significant variance from run to run, + * the more repeats, the more exact the overall avg and + * the better idea of the futex latency. + */ +static unsigned int repeat = 10; + +static pthread_t *worker; +static bool done = 0, silent = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats requeuetime_stats, requeued_stats; +static unsigned int ncpus, threads_starting, nthreads = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), + OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_END() +}; + +static const char * const bench_futex_requeue_usage[] = { + "perf bench futex requeue ", + NULL +}; + +static void print_summary(void) +{ + double requeuetime_avg = avg_stats(&requeuetime_stats); + double requeuetime_stddev = stddev_stats(&requeuetime_stats); + unsigned int requeued_avg = avg_stats(&requeued_stats); + + printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", + requeued_avg, + nthreads, + requeuetime_avg/1e3, + rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); +} + +static void *workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG); + return NULL; +} + +static void block_threads(pthread_t *w, + pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + /* create and block all threads */ + for (i = 0; i < nthreads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + err(EXIT_FAILURE, "pthread_create"); + } +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_requeue(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act; + pthread_attr_t thread_attr; + + argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), " + "%d at a time.\n\n", + getpid(), nthreads, &futex1, &futex2, nrequeue); + + init_stats(&requeued_stats); + init_stats(&requeuetime_stats); + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < repeat && !done; j++) { + unsigned int nrequeued = 0; + struct timeval start, end, runtime; + + /* create, launch & block all threads */ + block_threads(worker, thread_attr); + + /* make sure all threads are already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start requeueing */ + gettimeofday(&start, NULL); + for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) + /* + * Do not wakeup any tasks blocked on futex1, allowing + * us to really measure futex_wait functionality. + */ + futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue, + FUTEX_PRIVATE_FLAG); + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); + + update_stats(&requeued_stats, nrequeued); + update_stats(&requeuetime_stats, runtime.tv_usec); + + if (!silent) { + printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", + j + 1, nrequeued, nthreads, runtime.tv_usec/1e3); + } + + /* everybody should be blocked on futex2, wake'em up */ + nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG); + if (nthreads != nrequeued) + warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(worker); + return ret; +err: + usage_with_options(bench_futex_requeue_usage, options); + exit(EXIT_FAILURE); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 6ac45093fac4..71f2844cf97f 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -55,4 +55,17 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); } +/** +* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 +* @nr_wake: wake up to this many tasks +* @nr_requeue: requeue up to this many tasks +*/ +static inline int +futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + #endif /* _FUTEX_H */ -- cgit v1.2.3