From be3de80dc2e671d9ee15e69fe9cd84d2b71e2225 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 24 Jan 2012 10:03:22 -0200 Subject: perf bench: Also allow measuring memset() This simply clones the respective memcpy() implementation. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/4F16D743020000780006D735@nat28.tlf.novell.com Signed-off-by: Jan Beulich Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memset.c | 291 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 tools/perf/bench/mem-memset.c (limited to 'tools/perf/bench/mem-memset.c') diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..9c0c6f0cba9b --- /dev/null +++ b/tools/perf/bench/mem-memset.c @@ -0,0 +1,291 @@ +/* + * mem-memset.c + * + * memset: Simple memory set in various ways + * + * Trivial clone of mem-memcpy.c. + */ +#include + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "mem-memset-arch.h" + +#include +#include +#include +#include +#include + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static bool use_clock; +static int clock_fd; +static bool only_prefault; +static bool no_prefault; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memset()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memset()"), + OPT_END() +}; + +typedef void *(*memset_t)(void *, int, size_t); + +struct routine { + const char *name; + const char *desc; + memset_t fn; +}; + +static const struct routine routines[] = { + { "default", + "Default memset() provided by glibc", + memset }, +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memset-x86-64-asm-def.h" +#undef MEMSET_FN + +#endif + + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset ", + NULL +}; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +static void alloc_mem(void **dst, size_t length) +{ + *dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) +{ + u64 clock_start = 0ULL, clock_end = 0ULL; + void *dst = NULL; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + clock_start = get_clock(); + fn(dst, 0, len); + clock_end = get_clock(); + + free(dst); + return clock_end - clock_start; +} + +static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + void *dst = NULL; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + fn(dst, 0, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +int bench_mem_memset(int argc, const char **argv, + const char *prefix __used) +{ + int i; + size_t len; + double result_bps[2]; + u64 result_clock[2]; + + argc = parse_options(argc, argv, options, + bench_mem_memset_usage, 0); + + if (use_clock) + init_clock(); + + len = (size_t)perf_atoll((char *)length_str); + + result_clock[0] = result_clock[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); + + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_clock) { + result_clock[0] = + do_memset_clock(routines[i].fn, len, false); + result_clock[1] = + do_memset_clock(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memset_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memset_gettimeofday(routines[i].fn, + len, true); + } + } else { + if (use_clock) { + result_clock[pf] = + do_memset_clock(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memset_gettimeofday(routines[i].fn, + len, only_prefault); + } + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)result_clock[0] + / (double)len); + printf(" %14lf Clock/Byte (with prefault)\n ", + (double)result_clock[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); + } + } else { + if (use_clock) { + printf(" %14lf Clock/Byte", + (double)result_clock[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); + } + break; + case BENCH_FORMAT_SIMPLE: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf("%lf %lf\n", + (double)result_clock[0] / (double)len, + (double)result_clock[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_clock) { + printf("%lf\n", (double)result_clock[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} -- cgit v1.2.3 From e3e877e79b7c6a322f9f628e87052c13581238cc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 18 Jan 2012 13:29:59 +0000 Subject: perf bench: Allow passing an iteration count to "bench mem mem{cpy,set}" "perf stat ... perf bench mem mem..." is pretty meaningless when using small block sizes (as the overhead of the invocation of each test run basically hides the actual test result in the noise). Repeating the actually interesting function's invocation a number of times allows the results to become meaningful. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4F16D767020000780006D738@nat28.tlf.novell.com Signed-off-by: Jan Beulich Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memset.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools/perf/bench/mem-memset.c') diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 9c0c6f0cba9b..59d4933eff44 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -24,6 +24,7 @@ static const char *length_str = "1MB"; static const char *routine = "default"; +static int iterations = 1; static bool use_clock; static int clock_fd; static bool only_prefault; @@ -35,6 +36,8 @@ static const struct option options[] = { "available unit: B, MB, GB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", "Specify routine to copy"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memset() invocation this number of times"), OPT_BOOLEAN('c', "clock", &use_clock, "Use CPU clock for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, @@ -117,6 +120,7 @@ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) { u64 clock_start = 0ULL, clock_end = 0ULL; void *dst = NULL; + int i; alloc_mem(&dst, len); @@ -124,7 +128,8 @@ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) fn(dst, -1, len); clock_start = get_clock(); - fn(dst, 0, len); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); clock_end = get_clock(); free(dst); @@ -135,6 +140,7 @@ static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) { struct timeval tv_start, tv_end, tv_diff; void *dst = NULL; + int i; alloc_mem(&dst, len); @@ -142,7 +148,8 @@ static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) fn(dst, -1, len); BUG_ON(gettimeofday(&tv_start, NULL)); - fn(dst, 0, len); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); -- cgit v1.2.3 From d30d4a080d195892091ad7d014fc9293cc08ea0f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 29 Jan 2012 17:55:55 +0900 Subject: perf tools: Remove unnecessary ctype.h inclusion There are unnecessary #include out there, and they might cause a nasty build failure in some environment. As we already have most of ctype macros in util.h, just get rid of them. A few of exceptions are util/symbol.c which needs isupper() macro util.h doesn't provide and perl scripting support code which includes ctype.h internally. Suggested-by: Ingo Molnar Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1327827356-8786-4-git-send-email-namhyung@gmail.com Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memset.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf/bench/mem-memset.c') diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 59d4933eff44..e9079185bd72 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -5,7 +5,6 @@ * * Trivial clone of mem-memcpy.c. */ -#include #include "../perf.h" #include "../util/util.h" -- cgit v1.2.3