From cfbcf468454ab4b20f0b4b62da51920b99fdb19e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 28 Apr 2016 12:30:53 -0300 Subject: perf core: Pass max stack as a perf_callchain_entry context This makes perf_callchain_{user,kernel}() receive the max stack as context for the perf_callchain_entry, instead of accessing the global sysctl_perf_event_max_stack. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9e1c3ada91c4..dbd18246b36e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -61,6 +61,11 @@ struct perf_callchain_entry { __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ }; +struct perf_callchain_entry_ctx { + struct perf_callchain_entry *entry; + u32 max_stack; +}; + struct perf_raw_record { u32 size; void *data; @@ -1063,19 +1068,20 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, - bool crosstask, bool add_mark); + u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; -static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (entry->nr < sysctl_perf_event_max_stack) { + struct perf_callchain_entry *entry = ctx->entry; + if (entry->nr < ctx->max_stack) { entry->ip[entry->nr++] = ip; return 0; } else { -- cgit v1.2.3 From 3b1fff08038bd0792b1aa1e9703b2dd0512a3fd0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 May 2016 18:08:32 -0300 Subject: perf core: Add a 'nr' field to perf_event_callchain_context We will use it to count how many addresses are in the entry->ip[] array, excluding PERF_CONTEXT_{KERNEL,USER,etc} entries, so that we can really return the number of entries specified by the user via the relevant sysctl, kernel.perf_event_max_contexts, or via the per event perf_event_attr.sample_max_stack knob. This way we keep the perf_sample->ip_callchain->nr meaning, that is the number of entries, be it real addresses or PERF_CONTEXT_ entries, while honouring the max_stack knobs, i.e. the end result will be max_stack entries if we have at least that many entries in a given stack trace. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-s8teto51tdqvlfhefndtat9r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index dbd18246b36e..3803bb1a862b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -64,6 +64,7 @@ struct perf_callchain_entry { struct perf_callchain_entry_ctx { struct perf_callchain_entry *entry; u32 max_stack; + u32 nr; }; struct perf_raw_record { @@ -1080,9 +1081,10 @@ extern int sysctl_perf_event_max_stack; static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - struct perf_callchain_entry *entry = ctx->entry; - if (entry->nr < ctx->max_stack) { + if (ctx->nr < ctx->max_stack) { + struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; + ++ctx->nr; return 0; } else { return -1; /* no more room, stop walking the stack */ -- cgit v1.2.3 From 3e4de4ec4cfea40994b47a79767610153edbf45b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 13:01:50 -0300 Subject: perf core: Add perf_callchain_store_context() helper We need have different helpers to account how many contexts we have in the sample and for real addresses, so do it now as a prep patch, to ease review. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-q964tnyuqrxw5gld18vizs3c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3803bb1a862b..2024b14cc2b1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1079,6 +1079,8 @@ extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; +#define perf_callchain_store_context(ctx, context) perf_callchain_store(ctx, context) + static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { if (ctx->nr < ctx->max_stack) { -- cgit v1.2.3 From c85b03349640b34f3545503c8429fc43005e9a92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 13:06:21 -0300 Subject: perf core: Separate accounting of contexts and real addresses in a stack trace The perf_sample->ip_callchain->nr value includes all the entries in the ip_callchain->ip[] array, real addresses and PERF_CONTEXT_{KERNEL,USER,etc}, while what the user expects is that what is in the kernel.perf_event_max_stack sysctl or in the upcoming per event perf_event_attr.sample_max_stack knob be honoured in terms of IP addresses in the stack trace. So allocate a bunch of extra entries for contexts, and do the accounting via perf_callchain_entry_ctx struct members. A new sysctl, kernel.perf_event_max_contexts_per_stack is also introduced for investigating possible bugs in the callchain implementation by some arch. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-3b4wnqk340c4sg4gwkfdi9yk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2024b14cc2b1..6b87be908790 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -65,6 +65,8 @@ struct perf_callchain_entry_ctx { struct perf_callchain_entry *entry; u32 max_stack; u32 nr; + short contexts; + bool contexts_maxed; }; struct perf_raw_record { @@ -1078,12 +1080,24 @@ extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; -#define perf_callchain_store_context(ctx, context) perf_callchain_store(ctx, context) +static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip) +{ + if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { + struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; + ++ctx->contexts; + return 0; + } else { + ctx->contexts_maxed = true; + return -1; /* no more room, stop walking the stack */ + } +} static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (ctx->nr < ctx->max_stack) { + if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; ++ctx->nr; -- cgit v1.2.3