From f371b304f12e31fe30207c41ca7754564e0ea4dc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Dec 2017 11:39:02 -0800 Subject: bpf/tracing: allow user space to query prog array on the same tp Commit e87c6bc3852b ("bpf: permit multiple bpf attachments for a single perf event") added support to attach multiple bpf programs to a single perf event. Although this provides flexibility, users may want to know what other bpf programs attached to the same tp interface. Besides getting visibility for the underlying bpf system, such information may also help consolidate multiple bpf programs, understand potential performance issues due to a large array, and debug (e.g., one bpf program which overwrites return code may impact subsequent program results). Commit 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes") utilized the existing perf ioctl interface and added the command PERF_EVENT_IOC_SET_BPF to attach a bpf program to a tracepoint. This patch adds a new ioctl command, given a perf event fd, to query the bpf program array attached to the same perf tracepoint event. The new uapi ioctl command: PERF_EVENT_IOC_QUERY_BPF The new uapi/linux/perf_event.h structure: struct perf_event_query_bpf { __u32 ids_len; __u32 prog_cnt; __u32 ids[0]; }; User space provides buffer "ids" for kernel to copy to. When returning from the kernel, the number of available programs in the array is set in "prog_cnt". The usage: struct perf_event_query_bpf *query = malloc(sizeof(*query) + sizeof(u32) * ids_len); query.ids_len = ids_len; err = ioctl(pmu_efd, PERF_EVENT_IOC_QUERY_BPF, query); if (err == 0) { /* query.prog_cnt is the number of available progs, * number of progs in ids: (ids_len == 0) ? 0 : query.prog_cnt */ } else if (errno == ENOSPC) { /* query.ids_len number of progs copied, * query.prog_cnt is the number of available progs */ } else { /* other errors */ } Signed-off-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0ce99c379c30..b143f2a05aff 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -820,3 +820,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event) unlock: mutex_unlock(&bpf_event_mutex); } + +int bpf_event_query_prog_array(struct perf_event *event, void __user *info) +{ + struct perf_event_query_bpf __user *uquery = info; + struct perf_event_query_bpf query = {}; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -EINVAL; + if (copy_from_user(&query, uquery, sizeof(query))) + return -EFAULT; + + mutex_lock(&bpf_event_mutex); + ret = bpf_prog_array_copy_info(event->tp_event->prog_array, + uquery->ids, + query.ids_len, + &uquery->prog_cnt); + mutex_unlock(&bpf_event_mutex); + + return ret; +} -- cgit v1.2.3 From 9802d86585db91655c7d1929a4f6bbe0952ea88e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Dec 2017 11:36:48 -0500 Subject: bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with it's kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_funciton helper. This will modify the probe'd callers return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Acked-by: Ingo Molnar Signed-off-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- kernel/trace/Kconfig | 11 +++++++++ kernel/trace/bpf_trace.c | 35 +++++++++++++++++++++++++++++ kernel/trace/trace_kprobe.c | 55 +++++++++++++++++++++++++++++++++++++++------ kernel/trace/trace_probe.h | 12 ++++++++++ 4 files changed, 106 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index af7dad126c13..3e6fd580fe7f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -529,6 +529,17 @@ config FUNCTION_PROFILER If in doubt, say N. +config BPF_KPROBE_OVERRIDE + bool "Enable BPF programs to override a kprobed function" + depends on BPF_EVENTS + depends on KPROBES_ON_FTRACE + depends on HAVE_KPROBE_OVERRIDE + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + Allows BPF to override the execution of a probed function and + set a different return value. This is used for error injection. + config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b143f2a05aff..e009b7ecf473 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,10 @@ #include #include #include +#include +#include + +#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + __this_cpu_write(bpf_kprobe_override, 1); + regs_set_return_value(regs, rc); + arch_ftrace_kprobe_override_function(regs); + return 0; +} + +static const struct bpf_func_proto bpf_override_return_proto = { + .func = bpf_override_return, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; +#ifdef CONFIG_BPF_KPROBE_OVERRIDE + case BPF_FUNC_override_return: + return &bpf_override_return_proto; +#endif default: return tracing_func_proto(func_id); } @@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; + /* + * Kprobe override only works for ftrace based kprobes, and only if they + * are on the opt-in list. + */ + if (prog->kprobe_override && + (!trace_kprobe_ftrace(event->tp_event) || + !trace_kprobe_error_injectable(event->tp_event))) + return -EINVAL; + mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 492700c5fb4d..5db849809a56 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,6 +42,7 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) +DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +int trace_kprobe_ftrace(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + return kprobe_ftrace(&tk->rp.kp); +} + +int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + unsigned long addr; + + if (tk->symbol) { + addr = (unsigned long) + kallsyms_lookup_name(trace_kprobe_symbol(tk)); + addr += tk->rp.kp.offset; + } else { + addr = (unsigned long)tk->rp.kp.addr; + } + return within_kprobe_error_injection_list(addr); +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static void +static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) - return; + if (bpf_prog_array_valid(call)) { + int ret; + + ret = trace_call_bpf(call, regs); + + /* + * We need to check and see if we modified the pc of the + * pt_regs, and if so clear the kprobe and return 1 so that we + * don't do the instruction skipping. Also reset our state so + * we are clean the next pass through. + */ + if (__this_cpu_read(bpf_kprobe_override)) { + __this_cpu_write(bpf_kprobe_override, 0); + reset_current_kprobe(); + return 1; + } + if (!ret) + return 0; + } head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return; + return 0; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return; + return 0; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); + return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - kprobe_perf_func(tk, regs); + ret = kprobe_perf_func(tk, regs); #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return ret; } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fb66e3eaa192..5e54d748c84c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -252,6 +252,8 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); +int trace_kprobe_ftrace(struct trace_event_call *call); +int trace_kprobe_error_injectable(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } + +static inline int trace_kprobe_ftrace(struct trace_event_call *call) +{ + return 0; +} + +static inline int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + return 0; +} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { -- cgit v1.2.3 From f4e2298e63d24bb7f5cf0f56f72867973cb7e652 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 Dec 2017 10:35:37 -0800 Subject: bpf/tracing: fix kernel/events/core.c compilation error Commit f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") introduced a perf ioctl command to query prog array attached to the same perf tracepoint. The commit introduced a compilation error under certain config conditions, e.g., (1). CONFIG_BPF_SYSCALL is not defined, or (2). CONFIG_TRACING is defined but neither CONFIG_UPROBE_EVENTS nor CONFIG_KPROBE_EVENTS is defined. Error message: kernel/events/core.o: In function `perf_ioctl': core.c:(.text+0x98c4): undefined reference to `bpf_event_query_prog_array' This patch fixed this error by guarding the real definition under CONFIG_BPF_EVENTS and provided static inline dummy function if CONFIG_BPF_EVENTS was not defined. It renamed the function from bpf_event_query_prog_array to perf_event_query_prog_array and moved the definition from linux/bpf.h to linux/trace_events.h so the definition is in proximity to other prog_array related functions. Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") Reported-by: Stephen Rothwell Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e009b7ecf473..c5dd60cc0751 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -856,7 +856,7 @@ unlock: mutex_unlock(&bpf_event_mutex); } -int bpf_event_query_prog_array(struct perf_event *event, void __user *info) +int perf_event_query_prog_array(struct perf_event *event, void __user *info) { struct perf_event_query_bpf __user *uquery = info; struct perf_event_query_bpf query = {}; -- cgit v1.2.3 From 46df3d209db080395a98fc0875bd05e45e8f44e0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Dec 2017 21:42:57 -0500 Subject: trace: reenable preemption if we modify the ip Things got moved around between the original bpf_override_return patches and the final version, and now the ftrace kprobe dispatcher assumes if you modified the ip that you also enabled preemption. Make a comment of this and enable preemption, this fixes the lockdep splat that happened when using this feature. Fixes: 9802d86585db ("bpf: add a bpf_override_function helper") Signed-off-by: Josef Bacik Signed-off-by: Daniel Borkmann --- kernel/trace/trace_kprobe.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5db849809a56..91f4b57dab82 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1322,8 +1322,15 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) if (tk->tp.flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (tk->tp.flags & TP_FLAG_PROFILE) + if (tk->tp.flags & TP_FLAG_PROFILE) { ret = kprobe_perf_func(tk, regs); + /* + * The ftrace kprobe handler leaves it up to us to re-enable + * preemption here before returning if we've modified the ip. + */ + if (ret) + preempt_enable_no_resched(); + } #endif return ret; } -- cgit v1.2.3 From b4da3340eae2c3932144be3e81ccfd4e424d87b7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:54:04 +0900 Subject: tracing/kprobe: bpf: Check error injectable event is on function entry Check whether error injectable event is on function entry or not. Currently it checks the event is ftrace-based kprobes or not, but that is wrong. It should check if the event is on the entry of target function. Since error injection will override a function to just return with modified return value, that operation must be done before the target function starts making stackframe. As a side effect, bpf error injection is no need to depend on function-tracer. It can work with sw-breakpoint based kprobe events too. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- kernel/trace/Kconfig | 2 -- kernel/trace/bpf_trace.c | 8 ++++---- kernel/trace/trace_kprobe.c | 9 ++++++--- kernel/trace/trace_probe.h | 12 ++++++------ 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ae3a2d519e50..6400e1bf97c5 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -533,9 +533,7 @@ config FUNCTION_PROFILER config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS - depends on KPROBES_ON_FTRACE depends on HAVE_KPROBE_OVERRIDE - depends on DYNAMIC_FTRACE_WITH_REGS default n help Allows BPF to override the execution of a probed function and diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f6d2327ecb59..1966ad3bf3e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -85,7 +85,7 @@ BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { __this_cpu_write(bpf_kprobe_override, 1); regs_set_return_value(regs, rc); - arch_ftrace_kprobe_override_function(regs); + arch_kprobe_override_function(regs); return 0; } @@ -800,11 +800,11 @@ int perf_event_attach_bpf_prog(struct perf_event *event, int ret = -EEXIST; /* - * Kprobe override only works for ftrace based kprobes, and only if they - * are on the opt-in list. + * Kprobe override only works if they are on the function entry, + * and only if they are on the opt-in list. */ if (prog->kprobe_override && - (!trace_kprobe_ftrace(event->tp_event) || + (!trace_kprobe_on_func_entry(event->tp_event) || !trace_kprobe_error_injectable(event->tp_event))) return -EINVAL; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 91f4b57dab82..3c8deb977a8b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -88,13 +88,16 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } -int trace_kprobe_ftrace(struct trace_event_call *call) +bool trace_kprobe_on_func_entry(struct trace_event_call *call) { struct trace_kprobe *tk = (struct trace_kprobe *)call->data; - return kprobe_ftrace(&tk->rp.kp); + + return kprobe_on_func_entry(tk->rp.kp.addr, + tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name, + tk->rp.kp.addr ? 0 : tk->rp.kp.offset); } -int trace_kprobe_error_injectable(struct trace_event_call *call) +bool trace_kprobe_error_injectable(struct trace_event_call *call) { struct trace_kprobe *tk = (struct trace_kprobe *)call->data; unsigned long addr; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5e54d748c84c..e101c5bb9eda 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -252,8 +252,8 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); -int trace_kprobe_ftrace(struct trace_event_call *call); -int trace_kprobe_error_injectable(struct trace_event_call *call); +bool trace_kprobe_on_func_entry(struct trace_event_call *call); +bool trace_kprobe_error_injectable(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -280,14 +280,14 @@ alloc_symbol_cache(const char *sym, long offset) return NULL; } -static inline int trace_kprobe_ftrace(struct trace_event_call *call) +static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call) { - return 0; + return false; } -static inline int trace_kprobe_error_injectable(struct trace_event_call *call) +static inline bool trace_kprobe_error_injectable(struct trace_event_call *call) { - return 0; + return false; } #endif /* CONFIG_KPROBE_EVENTS */ -- cgit v1.2.3 From 66665ad2f1023d3ffb0c12eea9e0a6d0b613ecb3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:54:33 +0900 Subject: tracing/kprobe: bpf: Compare instruction pointer with original one Compare instruction pointer with original one on the stack instead using per-cpu bpf_kprobe_override flag. This patch also consolidates reset_current_kprobe() and preempt_enable_no_resched() blocks. Those can be done in one place. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 1 - kernel/trace/trace_kprobe.c | 21 +++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1966ad3bf3e0..24ed6363e00f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -83,7 +83,6 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); #ifdef CONFIG_BPF_KPROBE_OVERRIDE BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { - __this_cpu_write(bpf_kprobe_override, 1); regs_set_return_value(regs, rc); arch_kprobe_override_function(regs); return 0; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3c8deb977a8b..b8c90441bc87 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,8 +42,6 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) -DEFINE_PER_CPU(int, bpf_kprobe_override); - static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { return tk->rp.handler != NULL; @@ -1205,6 +1203,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int rctx; if (bpf_prog_array_valid(call)) { + unsigned long orig_ip = instruction_pointer(regs); int ret; ret = trace_call_bpf(call, regs); @@ -1212,12 +1211,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) /* * We need to check and see if we modified the pc of the * pt_regs, and if so clear the kprobe and return 1 so that we - * don't do the instruction skipping. Also reset our state so - * we are clean the next pass through. + * don't do the single stepping. + * The ftrace kprobe handler leaves it up to us to re-enable + * preemption here before returning if we've modified the ip. */ - if (__this_cpu_read(bpf_kprobe_override)) { - __this_cpu_write(bpf_kprobe_override, 0); + if (orig_ip != instruction_pointer(regs)) { reset_current_kprobe(); + preempt_enable_no_resched(); return 1; } if (!ret) @@ -1325,15 +1325,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) if (tk->tp.flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (tk->tp.flags & TP_FLAG_PROFILE) { + if (tk->tp.flags & TP_FLAG_PROFILE) ret = kprobe_perf_func(tk, regs); - /* - * The ftrace kprobe handler leaves it up to us to re-enable - * preemption here before returning if we've modified the ip. - */ - if (ret) - preempt_enable_no_resched(); - } #endif return ret; } -- cgit v1.2.3 From 540adea3809f61115d2a1ea4ed6e627613452ba1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:55:03 +0900 Subject: error-injection: Separate error-injection from kprobe Since error-injection framework is not limited to be used by kprobes, nor bpf. Other kernel subsystems can use it freely for checking safeness of error-injection, e.g. livepatch, ftrace etc. So this separate error-injection framework from kprobes. Some differences has been made: - "kprobe" word is removed from any APIs/structures. - BPF_ALLOW_ERROR_INJECTION() is renamed to ALLOW_ERROR_INJECTION() since it is not limited for BPF too. - CONFIG_FUNCTION_ERROR_INJECTION is the config item of this feature. It is automatically enabled if the arch supports error injection feature for kprobe or ftrace etc. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- kernel/trace/Kconfig | 2 +- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6400e1bf97c5..7114c885a78a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -533,7 +533,7 @@ config FUNCTION_PROFILER config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS - depends on HAVE_KPROBE_OVERRIDE + depends on FUNCTION_ERROR_INJECTION default n help Allows BPF to override the execution of a probed function and diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 24ed6363e00f..f274468cbc45 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "trace_probe.h" #include "trace.h" @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { regs_set_return_value(regs, rc); - arch_kprobe_override_function(regs); + override_function_with_return(regs); return 0; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8c90441bc87..1fad24acd444 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "trace_probe.h" @@ -107,7 +108,7 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call) } else { addr = (unsigned long)tk->rp.kp.addr; } - return within_kprobe_error_injection_list(addr); + return within_error_injection_list(addr); } static int register_kprobe_event(struct trace_kprobe *tk); -- cgit v1.2.3 From eefa864a81501161c05b35f14197677c937e5b9a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 17 Jan 2018 09:19:32 -0800 Subject: bpf: change fake_ip for bpf_trace_printk helper Currently, for bpf_trace_printk helper, fake ip address 0x1 is used with comments saying that fake ip will not be printed. This is indeed true for 4.12 and earlier version, but for 4.13 and later version, the ip address will be printed if it cannot be resolved with kallsym. Running samples/bpf/tracex5 program and you will have the following in the debugfs trace_pipe output: ... <...>-1819 [003] .... 443.497877: 0x00000001: mmap <...>-1819 [003] .... 443.498289: 0x00000001: syscall=102 (one of get/set uid/pid/gid) ... The kernel commit changed this behavior is: commit feaf1283d11794b9d518fcfd54b6bf8bee1f0b4b Author: Steven Rostedt (VMware) Date: Thu Jun 22 17:04:55 2017 -0400 tracing: Show address when function names are not found ... This patch changed the comment and also altered the fake ip address to 0x0 as users may think 0x1 has some special meaning while it doesn't. The new output: ... <...>-1799 [002] .... 25.953576: 0: mmap <...>-1799 [002] .... 25.953865: 0: read(fd=0, buf=00000000053936b5, size=512) ... Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f274468cbc45..fc2838ac8b78 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -245,7 +245,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, */ #define __BPF_TP_EMIT() __BPF_ARG3_TP() #define __BPF_TP(...) \ - __trace_printk(1 /* Fake ip will not be printed. */, \ + __trace_printk(0 /* Fake ip */, \ fmt, ##__VA_ARGS__) #define __BPF_ARG1_TP(...) \ -- cgit v1.2.3