From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list Since forcibly unoptimized kprobes will be put on the freeing_list directly in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check the freeing_list even if unoptimizing_list is empty. This bug can happen if a kprobe is put in an instruction which is in the middle of the jump-replaced instruction sequence of an optprobe, *and* the optprobe is recently unregistered and queued on unoptimizing_list. In this case, the optprobe will be unoptimized forcibly (means immediately) and put it into the freeing_list, expecting the optprobe will be handled in do_unoptimize_kprobe(). But if there is no other optprobes on the unoptimizing_list, current code returns from the do_unoptimize_kprobe() soon and does not handle the optprobe which is on the freeing_list. Then the optprobe will hit the WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled in the latter loop of the do_unoptimize_kprobe(). To solve this issue, do not return from do_unoptimize_kprobes() immediately even if unoptimizing_list is empty. Moreover, this change affects another case. kill_optimized_kprobes() expects kprobe_optimizer() will just free the optprobe on freeing_list. So I changed it to just do list_move() to freeing_list if optprobes are on unoptimizing list. And the do_unoptimize_kprobe() will skip arch_disarm_kprobe() if the probe on freeing_list has gone flag. Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/ Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit@devnote3/ Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic") Reported-by: Pengfei Xu Signed-off-by: Masami Hiramatsu (Google) Cc: stable@vger.kernel.org Acked-by: Steven Rostedt (Google) --- kernel/kprobes.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1c18ecf9f98b..6b6aff00b3b6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void) /* See comment in do_optimize_kprobes() */ lockdep_assert_cpus_held(); - /* Unoptimization must be done anytime */ - if (list_empty(&unoptimizing_list)) - return; + if (!list_empty(&unoptimizing_list)) + arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); - arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); - /* Loop on 'freeing_list' for disarming */ + /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { /* Switching from detour code to origin */ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; - /* Disarm probes if marked disabled */ - if (kprobe_disabled(&op->kp)) + /* Disarm probes if marked disabled and not gone */ + if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp)) arch_disarm_kprobe(&op->kp); if (kprobe_unused(&op->kp)) { /* @@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p) op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (kprobe_unused(p)) { - /* Enqueue if it is unused */ - list_add(&op->list, &freeing_list); /* - * Remove unused probes from the hash list. After waiting - * for synchronization, this probe is reclaimed. - * (reclaiming is done by do_free_cleaned_kprobes().) + * Unused kprobe is on unoptimizing or freeing list. We move it + * to freeing_list and let the kprobe_optimizer() remove it from + * the kprobe hash list and free it. */ - hlist_del_rcu(&op->kp.hlist); + if (optprobe_queued_unopt(op)) + list_move(&op->list, &freeing_list); } /* Don't touch the code, because it is already freed. */ -- cgit v1.2.3 From 868a6fc0ca2407622d2833adefe1c4d284766c4c Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: x86/kprobes: Fix __recover_optprobed_insn check optimizing logic Since the following commit: commit f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") modified the update timing of the KPROBE_FLAG_OPTIMIZED, a optimized_kprobe may be in the optimizing or unoptimizing state when op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is not empty. The __recover_optprobed_insn check logic is incorrect, a kprobe in the unoptimizing state may be incorrectly determined as unoptimizing. As a result, incorrect instructions are copied. The optprobe_queued_unopt function needs to be exported for invoking in arch directory. Link: https://lore.kernel.org/all/20230216034247.32348-2-yangjihong1@huawei.com/ Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Cc: stable@vger.kernel.org Signed-off-by: Yang Jihong Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6b6aff00b3b6..55e1807ca054 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -660,7 +660,7 @@ void wait_for_kprobe_optimizer(void) mutex_unlock(&kprobe_mutex); } -static bool optprobe_queued_unopt(struct optimized_kprobe *op) +bool optprobe_queued_unopt(struct optimized_kprobe *op) { struct optimized_kprobe *_op; -- cgit v1.2.3 From f1c97a1b4ef709e3f066f82e3ba3108c3b133ae6 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range When arch_prepare_optimized_kprobe calculating jump destination address, it copies original instructions from jmp-optimized kprobe (see __recover_optprobed_insn), and calculated based on length of original instruction. arch_check_optimized_kprobe does not check KPROBE_FLAG_OPTIMATED when checking whether jmp-optimized kprobe exists. As a result, setup_detour_execution may jump to a range that has been overwritten by jump destination address, resulting in an inval opcode error. For example, assume that register two kprobes whose addresses are and in "func" function. The original code of "func" function is as follows: 0xffffffff816cb5e9 <+9>: push %r12 0xffffffff816cb5eb <+11>: xor %r12d,%r12d 0xffffffff816cb5ee <+14>: test %rdi,%rdi 0xffffffff816cb5f1 <+17>: setne %r12b 0xffffffff816cb5f5 <+21>: push %rbp 1.Register the kprobe for , assume that is kp1, corresponding optimized_kprobe is op1. After the optimization, "func" code changes to: 0xffffffff816cc079 <+9>: push %r12 0xffffffff816cc07b <+11>: jmp 0xffffffffa0210000 0xffffffff816cc080 <+16>: incl 0xf(%rcx) 0xffffffff816cc083 <+19>: xchg %eax,%ebp 0xffffffff816cc084 <+20>: (bad) 0xffffffff816cc085 <+21>: push %rbp Now op1->flags == KPROBE_FLAG_OPTIMATED; 2. Register the kprobe for , assume that is kp2, corresponding optimized_kprobe is op2. register_kprobe(kp2) register_aggr_kprobe alloc_aggr_kprobe __prepare_optimized_kprobe arch_prepare_optimized_kprobe __recover_optprobed_insn // copy original bytes from kp1->optinsn.copied_insn, // jump address = 3. disable kp1: disable_kprobe(kp1) __disable_kprobe ... if (p == orig_p || aggr_kprobe_disabled(orig_p)) { ret = disarm_kprobe(orig_p, true) // add op1 in unoptimizing_list, not unoptimized orig_p->flags |= KPROBE_FLAG_DISABLED; // op1->flags == KPROBE_FLAG_OPTIMATED | KPROBE_FLAG_DISABLED ... 4. unregister kp2 __unregister_kprobe_top ... if (!kprobe_disabled(ap) && !kprobes_all_disarmed) { optimize_kprobe(op) ... if (arch_check_optimized_kprobe(op) < 0) // because op1 has KPROBE_FLAG_DISABLED, here not return return; p->kp.flags |= KPROBE_FLAG_OPTIMIZED; // now op2 has KPROBE_FLAG_OPTIMIZED } "func" code now is: 0xffffffff816cc079 <+9>: int3 0xffffffff816cc07a <+10>: push %rsp 0xffffffff816cc07b <+11>: jmp 0xffffffffa0210000 0xffffffff816cc080 <+16>: incl 0xf(%rcx) 0xffffffff816cc083 <+19>: xchg %eax,%ebp 0xffffffff816cc084 <+20>: (bad) 0xffffffff816cc085 <+21>: push %rbp 5. if call "func", int3 handler call setup_detour_execution: if (p->flags & KPROBE_FLAG_OPTIMIZED) { ... regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; ... } The code for the destination address is 0xffffffffa021072c: push %r12 0xffffffffa021072e: xor %r12d,%r12d 0xffffffffa0210731: jmp 0xffffffff816cb5ee However, is not a valid start instruction address. As a result, an error occurs. Link: https://lore.kernel.org/all/20230216034247.32348-3-yangjihong1@huawei.com/ Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Signed-off-by: Yang Jihong Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 55e1807ca054..00e177de91cc 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -458,7 +458,7 @@ static inline int kprobe_optready(struct kprobe *p) } /* Return true if the kprobe is disarmed. Note: p must be on hash list */ -static inline bool kprobe_disarmed(struct kprobe *p) +bool kprobe_disarmed(struct kprobe *p) { struct optimized_kprobe *op; -- cgit v1.2.3 From 133921530c42960c07d25d12677f9e131a2b0cdf Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: tracing/eprobe: Fix to add filter on eprobe description in README file Fix to add a description of the filter on eprobe in README file. This is required to identify the kernel supports the filter on eprobe or not. Link: https://lore.kernel.org/all/167309833728.640500.12232259238201433587.stgit@devnote3/ Fixes: 752be5c5c910 ("tracing/eprobe: Add eprobe filter support") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c9e40f692650..b677f8d61deb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5598,7 +5598,7 @@ static const char readme_msg[] = #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/] []\n" #endif - "\t e[:[/][]] . []\n" + "\t e[:[/][]] . [] [if ]\n" "\t -:[/][]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" -- cgit v1.2.3 From 8478cca1e3abd183f309cd9c2491f484acf5d377 Mon Sep 17 00:00:00 2001 From: Donglin Peng Date: Tue, 21 Feb 2023 08:52:42 +0900 Subject: tracing/probe: add a char type to show the character value of traced arguments There are scenes that we want to show the character value of traced arguments other than a decimal or hexadecimal or string value for debug convinience. I add a new type named 'char' to do it and a new test case file named 'kprobe_args_char.tc' to do selftest for char type. For example: The to be traced function is 'void demo_func(char type, char *name);', we can add a kprobe event as follows to show argument values as we want: echo 'p:myprobe demo_func $arg1:char +0($arg2):char[5]' > kprobe_events we will get the following trace log: ... myprobe: (demo_func+0x0/0x29) arg1='A' arg2={'b','p','f','1',''} Link: https://lore.kernel.org/all/20221219110613.367098-1-dolinux.peng@gmail.com/ Signed-off-by: Donglin Peng Acked-by: Masami Hiramatsu (Google) Reported-by: kernel test robot Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace.c | 2 +- kernel/trace/trace_probe.c | 2 ++ kernel/trace/trace_probe.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b677f8d61deb..712ba8d6f91f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5615,7 +5615,7 @@ static const char readme_msg[] = "\t $stack, $stack, $retval, $comm,\n" #endif "\t +|-[u](), \\imm-value, \\\"imm-string\"\n" - "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" + "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" "\t b@/, ustring,\n" "\t symstr, \\[\\]\n" #ifdef CONFIG_HIST_TRIGGERS diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 01ebabbbe8c9..11008c098727 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -50,6 +50,7 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(x8, u8, "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx") +DEFINE_BASIC_PRINT_TYPE_FUNC(char, u8, "'%c'") int PRINT_TYPE_FUNC_NAME(symbol)(struct trace_seq *s, void *data, void *ent) { @@ -95,6 +96,7 @@ static const struct fetch_type probe_fetch_types[] = { ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0), ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0), ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0), + ASSIGN_FETCH_TYPE_ALIAS(char, u8, u8, 0), ASSIGN_FETCH_TYPE_ALIAS(symbol, ADDR_FETCH_TYPE, ADDR_FETCH_TYPE, 0), ASSIGN_FETCH_TYPE_END diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 23acfd1c3812..b4f99553411e 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -166,6 +166,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(x16); DECLARE_BASIC_PRINT_TYPE_FUNC(x32); DECLARE_BASIC_PRINT_TYPE_FUNC(x64); +DECLARE_BASIC_PRINT_TYPE_FUNC(char); DECLARE_BASIC_PRINT_TYPE_FUNC(string); DECLARE_BASIC_PRINT_TYPE_FUNC(symbol); -- cgit v1.2.3 From c96abaec78f34366b3ddf1c6be52ca5c1241e15b Mon Sep 17 00:00:00 2001 From: Quanfa Fu Date: Tue, 21 Feb 2023 08:52:42 +0900 Subject: tracing/eprobe: no need to check for negative ret value for snprintf No need to check for negative return value from snprintf() as the code does not return negative values. Link: https://lore.kernel.org/all/20230109040625.3259642-1-quanfafu@gmail.com/ Signed-off-by: Quanfa Fu Acked-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_eprobe.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 352b65e2b910..594ac1d086aa 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -923,17 +923,13 @@ static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const ch p = ep->filter_str; for (i = 0; i < argc; i++) { - ret = snprintf(p, len, "%s ", argv[i]); - if (ret < 0) - goto error; - if (ret > len) { - ret = -E2BIG; - goto error; - } + if (i) + ret = snprintf(p, len, " %s", argv[i]); + else + ret = snprintf(p, len, "%s", argv[i]); p += ret; len -= ret; } - p[-1] = '\0'; /* * Ensure the filter string can be parsed correctly. Note, this -- cgit v1.2.3