From ddb017ec9c3346e511f16dbae784a72eb91994dd Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 29 Mar 2025 15:39:59 +0900 Subject: tracing: probe-events: Cleanup entry-arg storing code Cleanup __store_entry_arg() so that it is easier to understand. The main complexity may come from combining the loops for finding stored-entry-arg and max-offset and appending new entry. This split those different loops into 3 parts, lookup the same entry-arg, find the max offset and append new entry. Link: https://lore.kernel.org/all/174323039929.348535.4705349977127704120.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.c | 128 ++++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 59 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 424751cdf31f..abfab8957a6c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -779,6 +779,36 @@ static int check_prepare_btf_string_fetch(char *typename, #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API +static void store_entry_arg_at(struct fetch_insn *code, int argnum, int offset) +{ + code[0].op = FETCH_OP_ARG; + code[0].param = argnum; + code[1].op = FETCH_OP_ST_EDATA; + code[1].offset = offset; +} + +static int get_entry_arg_max_offset(struct probe_entry_arg *earg) +{ + int i, max_offset = 0; + + /* + * earg->code[] array has an operation sequence which is run in + * the entry handler. + * The sequence stopped by FETCH_OP_END and each data stored in + * the entry data buffer by FETCH_OP_ST_EDATA. The FETCH_OP_ST_EDATA + * stores the data at the data buffer + its offset, and all data are + * "unsigned long" size. The offset must be increased when a data is + * stored. Thus we need to find the last FETCH_OP_ST_EDATA in the + * code array. + */ + for (i = 0; i < earg->size - 1 && earg->code[i].op != FETCH_OP_END; i++) { + if (earg->code[i].op == FETCH_OP_ST_EDATA) + if (earg->code[i].offset > max_offset) + max_offset = earg->code[i].offset; + } + return max_offset; +} + /* * Add the entry code to store the 'argnum'th parameter and return the offset * in the entry data buffer where the data will be stored. @@ -786,8 +816,7 @@ static int check_prepare_btf_string_fetch(char *typename, static int __store_entry_arg(struct trace_probe *tp, int argnum) { struct probe_entry_arg *earg = tp->entry_arg; - bool match = false; - int i, offset; + int i, offset, last_offset = 0; if (!earg) { earg = kzalloc(sizeof(*tp->entry_arg), GFP_KERNEL); @@ -804,78 +833,59 @@ static int __store_entry_arg(struct trace_probe *tp, int argnum) for (i = 0; i < earg->size; i++) earg->code[i].op = FETCH_OP_END; tp->entry_arg = earg; + store_entry_arg_at(earg->code, argnum, 0); + return 0; } /* - * The entry code array is repeating the pair of - * [FETCH_OP_ARG(argnum)][FETCH_OP_ST_EDATA(offset of entry data buffer)] - * and the rest of entries are filled with [FETCH_OP_END]. + * NOTE: if anyone change the following rule, please rewrite this. + * The entry code array is filled with the pair of + * + * [FETCH_OP_ARG(argnum)] + * [FETCH_OP_ST_EDATA(offset of entry data buffer)] * - * To reduce the redundant function parameter fetching, we scan the entry - * code array to find the FETCH_OP_ARG which already fetches the 'argnum' - * parameter. If it doesn't match, update 'offset' to find the last - * offset. - * If we find the FETCH_OP_END without matching FETCH_OP_ARG entry, we - * will save the entry with FETCH_OP_ARG and FETCH_OP_ST_EDATA, and - * return data offset so that caller can find the data offset in the entry - * data buffer. + * and the rest of entries are filled with [FETCH_OP_END]. + * The offset should be incremented, thus the last pair should + * have the largest offset. */ - offset = 0; - for (i = 0; i < earg->size - 1; i++) { - switch (earg->code[i].op) { - case FETCH_OP_END: - earg->code[i].op = FETCH_OP_ARG; - earg->code[i].param = argnum; - earg->code[i + 1].op = FETCH_OP_ST_EDATA; - earg->code[i + 1].offset = offset; - return offset; - case FETCH_OP_ARG: - match = (earg->code[i].param == argnum); - break; - case FETCH_OP_ST_EDATA: - offset = earg->code[i].offset; - if (match) - return offset; - offset += sizeof(unsigned long); - break; - default: - break; - } + + /* Search the offset for the sprcified argnum. */ + for (i = 0; i < earg->size - 1 && earg->code[i].op != FETCH_OP_END; i += 2) { + if (WARN_ON_ONCE(earg->code[i].op != FETCH_OP_ARG)) + return -EINVAL; + + if (earg->code[i].param != argnum) + continue; + + if (WARN_ON_ONCE(earg->code[i + 1].op != FETCH_OP_ST_EDATA)) + return -EINVAL; + + return earg->code[i + 1].offset; } - return -ENOSPC; + /* Not found, append new entry if possible. */ + if (i >= earg->size - 1) + return -ENOSPC; + + /* The last entry must have the largest offset. */ + if (i != 0) { + if (WARN_ON_ONCE(earg->code[i - 1].op != FETCH_OP_ST_EDATA)) + return -EINVAL; + last_offset = earg->code[i - 1].offset; + } + + offset = last_offset + sizeof(unsigned long); + store_entry_arg_at(&earg->code[i], argnum, offset); + return offset; } int traceprobe_get_entry_data_size(struct trace_probe *tp) { struct probe_entry_arg *earg = tp->entry_arg; - int i, size = 0; if (!earg) return 0; - /* - * earg->code[] array has an operation sequence which is run in - * the entry handler. - * The sequence stopped by FETCH_OP_END and each data stored in - * the entry data buffer by FETCH_OP_ST_EDATA. The FETCH_OP_ST_EDATA - * stores the data at the data buffer + its offset, and all data are - * "unsigned long" size. The offset must be increased when a data is - * stored. Thus we need to find the last FETCH_OP_ST_EDATA in the - * code array. - */ - for (i = 0; i < earg->size; i++) { - switch (earg->code[i].op) { - case FETCH_OP_END: - goto out; - case FETCH_OP_ST_EDATA: - size = earg->code[i].offset + sizeof(unsigned long); - break; - default: - break; - } - } -out: - return size; + return get_entry_arg_max_offset(earg) + sizeof(unsigned long); } void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs) -- cgit v1.2.3 From c135ab4a96e3f65abf83621a0efe007e64f224cf Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:35:53 +0900 Subject: tracing: tprobe-events: Remove mod field from tprobe-event Remove unneeded 'mod' struct module pointer field from trace_fprobe because we don't need to save this info. Link: https://lore.kernel.org/all/174343535351.843280.5868426549023332120.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_fprobe.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index b40fa59159ac..017d6ebfdf8b 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -46,7 +46,6 @@ struct trace_fprobe { struct fprobe fp; const char *symbol; struct tracepoint *tpoint; - struct module *mod; struct trace_probe tp; }; @@ -426,7 +425,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, const char *event, const char *symbol, struct tracepoint *tpoint, - struct module *mod, int nargs, bool is_return) { struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; @@ -446,7 +444,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, tf->fp.entry_handler = fentry_dispatcher; tf->tpoint = tpoint; - tf->mod = mod; ret = trace_probe_init(&tf->tp, event, group, false, nargs); if (ret < 0) @@ -776,7 +773,6 @@ static void __unregister_trace_fprobe(struct trace_fprobe *tf) tracepoint_probe_unregister(tf->tpoint, tf->tpoint->probestub, NULL); tf->tpoint = NULL; - tf->mod = NULL; } } } @@ -1001,23 +997,23 @@ static int __tracepoint_probe_module_cb(struct notifier_block *self, mutex_lock(&event_mutex); for_each_trace_fprobe(tf, pos) { + if (!trace_fprobe_is_tracepoint(tf)) + continue; if (val == MODULE_STATE_COMING && tf->tpoint == TRACEPOINT_STUB) { tpoint = find_tracepoint_in_module(tp_mod->mod, tf->symbol); if (tpoint) { tf->tpoint = tpoint; - tf->mod = tp_mod->mod; if (!WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)) && trace_probe_is_enabled(&tf->tp)) reenable_trace_fprobe(tf); } - } else if (val == MODULE_STATE_GOING && tp_mod->mod == tf->mod) { + } else if (val == MODULE_STATE_GOING && + tf->tpoint != TRACEPOINT_STUB && + within_module((unsigned long)tf->tpoint->probestub, tp_mod->mod)) { unregister_fprobe(&tf->fp); - if (trace_fprobe_is_tracepoint(tf)) { - tracepoint_probe_unregister(tf->tpoint, - tf->tpoint->probestub, NULL); - tf->tpoint = TRACEPOINT_STUB; - tf->mod = NULL; - } + tracepoint_probe_unregister(tf->tpoint, + tf->tpoint->probestub, NULL); + tf->tpoint = TRACEPOINT_STUB; } } mutex_unlock(&event_mutex); @@ -1218,8 +1214,7 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], return ret; /* setup a probe */ - tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod, - argc, is_return); + tf = alloc_trace_fprobe(group, event, symbol, tpoint, argc, is_return); if (IS_ERR(tf)) { ret = PTR_ERR(tf); /* This must return -ENOMEM, else there is a bug */ -- cgit v1.2.3 From e3d6e1b9a34c745b635f122ac471a198867cd0ec Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:36:02 +0900 Subject: tracing: tprobe-events: Support multiple tprobes on the same tracepoint Allow user to set multiple tracepoint-probe events on the same tracepoint. After the last tprobe-event is removed, the tracepoint callback is unregistered. Link: https://lore.kernel.org/all/174343536245.843280.6548776576601537671.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_fprobe.c | 251 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 201 insertions(+), 50 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 017d6ebfdf8b..4a9ce7b0e084 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -21,7 +21,6 @@ #define FPROBE_EVENT_SYSTEM "fprobes" #define TRACEPOINT_EVENT_SYSTEM "tracepoints" #define RETHOOK_MAXACTIVE_MAX 4096 -#define TRACEPOINT_STUB ERR_PTR(-ENOENT) static int trace_fprobe_create(const char *raw_command); static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev); @@ -38,6 +37,89 @@ static struct dyn_event_operations trace_fprobe_ops = { .match = trace_fprobe_match, }; +struct tracepoint_user { + struct tracepoint *tpoint; + unsigned int refcount; +}; + +static bool tracepoint_user_is_registered(struct tracepoint_user *tuser) +{ + return tuser && tuser->tpoint; +} + +static int tracepoint_user_register(struct tracepoint_user *tuser) +{ + struct tracepoint *tpoint = tuser->tpoint; + + if (!tpoint) + return 0; + + return tracepoint_probe_register_prio_may_exist(tpoint, + tpoint->probestub, NULL, 0); +} + +static void tracepoint_user_unregister(struct tracepoint_user *tuser) +{ + if (!tuser->tpoint) + return; + + WARN_ON_ONCE(tracepoint_probe_unregister(tuser->tpoint, tuser->tpoint->probestub, NULL)); + tuser->tpoint = NULL; +} + +static unsigned long tracepoint_user_ip(struct tracepoint_user *tuser) +{ + if (!tuser->tpoint) + return 0UL; + + return (unsigned long)tuser->tpoint->probestub; +} + +static bool tracepoint_user_within_module(struct tracepoint_user *tuser, + struct module *mod) +{ + return within_module(tracepoint_user_ip(tuser), mod); +} + +static struct tracepoint_user *tracepoint_user_allocate(struct tracepoint *tpoint) +{ + struct tracepoint_user *tuser; + + tuser = kzalloc(sizeof(*tuser), GFP_KERNEL); + if (!tuser) + return NULL; + tuser->tpoint = tpoint; + tuser->refcount = 1; + + return tuser; +} + +/* These must be called with event_mutex */ +static void tracepoint_user_get(struct tracepoint_user *tuser) +{ + tuser->refcount++; +} + +static void tracepoint_user_put(struct tracepoint_user *tuser) +{ + if (--tuser->refcount > 0) + return; + + if (tracepoint_user_is_registered(tuser)) + tracepoint_user_unregister(tuser); + kfree(tuser); +} + +static const char *tracepoint_user_lookup(struct tracepoint_user *tuser, char *buf) +{ + struct tracepoint *tpoint = tuser->tpoint; + + if (!tpoint) + return NULL; + + return kallsyms_lookup((unsigned long)tpoint->probestub, NULL, NULL, NULL, buf); +} + /* * Fprobe event core functions */ @@ -45,7 +127,7 @@ struct trace_fprobe { struct dyn_event devent; struct fprobe fp; const char *symbol; - struct tracepoint *tpoint; + struct tracepoint_user *tuser; struct trace_probe tp; }; @@ -75,7 +157,7 @@ static bool trace_fprobe_is_return(struct trace_fprobe *tf) static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf) { - return tf->tpoint != NULL; + return tf->tuser != NULL; } static const char *trace_fprobe_symbol(struct trace_fprobe *tf) @@ -125,6 +207,56 @@ static bool trace_fprobe_is_registered(struct trace_fprobe *tf) return fprobe_is_registered(&tf->fp); } +static struct tracepoint *find_tracepoint(const char *tp_name, + struct module **tp_mod); + +/* + * Get tracepoint_user if exist, or allocate new one. If tracepoint is on a + * module, get its refcounter. + */ +static struct tracepoint_user * +trace_fprobe_get_tracepoint_user(const char *name, struct module **pmod) +{ + struct tracepoint_user *tuser __free(kfree) = NULL; + struct tracepoint *tpoint; + struct trace_fprobe *tf; + struct dyn_event *dpos; + struct module *mod __free(module_put) = NULL; + int ret; + + /* + * Find appropriate tracepoint and locking module. + * Note: tpoint can be NULL if it is unloaded (or failed to get module.) + */ + tpoint = find_tracepoint(name, &mod); + + /* Search existing tracepoint_user */ + for_each_trace_fprobe(tf, dpos) { + if (!trace_fprobe_is_tracepoint(tf)) + continue; + if (!strcmp(tf->symbol, name)) { + tracepoint_user_get(tf->tuser); + *pmod = no_free_ptr(mod); + return tf->tuser; + } + } + + /* Not found, allocate and register new tracepoint_user. */ + tuser = tracepoint_user_allocate(tpoint); + if (!tuser) + return NULL; + + if (tpoint) { + /* If the tracepoint is not loaded, tpoint can be NULL. */ + ret = tracepoint_user_register(tuser); + if (ret) + return ERR_PTR(ret); + } + + *pmod = no_free_ptr(mod); + return_ptr(tuser); +} + /* * Note that we don't verify the fetch_insn code, since it does not come * from user space. @@ -410,6 +542,8 @@ static void free_trace_fprobe(struct trace_fprobe *tf) { if (tf) { trace_probe_cleanup(&tf->tp); + if (tf->tuser) + tracepoint_user_put(tf->tuser); kfree(tf->symbol); kfree(tf); } @@ -424,7 +558,7 @@ DEFINE_FREE(free_trace_fprobe, struct trace_fprobe *, if (!IS_ERR_OR_NULL(_T)) f static struct trace_fprobe *alloc_trace_fprobe(const char *group, const char *event, const char *symbol, - struct tracepoint *tpoint, + struct tracepoint_user *tuser, int nargs, bool is_return) { struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; @@ -443,7 +577,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, else tf->fp.entry_handler = fentry_dispatcher; - tf->tpoint = tpoint; + tf->tuser = tuser; ret = trace_probe_init(&tf->tp, event, group, false, nargs); if (ret < 0) @@ -709,19 +843,11 @@ static int unregister_fprobe_event(struct trace_fprobe *tf) static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf) { - struct tracepoint *tpoint = tf->tpoint; - unsigned long ip = (unsigned long)tpoint->probestub; - int ret; + unsigned long ip = tracepoint_user_ip(tf->tuser); + + if (!ip) + return -ENOENT; - /* - * Here, we do 2 steps to enable fprobe on a tracepoint. - * At first, put __probestub_##TP function on the tracepoint - * and put a fprobe on the stub function. - */ - ret = tracepoint_probe_register_prio_may_exist(tpoint, - tpoint->probestub, NULL, 0); - if (ret < 0) - return ret; return register_fprobe_ips(&tf->fp, &ip, 1); } @@ -753,7 +879,7 @@ static int __register_trace_fprobe(struct trace_fprobe *tf) if (trace_fprobe_is_tracepoint(tf)) { /* This tracepoint is not loaded yet */ - if (tf->tpoint == TRACEPOINT_STUB) + if (!tracepoint_user_is_registered(tf->tuser)) return 0; return __regsiter_tracepoint_fprobe(tf); @@ -770,9 +896,8 @@ static void __unregister_trace_fprobe(struct trace_fprobe *tf) unregister_fprobe(&tf->fp); memset(&tf->fp, 0, sizeof(tf->fp)); if (trace_fprobe_is_tracepoint(tf)) { - tracepoint_probe_unregister(tf->tpoint, - tf->tpoint->probestub, NULL); - tf->tpoint = NULL; + tracepoint_user_put(tf->tuser); + tf->tuser = NULL; } } } @@ -988,7 +1113,7 @@ static int __tracepoint_probe_module_cb(struct notifier_block *self, unsigned long val, void *data) { struct tp_module *tp_mod = data; - struct tracepoint *tpoint; + struct tracepoint_user *tuser; struct trace_fprobe *tf; struct dyn_event *pos; @@ -999,21 +1124,46 @@ static int __tracepoint_probe_module_cb(struct notifier_block *self, for_each_trace_fprobe(tf, pos) { if (!trace_fprobe_is_tracepoint(tf)) continue; - if (val == MODULE_STATE_COMING && tf->tpoint == TRACEPOINT_STUB) { + + if (val == MODULE_STATE_COMING) { + /* + * If any tracepoint used by tprobe is in the module, + * register the stub. + */ + struct tracepoint *tpoint; + tpoint = find_tracepoint_in_module(tp_mod->mod, tf->symbol); - if (tpoint) { - tf->tpoint = tpoint; - if (!WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)) && - trace_probe_is_enabled(&tf->tp)) - reenable_trace_fprobe(tf); + /* This is not a tracepoint in this module. Skip it. */ + if (!tpoint) + continue; + + tuser = tf->tuser; + /* If the tracepoint is not registered yet, register it. */ + if (!tracepoint_user_is_registered(tuser)) { + tuser->tpoint = tpoint; + if (WARN_ON_ONCE(tracepoint_user_register(tuser))) + continue; } - } else if (val == MODULE_STATE_GOING && - tf->tpoint != TRACEPOINT_STUB && - within_module((unsigned long)tf->tpoint->probestub, tp_mod->mod)) { - unregister_fprobe(&tf->fp); - tracepoint_probe_unregister(tf->tpoint, - tf->tpoint->probestub, NULL); - tf->tpoint = TRACEPOINT_STUB; + + /* Finally enable fprobe on this module. */ + if (!WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)) && + trace_probe_is_enabled(&tf->tp)) + reenable_trace_fprobe(tf); + } else if (val == MODULE_STATE_GOING) { + tuser = tf->tuser; + /* Unregister all tracepoint_user in this module. */ + if (tracepoint_user_is_registered(tuser) && + tracepoint_user_within_module(tuser, tp_mod->mod)) + tracepoint_user_unregister(tuser); + + /* + * Here we need to handle shared tracepoint_user case. + * Such tuser is unregistered, but trace_fprobe itself + * is registered. (Note this only handles tprobes.) + */ + if (!tracepoint_user_is_registered(tuser) && + trace_fprobe_is_registered(tf)) + unregister_fprobe(&tf->fp); } } mutex_unlock(&event_mutex); @@ -1082,7 +1232,9 @@ static int parse_symbol_and_return(int argc, const char *argv[], return 0; } -DEFINE_FREE(module_put, struct module *, if (_T) module_put(_T)) +DEFINE_FREE(tuser_put, struct tracepoint_user *, + if (!IS_ERR_OR_NULL(_T)) + tracepoint_user_put(_T)) static int trace_fprobe_create_internal(int argc, const char *argv[], struct traceprobe_parse_context *ctx) @@ -1112,6 +1264,8 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], * FETCHARG:TYPE : use TYPE instead of unsigned long. */ struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; + struct tracepoint_user *tuser __free(tuser_put) = NULL; + struct module *mod __free(module_put) = NULL; int i, new_argc = 0, ret = 0; bool is_return = false; char *symbol __free(kfree) = NULL; @@ -1123,8 +1277,6 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], char abuf[MAX_BTF_ARGS_LEN]; char *dbuf __free(kfree) = NULL; bool is_tracepoint = false; - struct module *tp_mod __free(module_put) = NULL; - struct tracepoint *tpoint = NULL; if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2) return -ECANCELED; @@ -1177,20 +1329,18 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], if (is_tracepoint) { ctx->flags |= TPARG_FL_TPOINT; - tpoint = find_tracepoint(symbol, &tp_mod); - if (tpoint) { - ctx->funcname = kallsyms_lookup( - (unsigned long)tpoint->probestub, - NULL, NULL, NULL, sbuf); - } else if (IS_ENABLED(CONFIG_MODULES)) { - /* This *may* be loaded afterwards */ - tpoint = TRACEPOINT_STUB; - ctx->funcname = symbol; - } else { + tuser = trace_fprobe_get_tracepoint_user(symbol, &mod); + if (!tuser) + return -ENOMEM; + if (IS_ERR(tuser)) { trace_probe_log_set_index(1); trace_probe_log_err(0, NO_TRACEPOINT); - return -EINVAL; + return PTR_ERR(tuser); } + ctx->funcname = tracepoint_user_lookup(tuser, sbuf); + /* If tracepoint is not loaded yet, use symbol name as funcname. */ + if (!ctx->funcname) + ctx->funcname = symbol; } else ctx->funcname = symbol; @@ -1214,13 +1364,14 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], return ret; /* setup a probe */ - tf = alloc_trace_fprobe(group, event, symbol, tpoint, argc, is_return); + tf = alloc_trace_fprobe(group, event, symbol, tuser, argc, is_return); if (IS_ERR(tf)) { ret = PTR_ERR(tf); /* This must return -ENOMEM, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM); return ret; } + tuser = NULL; /* Move tuser to tf. */ /* parse arguments */ for (i = 0; i < argc; i++) { -- cgit v1.2.3 From 2db832ec9090d3b5f726f49ad4d0322d6b68a490 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:36:11 +0900 Subject: tracing: fprobe-events: Register fprobe-events only when it is enabled Currently fprobe events are registered when it is defined. Thus it will give some overhead even if it is disabled. This changes it to register the fprobe only when it is enabled. Link: https://lore.kernel.org/all/174343537128.843280.16131300052837035043.stgit@devnote2/ Suggested-by: Steven Rostedt Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 5 + kernel/trace/trace_fprobe.c | 237 ++++++++++++++++++++++---------------------- 2 files changed, 121 insertions(+), 121 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index ba7ff14f5339..b78fce0982c7 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -648,6 +648,11 @@ static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num) #define FPROBE_IPS_MAX INT_MAX +int fprobe_count_ips_from_filter(const char *filter, const char *notfilter) +{ + return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX); +} + /** * register_fprobe() - Register fprobe to ftrace by pattern. * @fp: A fprobe data structure to be registered. diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 4a9ce7b0e084..79ffc1b2d519 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -600,98 +600,6 @@ static struct trace_fprobe *find_trace_fprobe(const char *event, return NULL; } -static inline int __enable_trace_fprobe(struct trace_fprobe *tf) -{ - if (trace_fprobe_is_registered(tf)) - enable_fprobe(&tf->fp); - - return 0; -} - -static void __disable_trace_fprobe(struct trace_probe *tp) -{ - struct trace_fprobe *tf; - - list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { - if (!trace_fprobe_is_registered(tf)) - continue; - disable_fprobe(&tf->fp); - } -} - -/* - * Enable trace_probe - * if the file is NULL, enable "perf" handler, or enable "trace" handler. - */ -static int enable_trace_fprobe(struct trace_event_call *call, - struct trace_event_file *file) -{ - struct trace_probe *tp; - struct trace_fprobe *tf; - bool enabled; - int ret = 0; - - tp = trace_probe_primary_from_call(call); - if (WARN_ON_ONCE(!tp)) - return -ENODEV; - enabled = trace_probe_is_enabled(tp); - - /* This also changes "enabled" state */ - if (file) { - ret = trace_probe_add_file(tp, file); - if (ret) - return ret; - } else - trace_probe_set_flag(tp, TP_FLAG_PROFILE); - - if (!enabled) { - list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { - /* TODO: check the fprobe is gone */ - __enable_trace_fprobe(tf); - } - } - - return 0; -} - -/* - * Disable trace_probe - * if the file is NULL, disable "perf" handler, or disable "trace" handler. - */ -static int disable_trace_fprobe(struct trace_event_call *call, - struct trace_event_file *file) -{ - struct trace_probe *tp; - - tp = trace_probe_primary_from_call(call); - if (WARN_ON_ONCE(!tp)) - return -ENODEV; - - if (file) { - if (!trace_probe_get_file_link(tp, file)) - return -ENOENT; - if (!trace_probe_has_single_file(tp)) - goto out; - trace_probe_clear_flag(tp, TP_FLAG_TRACE); - } else - trace_probe_clear_flag(tp, TP_FLAG_PROFILE); - - if (!trace_probe_is_enabled(tp)) - __disable_trace_fprobe(tp); - - out: - if (file) - /* - * Synchronization is done in below function. For perf event, - * file == NULL and perf_trace_event_unreg() calls - * tracepoint_synchronize_unregister() to ensure synchronize - * event. We don't need to care about it. - */ - trace_probe_remove_file(tp, file); - - return 0; -} - /* Event entry printers */ static enum print_line_t print_fentry_event(struct trace_iterator *iter, int flags, @@ -851,6 +759,29 @@ static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf) return register_fprobe_ips(&tf->fp, &ip, 1); } +/* Returns an error if the target function is not available, or 0 */ +static int trace_fprobe_verify_target(struct trace_fprobe *tf) +{ + int ret; + + if (trace_fprobe_is_tracepoint(tf)) { + + /* This tracepoint is not loaded yet */ + if (!tracepoint_user_is_registered(tf->tuser)) + return 0; + + /* We assume all stab function is tracable. */ + return tracepoint_user_ip(tf->tuser) ? 0 : -ENOENT; + } + + /* + * Note: since we don't lock the module, even if this succeeded, + * register_fprobe() later can fail. + */ + ret = fprobe_count_ips_from_filter(tf->symbol, NULL); + return (ret < 0) ? ret : 0; +} + /* Internal register function - just handle fprobe and flags */ static int __register_trace_fprobe(struct trace_fprobe *tf) { @@ -870,11 +801,7 @@ static int __register_trace_fprobe(struct trace_fprobe *tf) return ret; } - /* Set/clear disabled flag according to tp->flag */ - if (trace_probe_is_enabled(&tf->tp)) - tf->fp.flags &= ~FPROBE_FL_DISABLED; - else - tf->fp.flags |= FPROBE_FL_DISABLED; + tf->fp.flags &= ~FPROBE_FL_DISABLED; if (trace_fprobe_is_tracepoint(tf)) { @@ -895,10 +822,10 @@ static void __unregister_trace_fprobe(struct trace_fprobe *tf) if (trace_fprobe_is_registered(tf)) { unregister_fprobe(&tf->fp); memset(&tf->fp, 0, sizeof(tf->fp)); - if (trace_fprobe_is_tracepoint(tf)) { - tracepoint_user_put(tf->tuser); - tf->tuser = NULL; - } + } + if (trace_fprobe_is_tracepoint(tf)) { + tracepoint_user_put(tf->tuser); + tf->tuser = NULL; } } @@ -958,7 +885,7 @@ static bool trace_fprobe_has_same_fprobe(struct trace_fprobe *orig, return false; } -static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to) +static int append_trace_fprobe_event(struct trace_fprobe *tf, struct trace_fprobe *to) { int ret; @@ -986,7 +913,7 @@ static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to) if (ret) return ret; - ret = __register_trace_fprobe(tf); + ret = trace_fprobe_verify_target(tf); if (ret) trace_probe_unlink(&tf->tp); else @@ -995,8 +922,8 @@ static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to) return ret; } -/* Register a trace_probe and probe_event */ -static int register_trace_fprobe(struct trace_fprobe *tf) +/* Register a trace_probe and probe_event, and check the fprobe is available. */ +static int register_trace_fprobe_event(struct trace_fprobe *tf) { struct trace_fprobe *old_tf; int ret; @@ -1006,7 +933,7 @@ static int register_trace_fprobe(struct trace_fprobe *tf) old_tf = find_trace_fprobe(trace_probe_name(&tf->tp), trace_probe_group_name(&tf->tp)); if (old_tf) - return append_trace_fprobe(tf, old_tf); + return append_trace_fprobe_event(tf, old_tf); /* Register new event */ ret = register_fprobe_event(tf); @@ -1019,8 +946,8 @@ static int register_trace_fprobe(struct trace_fprobe *tf) return ret; } - /* Register fprobe */ - ret = __register_trace_fprobe(tf); + /* Verify fprobe is sane. */ + ret = trace_fprobe_verify_target(tf); if (ret < 0) unregister_fprobe_event(tf); else @@ -1084,15 +1011,6 @@ static struct tracepoint *find_tracepoint(const char *tp_name, } #ifdef CONFIG_MODULES -static void reenable_trace_fprobe(struct trace_fprobe *tf) -{ - struct trace_probe *tp = &tf->tp; - - list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { - __enable_trace_fprobe(tf); - } -} - /* * Find a tracepoint from specified module. In this case, this does not get the * module's refcount. The caller must ensure the module is not freed. @@ -1146,9 +1064,8 @@ static int __tracepoint_probe_module_cb(struct notifier_block *self, } /* Finally enable fprobe on this module. */ - if (!WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)) && - trace_probe_is_enabled(&tf->tp)) - reenable_trace_fprobe(tf); + if (trace_probe_is_enabled(&tf->tp) && !trace_fprobe_is_registered(tf)) + WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)); } else if (val == MODULE_STATE_GOING) { tuser = tf->tuser; /* Unregister all tracepoint_user in this module. */ @@ -1397,7 +1314,7 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], if (ret < 0) return ret; - ret = register_trace_fprobe(tf); + ret = register_trace_fprobe_event(tf); if (ret) { trace_probe_log_set_index(1); if (ret == -EILSEQ) @@ -1466,6 +1383,84 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev) return 0; } +/* + * Enable trace_probe + * if the file is NULL, enable "perf" handler, or enable "trace" handler. + */ +static int enable_trace_fprobe(struct trace_event_call *call, + struct trace_event_file *file) +{ + struct trace_probe *tp; + struct trace_fprobe *tf; + bool enabled; + int ret = 0; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + enabled = trace_probe_is_enabled(tp); + + /* This also changes "enabled" state */ + if (file) { + ret = trace_probe_add_file(tp, file); + if (ret) + return ret; + } else + trace_probe_set_flag(tp, TP_FLAG_PROFILE); + + if (!enabled) { + list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { + ret = __register_trace_fprobe(tf); + if (ret < 0) + return ret; + } + } + + return 0; +} + +/* + * Disable trace_probe + * if the file is NULL, disable "perf" handler, or disable "trace" handler. + */ +static int disable_trace_fprobe(struct trace_event_call *call, + struct trace_event_file *file) +{ + struct trace_fprobe *tf; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + + if (file) { + if (!trace_probe_get_file_link(tp, file)) + return -ENOENT; + if (!trace_probe_has_single_file(tp)) + goto out; + trace_probe_clear_flag(tp, TP_FLAG_TRACE); + } else + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); + + if (!trace_probe_is_enabled(tp)) { + list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { + unregister_fprobe(&tf->fp); + } + } + + out: + if (file) + /* + * Synchronization is done in below function. For perf event, + * file == NULL and perf_trace_event_unreg() calls + * tracepoint_synchronize_unregister() to ensure synchronize + * event. We don't need to care about it. + */ + trace_probe_remove_file(tp, file); + + return 0; +} + /* * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. */ -- cgit v1.2.3 From 2867495dea86324e984fbafa09474bf92534b652 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:36:29 +0900 Subject: tracing: tprobe-events: Register tracepoint when enable tprobe event As same as fprobe, register tracepoint stub function only when enabling tprobe events. The major changes are introducing a list of tracepoint_user and its lock, and tprobe_event_module_nb, which is another module notifier for module loading/unloading. By spliting the lock from event_mutex and a module notifier for trace_fprobe, it solved AB-BA lock dependency issue between event_mutex and tracepoint_module_list_mutex. Link: https://lore.kernel.org/all/174343538901.843280.423773753642677941.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_fprobe.c | 382 +++++++++++++++++++++++++------------------- 1 file changed, 217 insertions(+), 165 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 79ffc1b2d519..dbf9d413125a 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -7,7 +7,9 @@ #include #include +#include #include +#include #include #include #include @@ -37,15 +39,21 @@ static struct dyn_event_operations trace_fprobe_ops = { .match = trace_fprobe_match, }; +/* List of tracepoint_user */ +static LIST_HEAD(tracepoint_user_list); +static DEFINE_MUTEX(tracepoint_user_mutex); + +/* While living tracepoint_user, @tpoint can be NULL and @refcount != 0. */ struct tracepoint_user { + struct list_head list; + const char *name; struct tracepoint *tpoint; unsigned int refcount; }; -static bool tracepoint_user_is_registered(struct tracepoint_user *tuser) -{ - return tuser && tuser->tpoint; -} +/* NOTE: you must lock tracepoint_user_mutex. */ +#define for_each_tracepoint_user(tuser) \ + list_for_each_entry(tuser, &tracepoint_user_list, list) static int tracepoint_user_register(struct tracepoint_user *tuser) { @@ -75,58 +83,111 @@ static unsigned long tracepoint_user_ip(struct tracepoint_user *tuser) return (unsigned long)tuser->tpoint->probestub; } -static bool tracepoint_user_within_module(struct tracepoint_user *tuser, - struct module *mod) +static void __tracepoint_user_free(struct tracepoint_user *tuser) { - return within_module(tracepoint_user_ip(tuser), mod); + if (!tuser) + return; + kfree(tuser->name); + kfree(tuser); } -static struct tracepoint_user *tracepoint_user_allocate(struct tracepoint *tpoint) +DEFINE_FREE(tuser_free, struct tracepoint_user *, __tracepoint_user_free(_T)) + +static struct tracepoint_user *__tracepoint_user_init(const char *name, struct tracepoint *tpoint) { - struct tracepoint_user *tuser; + struct tracepoint_user *tuser __free(tuser_free) = NULL; + int ret; tuser = kzalloc(sizeof(*tuser), GFP_KERNEL); if (!tuser) return NULL; + tuser->name = kstrdup(name, GFP_KERNEL); + if (!tuser->name) + return NULL; + + if (tpoint) { + ret = tracepoint_user_register(tuser); + if (ret) + return ERR_PTR(ret); + } + tuser->tpoint = tpoint; tuser->refcount = 1; + INIT_LIST_HEAD(&tuser->list); + list_add(&tuser->list, &tracepoint_user_list); - return tuser; + return_ptr(tuser); } -/* These must be called with event_mutex */ -static void tracepoint_user_get(struct tracepoint_user *tuser) -{ - tuser->refcount++; -} +static struct tracepoint *find_tracepoint(const char *tp_name, + struct module **tp_mod); -static void tracepoint_user_put(struct tracepoint_user *tuser) +/* + * Get tracepoint_user if exist, or allocate new one and register it. + * If tracepoint is on a module, get its refcounter too. + * This returns errno or NULL (not loaded yet) or tracepoint_user. + */ +static struct tracepoint_user *tracepoint_user_find_get(const char *name, struct module **pmod) { - if (--tuser->refcount > 0) - return; + struct module *mod __free(module_put) = NULL; + struct tracepoint_user *tuser; + struct tracepoint *tpoint; - if (tracepoint_user_is_registered(tuser)) - tracepoint_user_unregister(tuser); - kfree(tuser); + if (!name || !pmod) + return ERR_PTR(-EINVAL); + + /* Get and lock the module which has tracepoint. */ + tpoint = find_tracepoint(name, &mod); + + guard(mutex)(&tracepoint_user_mutex); + /* Search existing tracepoint_user */ + for_each_tracepoint_user(tuser) { + if (!strcmp(tuser->name, name)) { + tuser->refcount++; + *pmod = no_free_ptr(mod); + return tuser; + } + } + + /* The corresponding tracepoint_user is not found. */ + tuser = __tracepoint_user_init(name, tpoint); + if (!IS_ERR_OR_NULL(tuser)) + *pmod = no_free_ptr(mod); + + return tuser; } -static const char *tracepoint_user_lookup(struct tracepoint_user *tuser, char *buf) +static void tracepoint_user_put(struct tracepoint_user *tuser) { - struct tracepoint *tpoint = tuser->tpoint; + scoped_guard(mutex, &tracepoint_user_mutex) { + if (--tuser->refcount > 0) + return; - if (!tpoint) - return NULL; + list_del(&tuser->list); + tracepoint_user_unregister(tuser); + } - return kallsyms_lookup((unsigned long)tpoint->probestub, NULL, NULL, NULL, buf); + __tracepoint_user_free(tuser); } +DEFINE_FREE(tuser_put, struct tracepoint_user *, + if (!IS_ERR_OR_NULL(_T)) + tracepoint_user_put(_T)) + /* * Fprobe event core functions */ + +/* + * @tprobe is true for tracepoint probe. + * @tuser can be NULL if the trace_fprobe is disabled or the tracepoint is not + * loaded with a module. If @tuser != NULL, this trace_fprobe is enabled. + */ struct trace_fprobe { struct dyn_event devent; struct fprobe fp; const char *symbol; + bool tprobe; struct tracepoint_user *tuser; struct trace_probe tp; }; @@ -157,7 +218,7 @@ static bool trace_fprobe_is_return(struct trace_fprobe *tf) static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf) { - return tf->tuser != NULL; + return tf->tprobe; } static const char *trace_fprobe_symbol(struct trace_fprobe *tf) @@ -207,56 +268,6 @@ static bool trace_fprobe_is_registered(struct trace_fprobe *tf) return fprobe_is_registered(&tf->fp); } -static struct tracepoint *find_tracepoint(const char *tp_name, - struct module **tp_mod); - -/* - * Get tracepoint_user if exist, or allocate new one. If tracepoint is on a - * module, get its refcounter. - */ -static struct tracepoint_user * -trace_fprobe_get_tracepoint_user(const char *name, struct module **pmod) -{ - struct tracepoint_user *tuser __free(kfree) = NULL; - struct tracepoint *tpoint; - struct trace_fprobe *tf; - struct dyn_event *dpos; - struct module *mod __free(module_put) = NULL; - int ret; - - /* - * Find appropriate tracepoint and locking module. - * Note: tpoint can be NULL if it is unloaded (or failed to get module.) - */ - tpoint = find_tracepoint(name, &mod); - - /* Search existing tracepoint_user */ - for_each_trace_fprobe(tf, dpos) { - if (!trace_fprobe_is_tracepoint(tf)) - continue; - if (!strcmp(tf->symbol, name)) { - tracepoint_user_get(tf->tuser); - *pmod = no_free_ptr(mod); - return tf->tuser; - } - } - - /* Not found, allocate and register new tracepoint_user. */ - tuser = tracepoint_user_allocate(tpoint); - if (!tuser) - return NULL; - - if (tpoint) { - /* If the tracepoint is not loaded, tpoint can be NULL. */ - ret = tracepoint_user_register(tuser); - if (ret) - return ERR_PTR(ret); - } - - *pmod = no_free_ptr(mod); - return_ptr(tuser); -} - /* * Note that we don't verify the fetch_insn code, since it does not come * from user space. @@ -558,8 +569,8 @@ DEFINE_FREE(free_trace_fprobe, struct trace_fprobe *, if (!IS_ERR_OR_NULL(_T)) f static struct trace_fprobe *alloc_trace_fprobe(const char *group, const char *event, const char *symbol, - struct tracepoint_user *tuser, - int nargs, bool is_return) + int nargs, bool is_return, + bool is_tracepoint) { struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; int ret = -ENOMEM; @@ -577,7 +588,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, else tf->fp.entry_handler = fentry_dispatcher; - tf->tuser = tuser; + tf->tprobe = is_tracepoint; ret = trace_probe_init(&tf->tp, event, group, false, nargs); if (ret < 0) @@ -751,12 +762,35 @@ static int unregister_fprobe_event(struct trace_fprobe *tf) static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf) { - unsigned long ip = tracepoint_user_ip(tf->tuser); + struct tracepoint_user *tuser __free(tuser_put) = NULL; + struct module *mod __free(module_put) = NULL; + unsigned long ip; + int ret; - if (!ip) - return -ENOENT; + if (WARN_ON_ONCE(tf->tuser)) + return -EINVAL; + + /* If the tracepoint is in a module, it must be locked in this function. */ + tuser = tracepoint_user_find_get(tf->symbol, &mod); + /* This tracepoint is not loaded yet */ + if (IS_ERR(tuser)) + return PTR_ERR(tuser); + if (!tuser) + return -ENOMEM; - return register_fprobe_ips(&tf->fp, &ip, 1); + /* Register fprobe only if the tracepoint is loaded. */ + if (tuser->tpoint) { + ip = tracepoint_user_ip(tuser); + if (WARN_ON_ONCE(!ip)) + return -ENOENT; + + ret = register_fprobe_ips(&tf->fp, &ip, 1); + if (ret < 0) + return ret; + } + + tf->tuser = no_free_ptr(tuser); + return 0; } /* Returns an error if the target function is not available, or 0 */ @@ -764,15 +798,9 @@ static int trace_fprobe_verify_target(struct trace_fprobe *tf) { int ret; - if (trace_fprobe_is_tracepoint(tf)) { - - /* This tracepoint is not loaded yet */ - if (!tracepoint_user_is_registered(tf->tuser)) - return 0; - - /* We assume all stab function is tracable. */ - return tracepoint_user_ip(tf->tuser) ? 0 : -ENOENT; - } + /* Tracepoint should have a stub function. */ + if (trace_fprobe_is_tracepoint(tf)) + return 0; /* * Note: since we don't lock the module, even if this succeeded, @@ -803,14 +831,8 @@ static int __register_trace_fprobe(struct trace_fprobe *tf) tf->fp.flags &= ~FPROBE_FL_DISABLED; - if (trace_fprobe_is_tracepoint(tf)) { - - /* This tracepoint is not loaded yet */ - if (!tracepoint_user_is_registered(tf->tuser)) - return 0; - + if (trace_fprobe_is_tracepoint(tf)) return __regsiter_tracepoint_fprobe(tf); - } /* TODO: handle filter, nofilter or symbol list */ return register_fprobe(&tf->fp, tf->symbol, NULL); @@ -819,11 +841,9 @@ static int __register_trace_fprobe(struct trace_fprobe *tf) /* Internal unregister function - just handle fprobe and flags */ static void __unregister_trace_fprobe(struct trace_fprobe *tf) { - if (trace_fprobe_is_registered(tf)) { + if (trace_fprobe_is_registered(tf)) unregister_fprobe(&tf->fp); - memset(&tf->fp, 0, sizeof(tf->fp)); - } - if (trace_fprobe_is_tracepoint(tf)) { + if (tf->tuser) { tracepoint_user_put(tf->tuser); tf->tuser = NULL; } @@ -1027,63 +1047,52 @@ static struct tracepoint *find_tracepoint_in_module(struct module *mod, return data.tpoint; } +/* These are CONFIG_MODULES=y specific functions. */ +static bool tracepoint_user_within_module(struct tracepoint_user *tuser, + struct module *mod) +{ + return within_module(tracepoint_user_ip(tuser), mod); +} + +static int tracepoint_user_register_again(struct tracepoint_user *tuser, + struct tracepoint *tpoint) +{ + tuser->tpoint = tpoint; + return tracepoint_user_register(tuser); +} + +static void tracepoint_user_unregister_clear(struct tracepoint_user *tuser) +{ + tracepoint_user_unregister(tuser); + tuser->tpoint = NULL; +} + +/* module callback for tracepoint_user */ static int __tracepoint_probe_module_cb(struct notifier_block *self, unsigned long val, void *data) { struct tp_module *tp_mod = data; struct tracepoint_user *tuser; - struct trace_fprobe *tf; - struct dyn_event *pos; + struct tracepoint *tpoint; if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING) return NOTIFY_DONE; - mutex_lock(&event_mutex); - for_each_trace_fprobe(tf, pos) { - if (!trace_fprobe_is_tracepoint(tf)) - continue; - + mutex_lock(&tracepoint_user_mutex); + for_each_tracepoint_user(tuser) { if (val == MODULE_STATE_COMING) { - /* - * If any tracepoint used by tprobe is in the module, - * register the stub. - */ - struct tracepoint *tpoint; - - tpoint = find_tracepoint_in_module(tp_mod->mod, tf->symbol); /* This is not a tracepoint in this module. Skip it. */ + tpoint = find_tracepoint_in_module(tp_mod->mod, tuser->name); if (!tpoint) continue; - - tuser = tf->tuser; - /* If the tracepoint is not registered yet, register it. */ - if (!tracepoint_user_is_registered(tuser)) { - tuser->tpoint = tpoint; - if (WARN_ON_ONCE(tracepoint_user_register(tuser))) - continue; - } - - /* Finally enable fprobe on this module. */ - if (trace_probe_is_enabled(&tf->tp) && !trace_fprobe_is_registered(tf)) - WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)); - } else if (val == MODULE_STATE_GOING) { - tuser = tf->tuser; + WARN_ON_ONCE(tracepoint_user_register_again(tuser, tpoint)); + } else if (val == MODULE_STATE_GOING && + tracepoint_user_within_module(tuser, tp_mod->mod)) { /* Unregister all tracepoint_user in this module. */ - if (tracepoint_user_is_registered(tuser) && - tracepoint_user_within_module(tuser, tp_mod->mod)) - tracepoint_user_unregister(tuser); - - /* - * Here we need to handle shared tracepoint_user case. - * Such tuser is unregistered, but trace_fprobe itself - * is registered. (Note this only handles tprobes.) - */ - if (!tracepoint_user_is_registered(tuser) && - trace_fprobe_is_registered(tf)) - unregister_fprobe(&tf->fp); + tracepoint_user_unregister_clear(tuser); } } - mutex_unlock(&event_mutex); + mutex_unlock(&tracepoint_user_mutex); return NOTIFY_DONE; } @@ -1091,6 +1100,54 @@ static int __tracepoint_probe_module_cb(struct notifier_block *self, static struct notifier_block tracepoint_module_nb = { .notifier_call = __tracepoint_probe_module_cb, }; + +/* module callback for tprobe events */ +static int __tprobe_event_module_cb(struct notifier_block *self, + unsigned long val, void *data) +{ + struct trace_fprobe *tf; + struct dyn_event *pos; + struct module *mod = data; + + if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING) + return NOTIFY_DONE; + + mutex_lock(&event_mutex); + for_each_trace_fprobe(tf, pos) { + /* Skip fprobe and disabled tprobe events. */ + if (!trace_fprobe_is_tracepoint(tf) || !tf->tuser) + continue; + + /* Before this notification, tracepoint notifier has already done. */ + if (val == MODULE_STATE_COMING && + tracepoint_user_within_module(tf->tuser, mod)) { + unsigned long ip = tracepoint_user_ip(tf->tuser); + + WARN_ON_ONCE(register_fprobe_ips(&tf->fp, &ip, 1)); + } else if (val == MODULE_STATE_GOING && + /* + * tracepoint_user_within_module() does not work here because + * tracepoint_user is already unregistered and cleared tpoint. + * Instead, checking whether the fprobe is registered but + * tpoint is cleared(unregistered). Such unbalance probes + * must be adjusted anyway. + */ + trace_fprobe_is_registered(tf) && + !tf->tuser->tpoint) { + unregister_fprobe(&tf->fp); + } + } + mutex_unlock(&event_mutex); + + return NOTIFY_DONE; +} + +/* NOTE: this must be called after tracepoint callback */ +static struct notifier_block tprobe_event_module_nb = { + .notifier_call = __tprobe_event_module_cb, + /* Make sure this is later than tracepoint module notifier. */ + .priority = -10, +}; #endif /* CONFIG_MODULES */ static int parse_symbol_and_return(int argc, const char *argv[], @@ -1149,10 +1206,6 @@ static int parse_symbol_and_return(int argc, const char *argv[], return 0; } -DEFINE_FREE(tuser_put, struct tracepoint_user *, - if (!IS_ERR_OR_NULL(_T)) - tracepoint_user_put(_T)) - static int trace_fprobe_create_internal(int argc, const char *argv[], struct traceprobe_parse_context *ctx) { @@ -1181,7 +1234,6 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], * FETCHARG:TYPE : use TYPE instead of unsigned long. */ struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; - struct tracepoint_user *tuser __free(tuser_put) = NULL; struct module *mod __free(module_put) = NULL; int i, new_argc = 0, ret = 0; bool is_return = false; @@ -1244,21 +1296,19 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], else ctx->flags |= TPARG_FL_FENTRY; + ctx->funcname = NULL; if (is_tracepoint) { + /* Get tracepoint and lock its module until the end of the registration. */ + struct tracepoint *tpoint; + ctx->flags |= TPARG_FL_TPOINT; - tuser = trace_fprobe_get_tracepoint_user(symbol, &mod); - if (!tuser) - return -ENOMEM; - if (IS_ERR(tuser)) { - trace_probe_log_set_index(1); - trace_probe_log_err(0, NO_TRACEPOINT); - return PTR_ERR(tuser); - } - ctx->funcname = tracepoint_user_lookup(tuser, sbuf); - /* If tracepoint is not loaded yet, use symbol name as funcname. */ - if (!ctx->funcname) - ctx->funcname = symbol; - } else + mod = NULL; + tpoint = find_tracepoint(symbol, &mod); + if (tpoint) + ctx->funcname = kallsyms_lookup((unsigned long)tpoint->probestub, + NULL, NULL, NULL, sbuf); + } + if (!ctx->funcname) ctx->funcname = symbol; argc -= 2; argv += 2; @@ -1281,14 +1331,13 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], return ret; /* setup a probe */ - tf = alloc_trace_fprobe(group, event, symbol, tuser, argc, is_return); + tf = alloc_trace_fprobe(group, event, symbol, argc, is_return, is_tracepoint); if (IS_ERR(tf)) { ret = PTR_ERR(tf); /* This must return -ENOMEM, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM); return ret; } - tuser = NULL; /* Move tuser to tf. */ /* parse arguments */ for (i = 0; i < argc; i++) { @@ -1506,6 +1555,9 @@ static __init int init_fprobe_trace_early(void) ret = register_tracepoint_module_notifier(&tracepoint_module_nb); if (ret) return ret; + ret = register_module_notifier(&tprobe_event_module_nb); + if (ret) + return ret; #endif return 0; -- cgit v1.2.3 From 2f02a61d84c629235b8f9684dd0a67e33a2a3d81 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 23 Jul 2025 10:30:47 +0900 Subject: tracing: probes: Sort #include alphabetically Sort the #include directives in trace_probe* files alphabetically for easier maintenance and avoid double includes. This also groups headers as linux-generic, asm-generic, and local headers. Link: https://lore.kernel.org/all/175323424678.57270.11975372127870059007.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_eprobe.c | 4 ++-- kernel/trace/trace_fprobe.c | 3 ++- kernel/trace/trace_kprobe.c | 8 ++++---- kernel/trace/trace_probe.c | 2 +- kernel/trace/trace_probe.h | 17 +++++++++-------- kernel/trace/trace_uprobe.c | 12 ++++++------ 6 files changed, 24 insertions(+), 22 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 916555f0de81..23e06712bead 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -9,14 +9,14 @@ * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov tz.stoyanov@gmail.com> * */ +#include #include #include -#include #include "trace_dynevent.h" #include "trace_probe.h" -#include "trace_probe_tmpl.h" #include "trace_probe_kernel.h" +#include "trace_probe_tmpl.h" #define EPROBE_EVENT_SYSTEM "eprobes" diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index dbf9d413125a..add08ffb04d7 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -4,7 +4,6 @@ * Copyright (C) 2022 Google LLC. */ #define pr_fmt(fmt) "trace_fprobe: " fmt -#include #include #include @@ -15,6 +14,8 @@ #include #include +#include + #include "trace_dynevent.h" #include "trace_probe.h" #include "trace_probe_kernel.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3e5c47b6d7b2..cac128a5f7e0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -9,19 +9,19 @@ #include #include -#include +#include #include -#include #include -#include +#include +#include #include /* for COMMAND_LINE_SIZE */ #include "trace_dynevent.h" #include "trace_kprobe_selftest.h" #include "trace_probe.h" -#include "trace_probe_tmpl.h" #include "trace_probe_kernel.h" +#include "trace_probe_tmpl.h" #define KPROBE_EVENT_SYSTEM "kprobes" #define KRETPROBE_MAXACTIVE_MAX 4096 diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index abfab8957a6c..9d26d901c9e5 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -13,8 +13,8 @@ #include #include -#include "trace_btf.h" +#include "trace_btf.h" #include "trace_probe.h" #undef C diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 854e5668f5ee..719604855279 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -10,20 +10,21 @@ * Author: Srikar Dronamraju */ +#include +#include +#include +#include +#include +#include #include #include #include -#include -#include #include -#include -#include -#include #include -#include +#include +#include #include -#include -#include + #include #include "trace.h" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f95a2c3d5b1b..3cc3404b09f0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -8,16 +8,16 @@ #define pr_fmt(fmt) "trace_uprobe: " fmt #include -#include #include +#include #include -#include -#include #include -#include -#include -#include #include +#include +#include +#include +#include +#include #include "trace_dynevent.h" #include "trace_probe.h" -- cgit v1.2.3 From 43beb5e89bc8c0b753553964dd0654e2d1aa23f9 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 23 Jul 2025 10:30:56 +0900 Subject: tracing: probe: Allocate traceprobe_parse_context from heap Instead of allocating traceprobe_parse_context on stack, allocate it dynamically from heap (slab). This change is likely intended to prevent potential stack overflow issues, which can be a concern in the kernel environment where stack space is limited. Link: https://lore.kernel.org/all/175323425650.57270.280750740753792504.stgit@devnote2/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506240416.nZIhDXoO-lkp@intel.com/ Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_eprobe.c | 14 ++++++++------ kernel/trace/trace_fprobe.c | 13 ++++++++----- kernel/trace/trace_kprobe.c | 10 +++++++--- kernel/trace/trace_probe.h | 9 +++++++++ kernel/trace/trace_uprobe.c | 13 ++++++++----- 5 files changed, 40 insertions(+), 19 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 23e06712bead..7ba3a18be4c5 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -797,18 +797,20 @@ find_and_get_event(const char *system, const char *event_name) static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i) { - struct traceprobe_parse_context ctx = { - .event = ep->event, - .flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT, - }; + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL; int ret; - ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], &ctx); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->event = ep->event; + ctx->flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT; + + ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], ctx); /* Handle symbols "@" */ if (!ret) ret = traceprobe_update_arg(&ep->tp.args[i]); - traceprobe_finish_parse(&ctx); return ret; } diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index add08ffb04d7..610f8d53be8a 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1384,14 +1384,17 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], static int trace_fprobe_create_cb(int argc, const char *argv[]) { - struct traceprobe_parse_context ctx = { - .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE, - }; + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL; int ret; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE, + trace_probe_log_init("trace_fprobe", argc, argv); - ret = trace_fprobe_create_internal(argc, argv, &ctx); - traceprobe_finish_parse(&ctx); + ret = trace_fprobe_create_internal(argc, argv, ctx); trace_probe_log_clear(); return ret; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index cac128a5f7e0..d14b33e205f7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1065,14 +1065,18 @@ static int trace_kprobe_create_internal(int argc, const char *argv[], static int trace_kprobe_create_cb(int argc, const char *argv[]) { - struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL }; + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL; int ret; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->flags = TPARG_FL_KERNEL; + trace_probe_log_init("trace_kprobe", argc, argv); - ret = trace_kprobe_create_internal(argc, argv, &ctx); + ret = trace_kprobe_create_internal(argc, argv, ctx); - traceprobe_finish_parse(&ctx); trace_probe_log_clear(); return ret; } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 719604855279..842383fbc03b 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -439,6 +440,14 @@ extern void traceprobe_free_probe_arg(struct probe_arg *arg); * this MUST be called for clean up the context and return a resource. */ void traceprobe_finish_parse(struct traceprobe_parse_context *ctx); +static inline void traceprobe_free_parse_ctx(struct traceprobe_parse_context *ctx) +{ + traceprobe_finish_parse(ctx); + kfree(ctx); +} + +DEFINE_FREE(traceprobe_parse_context, struct traceprobe_parse_context *, + if (_T) traceprobe_free_parse_ctx(_T)) extern int traceprobe_split_symbol_offset(char *symbol, long *offset); int traceprobe_parse_event_name(const char **pevent, const char **pgroup, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3cc3404b09f0..872dce092e46 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -695,13 +695,16 @@ static int __trace_uprobe_create(int argc, const char **argv) /* parse arguments */ for (i = 0; i < argc; i++) { - struct traceprobe_parse_context ctx = { - .flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER, - }; + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) + = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + ret = -ENOMEM; + goto error; + } + ctx->flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER; trace_probe_log_set_index(i + 2); - ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx); - traceprobe_finish_parse(&ctx); + ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], ctx); if (ret) goto error; } -- cgit v1.2.3 From d643eaa7082dc3d2438c9ba3ab856eb1c8ad9ffc Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 23 Jul 2025 10:31:06 +0900 Subject: tracing: fprobe-event: Allocate string buffers from heap Allocate temporary string buffers for fprobe-event from heap instead of stack. This fixes the stack frame exceed limit error. Link: https://lore.kernel.org/all/175323426643.57270.6657152008331160704.stgit@devnote2/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506240416.nZIhDXoO-lkp@intel.com/ Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_fprobe.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 610f8d53be8a..9d14a910fbf7 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1235,18 +1235,18 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], * FETCHARG:TYPE : use TYPE instead of unsigned long. */ struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; - struct module *mod __free(module_put) = NULL; - int i, new_argc = 0, ret = 0; - bool is_return = false; - char *symbol __free(kfree) = NULL; const char *event = NULL, *group = FPROBE_EVENT_SYSTEM; + struct module *mod __free(module_put) = NULL; const char **new_argv __free(kfree) = NULL; - char buf[MAX_EVENT_NAME_LEN]; - char gbuf[MAX_EVENT_NAME_LEN]; - char sbuf[KSYM_NAME_LEN]; - char abuf[MAX_BTF_ARGS_LEN]; + char *symbol __free(kfree) = NULL; + char *ebuf __free(kfree) = NULL; + char *gbuf __free(kfree) = NULL; + char *sbuf __free(kfree) = NULL; + char *abuf __free(kfree) = NULL; char *dbuf __free(kfree) = NULL; + int i, new_argc = 0, ret = 0; bool is_tracepoint = false; + bool is_return = false; if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2) return -ECANCELED; @@ -1274,6 +1274,9 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], trace_probe_log_set_index(0); if (event) { + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!gbuf) + return -ENOMEM; ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) @@ -1281,15 +1284,18 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], } if (!event) { + ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!ebuf) + return -ENOMEM; /* Make a new event name */ if (is_tracepoint) - snprintf(buf, MAX_EVENT_NAME_LEN, "%s%s", + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%s%s", isdigit(*symbol) ? "_" : "", symbol); else - snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol, + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%s__%s", symbol, is_return ? "exit" : "entry"); - sanitize_event_name(buf); - event = buf; + sanitize_event_name(ebuf); + event = ebuf; } if (is_return) @@ -1305,13 +1311,20 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], ctx->flags |= TPARG_FL_TPOINT; mod = NULL; tpoint = find_tracepoint(symbol, &mod); - if (tpoint) + if (tpoint) { + sbuf = kmalloc(KSYM_NAME_LEN, GFP_KERNEL); + if (!sbuf) + return -ENOMEM; ctx->funcname = kallsyms_lookup((unsigned long)tpoint->probestub, NULL, NULL, NULL, sbuf); + } } if (!ctx->funcname) ctx->funcname = symbol; + abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL); + if (!abuf) + return -ENOMEM; argc -= 2; argv += 2; new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc, abuf, MAX_BTF_ARGS_LEN, ctx); -- cgit v1.2.3 From 33b4e38baa03b1556bec29dd80e58504fe3de1f2 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 23 Jul 2025 10:31:16 +0900 Subject: tracing: kprobe-event: Allocate string buffers from heap Allocate temporary string buffers for parsing kprobe-events from heap instead of stack. Link: https://lore.kernel.org/all/175323427627.57270.5105357260879695051.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_kprobe.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d14b33e205f7..ccae62d4fb91 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -861,20 +861,20 @@ static int trace_kprobe_create_internal(int argc, const char *argv[], * FETCHARG:TYPE : use TYPE instead of unsigned long. */ struct trace_kprobe *tk __free(free_trace_kprobe) = NULL; + const char *event = NULL, *group = KPROBE_EVENT_SYSTEM; + const char **new_argv __free(kfree) = NULL; int i, len, new_argc = 0, ret = 0; - bool is_return = false; char *symbol __free(kfree) = NULL; - char *tmp = NULL; - const char **new_argv __free(kfree) = NULL; - const char *event = NULL, *group = KPROBE_EVENT_SYSTEM; + char *ebuf __free(kfree) = NULL; + char *gbuf __free(kfree) = NULL; + char *abuf __free(kfree) = NULL; + char *dbuf __free(kfree) = NULL; enum probe_print_type ptype; + bool is_return = false; int maxactive = 0; - long offset = 0; void *addr = NULL; - char buf[MAX_EVENT_NAME_LEN]; - char gbuf[MAX_EVENT_NAME_LEN]; - char abuf[MAX_BTF_ARGS_LEN]; - char *dbuf __free(kfree) = NULL; + char *tmp = NULL; + long offset = 0; switch (argv[0][0]) { case 'r': @@ -893,6 +893,8 @@ static int trace_kprobe_create_internal(int argc, const char *argv[], event++; if (isdigit(argv[0][1])) { + char *buf __free(kfree) = NULL; + if (!is_return) { trace_probe_log_err(1, BAD_MAXACT_TYPE); return -EINVAL; @@ -905,7 +907,7 @@ static int trace_kprobe_create_internal(int argc, const char *argv[], trace_probe_log_err(1, BAD_MAXACT); return -EINVAL; } - memcpy(buf, &argv[0][1], len); + buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL); buf[len] = '\0'; ret = kstrtouint(buf, 0, &maxactive); if (ret || !maxactive) { @@ -973,6 +975,9 @@ static int trace_kprobe_create_internal(int argc, const char *argv[], trace_probe_log_set_index(0); if (event) { + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!gbuf) + return -ENOMEM; ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) @@ -981,16 +986,22 @@ static int trace_kprobe_create_internal(int argc, const char *argv[], if (!event) { /* Make a new event name */ + ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!ebuf) + return -ENOMEM; if (symbol) - snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", is_return ? 'r' : 'p', symbol, offset); else - snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_0x%p", is_return ? 'r' : 'p', addr); - sanitize_event_name(buf); - event = buf; + sanitize_event_name(ebuf); + event = ebuf; } + abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL); + if (!abuf) + return -ENOMEM; argc -= 2; argv += 2; ctx->funcname = symbol; new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc, -- cgit v1.2.3 From 4c6edb43ead62776d4ae58cc1a121700b546db0f Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 23 Jul 2025 10:31:26 +0900 Subject: tracing: eprobe-event: Allocate string buffers from heap Allocate temporary string buffers for parsing eprobe-events from heap instead of stack. Link: https://lore.kernel.org/all/175323428599.57270.988038042425748956.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_eprobe.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 7ba3a18be4c5..f16a0ab0b001 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -9,6 +9,7 @@ * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov tz.stoyanov@gmail.com> * */ +#include #include #include #include @@ -871,10 +872,10 @@ static int __trace_eprobe_create(int argc, const char *argv[]) const char *event = NULL, *group = EPROBE_EVENT_SYSTEM; const char *sys_event = NULL, *sys_name = NULL; struct trace_event_call *event_call; + char *buf1 __free(kfree) = NULL; + char *buf2 __free(kfree) = NULL; + char *gbuf __free(kfree) = NULL; struct trace_eprobe *ep = NULL; - char buf1[MAX_EVENT_NAME_LEN]; - char buf2[MAX_EVENT_NAME_LEN]; - char gbuf[MAX_EVENT_NAME_LEN]; int ret = 0, filter_idx = 0; int i, filter_cnt; @@ -885,6 +886,9 @@ static int __trace_eprobe_create(int argc, const char *argv[]) event = strchr(&argv[0][1], ':'); if (event) { + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!gbuf) + goto mem_error; event++; ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); @@ -894,6 +898,11 @@ static int __trace_eprobe_create(int argc, const char *argv[]) trace_probe_log_set_index(1); sys_event = argv[1]; + + buf2 = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!buf2) + goto mem_error; + ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0); if (ret || !sys_event || !sys_name) { trace_probe_log_err(0, NO_EVENT_INFO); @@ -901,7 +910,9 @@ static int __trace_eprobe_create(int argc, const char *argv[]) } if (!event) { - strscpy(buf1, sys_event, MAX_EVENT_NAME_LEN); + buf1 = kstrdup(sys_event, GFP_KERNEL); + if (!buf1) + goto mem_error; event = buf1; } @@ -974,6 +985,9 @@ static int __trace_eprobe_create(int argc, const char *argv[]) trace_probe_log_clear(); return ret; +mem_error: + ret = -ENOMEM; + goto error; parse_error: ret = -EINVAL; error: -- cgit v1.2.3 From 97e8230f89a3570785a66b1f5eec86a2e4324bf9 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 23 Jul 2025 10:31:36 +0900 Subject: tracing: uprobe-event: Allocate string buffers from heap Allocate temporary string buffers for parsing uprobe-events from heap instead of stack. Link: https://lore.kernel.org/all/175323429593.57270.12369235525923902341.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_uprobe.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 872dce092e46..8b0bcc0d8f41 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "trace_uprobe: " fmt #include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include +#include "trace.h" #include "trace_dynevent.h" #include "trace_probe.h" #include "trace_probe_tmpl.h" @@ -537,15 +539,15 @@ static int register_trace_uprobe(struct trace_uprobe *tu) */ static int __trace_uprobe_create(int argc, const char **argv) { - struct trace_uprobe *tu; const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; char *arg, *filename, *rctr, *rctr_end, *tmp; - char buf[MAX_EVENT_NAME_LEN]; - char gbuf[MAX_EVENT_NAME_LEN]; - enum probe_print_type ptype; - struct path path; unsigned long offset, ref_ctr_offset; + char *gbuf __free(kfree) = NULL; + char *buf __free(kfree) = NULL; + enum probe_print_type ptype; + struct trace_uprobe *tu; bool is_return = false; + struct path path; int i, ret; ref_ctr_offset = 0; @@ -653,6 +655,10 @@ static int __trace_uprobe_create(int argc, const char **argv) /* setup a probe */ trace_probe_log_set_index(0); if (event) { + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!gbuf) + goto fail_mem; + ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) @@ -664,15 +670,16 @@ static int __trace_uprobe_create(int argc, const char **argv) char *ptr; tail = kstrdup(kbasename(filename), GFP_KERNEL); - if (!tail) { - ret = -ENOMEM; - goto fail_address_parse; - } + if (!tail) + goto fail_mem; ptr = strpbrk(tail, ".-_"); if (ptr) *ptr = '\0'; + buf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); + if (!buf) + goto fail_mem; snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset); event = buf; kfree(tail); @@ -724,6 +731,9 @@ out: trace_probe_log_clear(); return ret; +fail_mem: + ret = -ENOMEM; + fail_address_parse: trace_probe_log_clear(); path_put(&path); -- cgit v1.2.3 From 558d5f3cd250b5c20386200d573b0518a8f5fe64 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 23 Jul 2025 10:31:45 +0900 Subject: tracing: probes: Add a kerneldoc for traceprobe_parse_event_name() Since traceprobe_parse_event_name() is a bit complicated, add a kerneldoc for explaining the behavior. Link: https://lore.kernel.org/all/175323430565.57270.2602609519355112748.stgit@devnote2/ Suggested-by: Steven Rostedt Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_probe.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 9d26d901c9e5..4fb3d98e38ea 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -247,7 +247,25 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset) return 0; } -/* @buf must has MAX_EVENT_NAME_LEN size */ +/** + * traceprobe_parse_event_name() - Parse a string into group and event names + * @pevent: A pointer to the string to be parsed. + * @pgroup: A pointer to the group name. + * @buf: A buffer to store the parsed group name. + * @offset: The offset of the string in the original user command, for logging. + * + * This parses a string with the format `[GROUP/][EVENT]` or `[GROUP.][EVENT]` + * (either GROUP or EVENT or both must be specified). + * Since the parsed group name is stored in @buf, the caller must ensure @buf + * is at least MAX_EVENT_NAME_LEN bytes. + * + * Return: 0 on success, or -EINVAL on failure. + * + * If success, *@pevent is updated to point to the event name part of the + * original string, or NULL if there is no event name. + * Also, *@pgroup is updated to point to the parsed group which is stored + * in @buf, or NULL if there is no group name. + */ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, char *buf, int offset) { -- cgit v1.2.3 From dabd3e7dcc5881d5d3d6dc60c6f14780fee9a9bd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 23 Jul 2025 12:42:02 -0400 Subject: tracing: Have eprobes handle arrays eprobes are dynamic events that can read other events using their fields to create new events. Currently it doesn't work with arrays. When the new event field is attached to the old event field, it looks at the size of the field to determine what type of field the new field should be. For 1 byte fields it's a char, for 2 bytes, it's a short and for 4 bytes it's an integer. For all other sizes it just defaults to "long". This also reads the contents of the field for such cases. For arrays that are bigger than the size of long, return the value of the address of the content itself. This will allow eprobes to read other values in the array of the old event. This is useful when raw_syscalls is enabled but the syscall events are not. The syscall events are created from the raw_syscalls as they have an array of "args" that holds the 6 long words passed to the syscall entry point. To read the value of "filename" from sys_openat, the eprobe could attach to the raw_syscall and read the second value. It can then even be passed to a synthetic event and converted back to another eprobe to get the value of "filename" after it has been read by the kernel during the system call: [ Create an eprobe called "sys" and attach it to sys_enter. Read the id of the system call and the second argument ] # echo 'e:sys raw_syscalls.sys_enter nr=$id:u32 arg2=+8($args):u64' >> /sys/kernel/tracing/dynamic_events [ Create a synthetic event "path" that will hold the address of the sys_openat filename. This is on a 64bit machine, so make it 64 bits ] # echo 's:path u64 file;' >> /sys/kernel/tracing/dynamic_events [ Add a histogram to the eprobe/sys which tiggers if the "nr" field is 257 (sys_openat), and save the filename in the "file" variable. ] # echo 'hist:keys=common_pid:file=arg2 if nr == 257' > /sys/kernel/tracing/events/eprobes/sys/trigger [ Attach a histogram to sys_exit event that triggers the "path" synthetic event and records the "filename" that was passed from the sys eprobe. ] # echo 'hist:keys=common_pid:f=$file:onmatch(eprobes.sys).trace(path,$f)' >> /sys/kernel/tracing/events/raw_syscalls/sys_exit/trigger [ Create another eprobe that dereferences the "file" field as a user space string and displays it. ] # echo 'e:open synthetic.path file=+0($file):ustring' >> /sys/kernel/tracing/dynamic_events # echo 1 > /sys/kernel/tracing/events/eprobes/open/enable # cat trace_pipe cat-1142 [003] ...5. 799.521912: open: (synthetic.path) file="/etc/ld.so.cache" cat-1142 [003] ...5. 799.521934: open: (synthetic.path) file="/etc/ld.so.cache" cat-1142 [003] ...5. 799.522065: open: (synthetic.path) file="/etc/ld.so.cache" cat-1142 [003] ...5. 799.522080: open: (synthetic.path) file="/etc/ld.so.cache" cat-1142 [003] ...5. 799.522296: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libc.so.6" cat-1142 [003] ...5. 799.522319: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libc.so.6" less-1143 [005] ...5. 799.522327: open: (synthetic.path) file="/etc/ld.so.cache" cat-1142 [003] ...5. 799.522333: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libc.so.6" cat-1142 [003] ...5. 799.522348: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libc.so.6" less-1143 [005] ...5. 799.522349: open: (synthetic.path) file="/etc/ld.so.cache" cat-1142 [003] ...5. 799.522363: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libc.so.6" less-1143 [005] ...5. 799.522477: open: (synthetic.path) file="/etc/ld.so.cache" cat-1142 [003] ...5. 799.522489: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libc.so.6" less-1143 [005] ...5. 799.522492: open: (synthetic.path) file="/etc/ld.so.cache" less-1143 [005] ...5. 799.522720: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libtinfo.so.6" less-1143 [005] ...5. 799.522744: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libtinfo.so.6" less-1143 [005] ...5. 799.522759: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libtinfo.so.6" cat-1142 [003] ...5. 799.522850: open: (synthetic.path) file="/lib/x86_64-linux-gnu/libc.so.6" Link: https://lore.kernel.org/all/20250723124202.4f7475be@batman.local.home/ Signed-off-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_eprobe.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index f16a0ab0b001..a1d402124836 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -344,10 +344,15 @@ get_event_field(struct fetch_insn *code, void *rec) val = *(unsigned int *)addr; break; default: - if (field->is_signed) - val = *(long *)addr; - else - val = *(unsigned long *)addr; + if (field->size == sizeof(long)) { + if (field->is_signed) + val = *(long *)addr; + else + val = *(unsigned long *)addr; + break; + } + /* This is an array, point to the addr itself */ + val = (unsigned long)addr; break; } return val; -- cgit v1.2.3 From 133c302a0c60bca1f0a2f5f85ef11e7f5e8f1331 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 28 Jul 2025 11:13:12 +0900 Subject: tracing: trace_fprobe: Fix typo of the semicolon Fix a typo that uses ',' instead of ';' for line delimiter. Link: https://lore.kernel.org/linux-trace-kernel/175366879192.487099.5714468217360139639.stgit@mhiramat.tok.corp.google.com/ Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_fprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 9d14a910fbf7..b36ade43d4b3 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1404,7 +1404,7 @@ static int trace_fprobe_create_cb(int argc, const char *argv[]) if (!ctx) return -ENOMEM; - ctx->flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE, + ctx->flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE; trace_probe_log_init("trace_fprobe", argc, argv); ret = trace_fprobe_create_internal(argc, argv, ctx); -- cgit v1.2.3