diff options
| author | Steven Rostedt <rostedt@goodmis.org> | 2025-10-28 19:11:22 -0400 |
|---|---|---|
| committer | Steven Rostedt (Google) <rostedt@goodmis.org> | 2025-10-28 20:10:59 -0400 |
| commit | 299ea67e6a2b3d0d4b707f45b8c66d8b4bbbf2c6 (patch) | |
| tree | e6c6997f778e3ca6909175a1aca5f30bf130c199 /kernel/trace/trace_syscalls.c | |
| parent | baa031b7bd2ce7502339174a42974321859ecd6a (diff) | |
tracing: Add a config and syscall_user_buf_size file to limit amount written
When a system call event copies data from user space addresses into the ring
buffer, it can copy up to 511 bytes of data. This can waste precious ring
buffer space if the user isn't interested in the output. Add a new file
"syscall_user_buf_size" that gets initialized to a new config
CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT that defaults to 63.
The config also is used to limit how much perf can read from user space.
Also lower the max down to 165, as this isn't to record everything that a
system call may be passing through to the kernel. 165 is more than enough.
The reason for 165 is that adding one for the nul terminating byte, as
well as possibly needing to append the "..." string, turns it into 170
bytes. This needs to save up to 3 arguments, and 3 * 170 is 510, which
fits nicely in 512 bytes (a power of 2).
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Takaya Saeki <takayas@google.com>
Cc: Tom Zanussi <zanussi@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Douglas Raillard <douglas.raillard@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Link: https://lore.kernel.org/20251028231148.260068913@kernel.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace/trace_syscalls.c')
| -rw-r--r-- | kernel/trace/trace_syscalls.c | 50 |
1 files changed, 28 insertions, 22 deletions
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 3eafe1b8f53e..a2de6364777a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -390,21 +390,19 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call) /* * Create a per CPU temporary buffer to copy user space pointers into. * - * SYSCALL_FAULT_BUF_SZ holds the size of the per CPU buffer to use - * to copy memory from user space addresses into. - * - * SYSCALL_FAULT_ARG_SZ is the amount to copy from user space. - * - * SYSCALL_FAULT_USER_MAX is the amount to copy into the ring buffer. - * It's slightly smaller than SYSCALL_FAULT_ARG_SZ to know if it - * needs to append the EXTRA or not. + * SYSCALL_FAULT_USER_MAX is the amount to copy from user space. + * (defined in kernel/trace/trace.h) + + * SYSCALL_FAULT_ARG_SZ is the amount to copy from user space plus the + * nul terminating byte and possibly appended EXTRA (4 bytes). * - * This only allows up to 3 args from system calls. + * SYSCALL_FAULT_BUF_SZ holds the size of the per CPU buffer to use + * to copy memory from user space addresses into that will hold + * 3 args as only 3 args are allowed to be copied from system calls. 
*/ -#define SYSCALL_FAULT_BUF_SZ 512 -#define SYSCALL_FAULT_ARG_SZ 168 -#define SYSCALL_FAULT_USER_MAX 128 +#define SYSCALL_FAULT_ARG_SZ (SYSCALL_FAULT_USER_MAX + 1 + 4) #define SYSCALL_FAULT_MAX_CNT 3 +#define SYSCALL_FAULT_BUF_SZ (SYSCALL_FAULT_ARG_SZ * SYSCALL_FAULT_MAX_CNT) /* Use the tracing per CPU buffer infrastructure to copy from user space */ struct syscall_user_buffer { @@ -498,7 +496,8 @@ static int syscall_copy_user_array(char *buf, const char __user *ptr, return 0; } -static char *sys_fault_user(struct syscall_metadata *sys_data, +static char *sys_fault_user(unsigned int buf_size, + struct syscall_metadata *sys_data, struct syscall_user_buffer *sbuf, unsigned long *args, unsigned int data_size[SYSCALL_FAULT_MAX_CNT]) @@ -548,6 +547,10 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, data_size[i] = -1; /* Denotes no pointer */ } + /* A zero size means do not even try */ + if (!buf_size) + return NULL; + buffer = trace_user_fault_read(&sbuf->buf, NULL, size, syscall_copy, &sargs); if (!buffer) @@ -568,19 +571,20 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, buf[x] = '.'; } + size = min(buf_size, SYSCALL_FAULT_USER_MAX); + /* * If the text was truncated due to our max limit, * add "..." to the string. 
*/ - if (ret > SYSCALL_FAULT_USER_MAX) { - strscpy(buf + SYSCALL_FAULT_USER_MAX, EXTRA, - sizeof(EXTRA)); - ret = SYSCALL_FAULT_USER_MAX + sizeof(EXTRA); + if (ret > size) { + strscpy(buf + size, EXTRA, sizeof(EXTRA)); + ret = size + sizeof(EXTRA); } else { buf[ret++] = '\0'; } } else { - ret = min(ret, SYSCALL_FAULT_USER_MAX); + ret = min((unsigned int)ret, buf_size); } data_size[i] = ret; } @@ -590,7 +594,8 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, static int syscall_get_data(struct syscall_metadata *sys_data, unsigned long *args, - char **buffer, int *size, int *user_sizes, int *uargs) + char **buffer, int *size, int *user_sizes, int *uargs, + int buf_size) { struct syscall_user_buffer *sbuf; int i; @@ -600,7 +605,7 @@ syscall_get_data(struct syscall_metadata *sys_data, unsigned long *args, if (!sbuf) return -1; - *buffer = sys_fault_user(sys_data, sbuf, args, user_sizes); + *buffer = sys_fault_user(buf_size, sys_data, sbuf, args, user_sizes); /* * user_size is the amount of data to append. * Need to add 4 for the meta field that points to @@ -705,7 +710,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) if (mayfault) { if (syscall_get_data(sys_data, args, &user_ptr, - &size, user_sizes, &uargs) < 0) + &size, user_sizes, &uargs, tr->syscall_buf_sz) < 0) return; } @@ -1204,6 +1209,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) bool mayfault; char *user_ptr; int user_sizes[SYSCALL_FAULT_MAX_CNT] = {}; + int buf_size = CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT; int syscall_nr; int rctx; int size = 0; @@ -1233,7 +1239,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (mayfault) { if (syscall_get_data(sys_data, args, &user_ptr, - &size, user_sizes, &uargs) < 0) + &size, user_sizes, &uargs, buf_size) < 0) return; } |
