From a70e9f647f501e36a6a092888b1ea7386b7c5664 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 24 Jun 2025 20:35:56 +0200 Subject: entry: Split generic entry into generic exception and syscall entry Currently CONFIG_GENERIC_ENTRY enables both the generic exception entry logic and the generic syscall entry logic, which are otherwise loosely coupled. Introduce separate config options for these so that architectures can select the two independently. This will make it easier for architectures to migrate to generic entry code. Suggested-by: Mark Rutland Signed-off-by: Jinjie Ruan Signed-off-by: Linus Walleij Signed-off-by: Thomas Gleixner Acked-by: Linus Walleij Link: https://lore.kernel.org/20250213130007.1418890-2-ruanjinjie@huawei.com Link: https://lore.kernel.org/all/20250624-generic-entry-split-v1-1-53d5ef4f94df@linaro.org [Linus Walleij: rebase onto v6.16-rc1] --- kernel/entry/syscall-common.c | 112 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 kernel/entry/syscall-common.c (limited to 'kernel/entry/syscall-common.c') diff --git a/kernel/entry/syscall-common.c b/kernel/entry/syscall-common.c new file mode 100644 index 000000000000..66e6ba7fa80c --- /dev/null +++ b/kernel/entry/syscall-common.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "common.h" + +#define CREATE_TRACE_POINTS +#include + +static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) +{ + if (unlikely(audit_context())) { + unsigned long args[6]; + + syscall_get_arguments(current, regs, args); + audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); + } +} + +long syscall_trace_enter(struct pt_regs *regs, long syscall, + unsigned long work) +{ + long ret = 0; + + /* + * Handle Syscall User Dispatch. This must comes first, since + * the ABI here can be something that doesn't make sense for + * other syscall_work features. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (syscall_user_dispatch(regs)) + return -1L; + } + + /* Handle ptrace */ + if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { + ret = ptrace_report_syscall_entry(regs); + if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) + return -1L; + } + + /* Do seccomp after ptrace, to catch any tracer changes. */ + if (work & SYSCALL_WORK_SECCOMP) { + ret = __secure_computing(); + if (ret == -1L) + return ret; + } + + /* Either of the above might have changed the syscall number */ + syscall = syscall_get_nr(current, regs); + + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { + trace_sys_enter(regs, syscall); + /* + * Probes or BPF hooks in the tracepoint may have changed the + * system call number as well. + */ + syscall = syscall_get_nr(current, regs); + } + + syscall_enter_audit(regs, syscall); + + return ret ? : syscall; +} + +noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + instrumentation_begin(); + local_irq_enable(); + instrumentation_end(); +} + +/* + * If SYSCALL_EMU is set, then the only reason to report is when + * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall + * instruction has been already reported in syscall_enter_from_user_mode(). + */ +static inline bool report_single_step(unsigned long work) +{ + if (work & SYSCALL_WORK_SYSCALL_EMU) + return false; + + return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; +} + +void syscall_exit_work(struct pt_regs *regs, unsigned long work) +{ + bool step; + + /* + * If the syscall was rolled back due to syscall user dispatching, + * then the tracers below are not invoked for the same reason as + * the entry side was not invoked in syscall_trace_enter(): The ABI + * of these syscalls is unknown. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (unlikely(current->syscall_dispatch.on_dispatch)) { + current->syscall_dispatch.on_dispatch = false; + return; + } + } + + audit_syscall_exit(regs); + + if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) + trace_sys_exit(regs, syscall_get_return_value(current, regs)); + + step = report_single_step(work); + if (step || work & SYSCALL_WORK_SYSCALL_TRACE) + ptrace_report_syscall_exit(regs, step); +} -- cgit v1.2.3