| author | Thomas Gleixner <tglx@kernel.org> | 2026-01-30 15:40:05 +0100 |
|---|---|---|
| committer | Thomas Gleixner <tglx@kernel.org> | 2026-01-30 15:40:05 +0100 |
| commit | 5c4378b7b0e1f45cf38e77db9305ee2c7bf88002 | |
| tree | 94762f2604492b9a04de849ae30ce129883e092e /include | |
| parent | 377521af0341083bc9d196cf021ec7265dc47c20 | |
| parent | 31c9387d0d84bc1d643a0c30155b6d92d05c92fc | |
Merge branch 'core/entry' into sched/core
Pull the entry update to avoid merge conflicts with the time slice
extension changes.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/entry-common.h | 165 |
1 files changed, 148 insertions, 17 deletions
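
For orientation before the diff: the helpers touched here are the ones an architecture's syscall path wraps around the actual syscall dispatch. A minimal sketch of such a caller, assuming a hypothetical architecture (`xyz_do_syscall()` and `xyz_invoke_syscall()` are invented for illustration; the two `syscall_*_user_mode()` helpers are the real API from this header):

```c
#include <linux/entry-common.h>

/* Hypothetical arch syscall entry point -- not part of this patch. */
void xyz_do_syscall(struct pt_regs *regs, long nr)
{
	/* Runs the now inlined syscall_trace_enter() when SYSCALL_WORK_ENTER is set */
	nr = syscall_enter_from_user_mode(regs, nr);

	if (nr >= 0)
		xyz_invoke_syscall(regs, nr);	/* arch specific dispatch, invented */

	/* Runs syscall_exit_work() plus the final transition to user mode */
	syscall_exit_to_user_mode(regs);
}
```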
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 026201a44aa2..f83ca0abf2cd 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_ENTRYCOMMON_H
 #define __LINUX_ENTRYCOMMON_H

+#include <linux/audit.h>
 #include <linux/irq-entry-common.h>
 #include <linux/livepatch.h>
 #include <linux/ptrace.h>
@@ -45,7 +46,84 @@
 		 SYSCALL_WORK_SYSCALL_EXIT_TRAP	|	\
 		 ARCH_SYSCALL_WORK_EXIT)

-long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
+/**
+ * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper
+ *
+ * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry().
+ *
+ * This allows architecture specific ptrace_report_syscall_entry()
+ * implementations. If not defined by the architecture this falls back to
+ * ptrace_report_syscall_entry().
+ */
+static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs);
+
+#ifndef arch_ptrace_report_syscall_entry
+static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
+{
+	return ptrace_report_syscall_entry(regs);
+}
+#endif
+
+bool syscall_user_dispatch(struct pt_regs *regs);
+long trace_syscall_enter(struct pt_regs *regs, long syscall);
+void trace_syscall_exit(struct pt_regs *regs, long ret);
+
+static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
+{
+	if (unlikely(audit_context())) {
+		unsigned long args[6];
+
+		syscall_get_arguments(current, regs, args);
+		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
+	}
+}
+
+static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
+{
+	long syscall, ret = 0;
+
+	/*
+	 * Handle Syscall User Dispatch. This must come first, since
+	 * the ABI here can be something that doesn't make sense for
+	 * other syscall_work features.
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+		if (syscall_user_dispatch(regs))
+			return -1L;
+	}
+
+	/*
+	 * User space got a time slice extension granted and relinquishes
+	 * the CPU. The work stops the slice timer to avoid an extra round
+	 * through hrtimer_interrupt().
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE)
+		rseq_syscall_enter_work(syscall_get_nr(current, regs));
+
+	/* Handle ptrace */
+	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
+		ret = arch_ptrace_report_syscall_entry(regs);
+		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
+			return -1L;
+	}
+
+	/* Do seccomp after ptrace, to catch any tracer changes. */
+	if (work & SYSCALL_WORK_SECCOMP) {
+		ret = __secure_computing();
+		if (ret == -1L)
+			return ret;
+	}
+
+	/* Either of the above might have changed the syscall number */
+	syscall = syscall_get_nr(current, regs);
+
+	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
+		syscall = trace_syscall_enter(regs, syscall);
+
+	syscall_enter_audit(regs, syscall);
+
+	return ret ? : syscall;
+}

 /**
  * syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -75,7 +153,7 @@ static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *re
 	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

 	if (work & SYSCALL_WORK_ENTER)
-		syscall = syscall_trace_enter(regs, syscall, work);
+		syscall = syscall_trace_enter(regs, work);

 	return syscall;
 }
@@ -112,6 +190,37 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
 	return ret;
 }

+/*
+ * If SYSCALL_EMU is set, then the only reason to report is when
+ * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). The syscall
+ * instruction has already been reported in syscall_enter_from_user_mode().
+ */
+static __always_inline bool report_single_step(unsigned long work)
+{
+	if (work & SYSCALL_WORK_SYSCALL_EMU)
+		return false;
+
+	return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
+}
+
+/**
+ * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
+ *
+ * This allows architecture specific ptrace_report_syscall_exit()
+ * implementations. If not defined by the architecture this falls back to
+ * ptrace_report_syscall_exit().
+ */
+static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+							    int step);
+
+#ifndef arch_ptrace_report_syscall_exit
+static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+							    int step)
+{
+	ptrace_report_syscall_exit(regs, step);
+}
+#endif
+
 /**
  * syscall_exit_work - Handle work before returning to user mode
  * @regs:	Pointer to current pt_regs
@@ -119,20 +228,40 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
  *
  * Do one-time syscall specific work.
  */
-void syscall_exit_work(struct pt_regs *regs, unsigned long work);
+static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
+{
+	bool step;
+
+	/*
+	 * If the syscall was rolled back due to syscall user dispatching,
+	 * then the tracers below are not invoked for the same reason as
+	 * the entry side was not invoked in syscall_trace_enter(): The ABI
+	 * of these syscalls is unknown.
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+		if (unlikely(current->syscall_dispatch.on_dispatch)) {
+			current->syscall_dispatch.on_dispatch = false;
+			return;
+		}
+	}
+
+	audit_syscall_exit(regs);
+
+	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
+		trace_syscall_exit(regs, syscall_get_return_value(current, regs));
+
+	step = report_single_step(work);
+	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
+		arch_ptrace_report_syscall_exit(regs, step);
+}

 /**
- * syscall_exit_to_user_mode_work - Handle work before returning to user mode
+ * syscall_exit_to_user_mode_work - Handle one time work before returning to user mode
  * @regs:	Pointer to current's pt_regs
  *
- * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
- * exit_to_user_mode() to perform the final transition to user mode.
+ * Step 1 of syscall_exit_to_user_mode() with the same calling convention.
  *
- * Calling convention is the same as for syscall_exit_to_user_mode() and it
- * returns with all work handled and interrupts disabled. The caller must
- * invoke exit_to_user_mode() before actually switching to user mode to
- * make the final state transitions. Interrupts must stay disabled between
- * return from this function and the invocation of exit_to_user_mode().
+ * The caller must invoke steps 2-3 of syscall_exit_to_user_mode() afterwards.
  */
 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
 {
@@ -155,15 +284,13 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
 	 */
 	if (unlikely(work & SYSCALL_WORK_EXIT))
 		syscall_exit_work(regs, work);
-
-	local_irq_disable_exit_to_user();
-	syscall_exit_to_user_mode_prepare(regs);
 }

 /**
  * syscall_exit_to_user_mode - Handle work before returning to user mode
  * @regs:	Pointer to current's pt_regs
  *
- * Invoked with interrupts enabled and fully valid regs. Returns with all
+ * Invoked with interrupts enabled and fully valid @regs. Returns with all
  * work handled, interrupts disabled such that the caller can immediately
  * switch to user mode. Called from architecture specific syscall and ret
  * from fork code.
@@ -176,6 +303,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
  *   - ptrace (single stepping)
  *
  * 2) Preparatory work
+ *    - Disable interrupts
  *    - Exit to user mode loop (common TIF handling). Invokes
  *      arch_exit_to_user_mode_work() for architecture specific TIF work
  *    - Architecture specific one time work arch_exit_to_user_mode_prepare()
@@ -184,14 +312,17 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
  * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
  *    functionality in exit_to_user_mode().
  *
- * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
- * exit_to_user_mode(). This function is preferred unless there is a
- * compelling architectural reason to use the separate functions.
+ * This is a combination of syscall_exit_to_user_mode_work() (1), disabling
+ * interrupts followed by syscall_exit_to_user_mode_prepare() (2) and
+ * exit_to_user_mode() (3). This function is preferred unless there is a
+ * compelling architectural reason to invoke the functions separately.
  */
 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
 {
 	instrumentation_begin();
 	syscall_exit_to_user_mode_work(regs);
+	local_irq_disable_exit_to_user();
+	syscall_exit_to_user_mode_prepare(regs);
 	instrumentation_end();
 	exit_to_user_mode();
 }
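
The `#ifndef` fallbacks above follow the usual entry-code override pattern: an architecture that needs to wrap the ptrace reports supplies its own inline plus a matching `#define` in its `asm/entry-common.h` before this header is included; otherwise the generic `ptrace_report_syscall_entry()`/`ptrace_report_syscall_exit()` are used. A hedged sketch of such an override (the `xyz_*` register helpers are invented for illustration):

```c
/* Hypothetical arch/xyz/include/asm/entry-common.h fragment. */
static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
{
	int ret;

	xyz_sync_shadow_regs(regs);		/* invented: expose full state to the tracer */
	ret = ptrace_report_syscall_entry(regs);
	xyz_writeback_shadow_regs(regs);	/* invented: fold tracer edits back */

	return ret;
}
/* Make the #ifndef in <linux/entry-common.h> pick up the override */
#define arch_ptrace_report_syscall_entry arch_ptrace_report_syscall_entry
```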

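Note the changed split on the exit side: local_irq_disable_exit_to_user() and syscall_exit_to_user_mode_prepare() move out of syscall_exit_to_user_mode_work() (step 1) into syscall_exit_to_user_mode() (steps 2-3). An architecture with a compelling reason to separate the steps would now open-code them roughly as in this sketch (xyz_arch_specific_work() is invented; the rest is the API from the diff):

```c
/* Hypothetical open-coded exit path for an architecture that needs to
 * do its own work between step 1 and step 2. */
static void xyz_syscall_exit(struct pt_regs *regs)
{
	instrumentation_begin();
	syscall_exit_to_user_mode_work(regs);		/* step 1: one time syscall work */
	xyz_arch_specific_work(regs);			/* invented arch hook */
	local_irq_disable_exit_to_user();		/* step 2: irqs off ... */
	syscall_exit_to_user_mode_prepare(regs);	/* ... and exit-to-user TIF work */
	instrumentation_end();
	exit_to_user_mode();				/* step 3: final transition */
}
```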