summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@kernel.org>2026-01-30 15:40:05 +0100
committerThomas Gleixner <tglx@kernel.org>2026-01-30 15:40:05 +0100
commit5c4378b7b0e1f45cf38e77db9305ee2c7bf88002 (patch)
tree94762f2604492b9a04de849ae30ce129883e092e /include
parent377521af0341083bc9d196cf021ec7265dc47c20 (diff)
parent31c9387d0d84bc1d643a0c30155b6d92d05c92fc (diff)
Merge branch 'core/entry' into sched/core
Pull the entry update to avoid merge conflicts with the time slice extension changes. Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/entry-common.h165
1 files changed, 148 insertions, 17 deletions
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 026201a44aa2..f83ca0abf2cd 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -2,6 +2,7 @@
#ifndef __LINUX_ENTRYCOMMON_H
#define __LINUX_ENTRYCOMMON_H
+#include <linux/audit.h>
#include <linux/irq-entry-common.h>
#include <linux/livepatch.h>
#include <linux/ptrace.h>
@@ -45,7 +46,84 @@
SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
ARCH_SYSCALL_WORK_EXIT)
-long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
+/**
+ * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper
+ *
+ * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry().
+ *
+ * This allows architecture specific ptrace_report_syscall_entry()
+ * implementations. If not defined by the architecture this falls back to
+ * to ptrace_report_syscall_entry().
+ */
+static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs);
+
+#ifndef arch_ptrace_report_syscall_entry
+static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
+{
+ return ptrace_report_syscall_entry(regs);
+}
+#endif
+
+bool syscall_user_dispatch(struct pt_regs *regs);
+long trace_syscall_enter(struct pt_regs *regs, long syscall);
+void trace_syscall_exit(struct pt_regs *regs, long ret);
+
+static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
+{
+ if (unlikely(audit_context())) {
+ unsigned long args[6];
+
+ syscall_get_arguments(current, regs, args);
+ audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
+ }
+}
+
+static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
+{
+ long syscall, ret = 0;
+
+ /*
+ * Handle Syscall User Dispatch. This must comes first, since
+ * the ABI here can be something that doesn't make sense for
+ * other syscall_work features.
+ */
+ if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+ if (syscall_user_dispatch(regs))
+ return -1L;
+ }
+
+ /*
+ * User space got a time slice extension granted and relinquishes
+ * the CPU. The work stops the slice timer to avoid an extra round
+ * through hrtimer_interrupt().
+ */
+ if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE)
+ rseq_syscall_enter_work(syscall_get_nr(current, regs));
+
+ /* Handle ptrace */
+ if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
+ ret = arch_ptrace_report_syscall_entry(regs);
+ if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
+ return -1L;
+ }
+
+ /* Do seccomp after ptrace, to catch any tracer changes. */
+ if (work & SYSCALL_WORK_SECCOMP) {
+ ret = __secure_computing();
+ if (ret == -1L)
+ return ret;
+ }
+
+ /* Either of the above might have changed the syscall number */
+ syscall = syscall_get_nr(current, regs);
+
+ if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
+ syscall = trace_syscall_enter(regs, syscall);
+
+ syscall_enter_audit(regs, syscall);
+
+ return ret ? : syscall;
+}
/**
* syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -75,7 +153,7 @@ static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *re
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
if (work & SYSCALL_WORK_ENTER)
- syscall = syscall_trace_enter(regs, syscall, work);
+ syscall = syscall_trace_enter(regs, work);
return syscall;
}
@@ -112,6 +190,37 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
return ret;
}
+/*
+ * If SYSCALL_EMU is set, then the only reason to report is when
+ * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
+ * instruction has been already reported in syscall_enter_from_user_mode().
+ */
+static __always_inline bool report_single_step(unsigned long work)
+{
+ if (work & SYSCALL_WORK_SYSCALL_EMU)
+ return false;
+
+ return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
+}
+
+/**
+ * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
+ *
+ * This allows architecture specific ptrace_report_syscall_exit()
+ * implementations. If not defined by the architecture this falls back to
+ * to ptrace_report_syscall_exit().
+ */
+static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+ int step);
+
+#ifndef arch_ptrace_report_syscall_exit
+static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+ int step)
+{
+ ptrace_report_syscall_exit(regs, step);
+}
+#endif
+
/**
* syscall_exit_work - Handle work before returning to user mode
* @regs: Pointer to current pt_regs
@@ -119,20 +228,40 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
*
* Do one-time syscall specific work.
*/
-void syscall_exit_work(struct pt_regs *regs, unsigned long work);
+static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
+{
+ bool step;
+
+ /*
+ * If the syscall was rolled back due to syscall user dispatching,
+ * then the tracers below are not invoked for the same reason as
+ * the entry side was not invoked in syscall_trace_enter(): The ABI
+ * of these syscalls is unknown.
+ */
+ if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+ if (unlikely(current->syscall_dispatch.on_dispatch)) {
+ current->syscall_dispatch.on_dispatch = false;
+ return;
+ }
+ }
+
+ audit_syscall_exit(regs);
+
+ if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
+ trace_syscall_exit(regs, syscall_get_return_value(current, regs));
+
+ step = report_single_step(work);
+ if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
+ arch_ptrace_report_syscall_exit(regs, step);
+}
/**
- * syscall_exit_to_user_mode_work - Handle work before returning to user mode
+ * syscall_exit_to_user_mode_work - Handle one time work before returning to user mode
* @regs: Pointer to currents pt_regs
*
- * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
- * exit_to_user_mode() to perform the final transition to user mode.
+ * Step 1 of syscall_exit_to_user_mode() with the same calling convention.
*
- * Calling convention is the same as for syscall_exit_to_user_mode() and it
- * returns with all work handled and interrupts disabled. The caller must
- * invoke exit_to_user_mode() before actually switching to user mode to
- * make the final state transitions. Interrupts must stay disabled between
- * return from this function and the invocation of exit_to_user_mode().
+ * The caller must invoke steps 2-3 of syscall_exit_to_user_mode() afterwards.
*/
static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
@@ -155,15 +284,13 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
*/
if (unlikely(work & SYSCALL_WORK_EXIT))
syscall_exit_work(regs, work);
- local_irq_disable_exit_to_user();
- syscall_exit_to_user_mode_prepare(regs);
}
/**
* syscall_exit_to_user_mode - Handle work before returning to user mode
* @regs: Pointer to currents pt_regs
*
- * Invoked with interrupts enabled and fully valid regs. Returns with all
+ * Invoked with interrupts enabled and fully valid @regs. Returns with all
* work handled, interrupts disabled such that the caller can immediately
* switch to user mode. Called from architecture specific syscall and ret
* from fork code.
@@ -176,6 +303,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
* - ptrace (single stepping)
*
* 2) Preparatory work
+ * - Disable interrupts
* - Exit to user mode loop (common TIF handling). Invokes
* arch_exit_to_user_mode_work() for architecture specific TIF work
* - Architecture specific one time work arch_exit_to_user_mode_prepare()
@@ -184,14 +312,17 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
* 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
* functionality in exit_to_user_mode().
*
- * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
- * exit_to_user_mode(). This function is preferred unless there is a
- * compelling architectural reason to use the separate functions.
+ * This is a combination of syscall_exit_to_user_mode_work() (1), disabling
+ * interrupts followed by syscall_exit_to_user_mode_prepare() (2) and
+ * exit_to_user_mode() (3). This function is preferred unless there is a
+ * compelling architectural reason to invoke the functions separately.
*/
static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
{
instrumentation_begin();
syscall_exit_to_user_mode_work(regs);
+ local_irq_disable_exit_to_user();
+ syscall_exit_to_user_mode_prepare(regs);
instrumentation_end();
exit_to_user_mode();
}