summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2025-12-15 17:52:19 +0100
committerPeter Zijlstra <peterz@infradead.org>2026-01-22 11:11:18 +0100
commitdd0a04606937af5810e9117d343ee3792635bd3d (patch)
tree67127a31d50c716cc9f39e93b61ea261b40995c5 /include/linux
parent99d2592023e5d0a31f5f5a83c694df48239a1e6c (diff)
rseq: Implement syscall entry work for time slice extensions
The kernel sets SYSCALL_WORK_RSEQ_SLICE when it grants a time slice extension. This allows to handle the rseq_slice_yield() syscall, which is used by user space to relinquish the CPU after finishing the critical section for which it requested an extension. In case the kernel state is still GRANTED, the kernel resets both kernel and user space state with a set of sanity checks. If the kernel state is already cleared, then this raced against the timer or some other interrupt and just clears the work bit. Doing it in syscall entry work allows to catch misbehaving user space, which issues an arbitrary syscall, i.e. not rseq_slice_yield(), from the critical section. Contrary to the initial strict requirement to use rseq_slice_yield() arbitrary syscalls are not considered a violation of the ABI contract anymore to allow onion architecture applications, which cannot control the code inside a critical section, to utilize this as well. If the code detects inconsistent user space that result in a SIGSEGV for the application. If the grant was still active and the task was not preempted yet, the work code reschedules immediately before continuing through the syscall. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://patch.msgid.link/20251215155709.005777059@linutronix.de
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/entry-common.h2
-rw-r--r--include/linux/rseq.h2
-rw-r--r--include/linux/thread_info.h16
3 files changed, 12 insertions, 8 deletions
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 87efb38b7081..026201a44aa2 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -36,8 +36,8 @@
SYSCALL_WORK_SYSCALL_EMU | \
SYSCALL_WORK_SYSCALL_AUDIT | \
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
+ SYSCALL_WORK_SYSCALL_RSEQ_SLICE | \
ARCH_SYSCALL_WORK_ENTER)
-
#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \
SYSCALL_WORK_SYSCALL_TRACE | \
SYSCALL_WORK_SYSCALL_AUDIT | \
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 3c194a02ad0a..7a01a0760405 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -164,8 +164,10 @@ static inline void rseq_syscall(struct pt_regs *regs) { }
#endif /* !CONFIG_DEBUG_RSEQ */
#ifdef CONFIG_RSEQ_SLICE_EXTENSION
+void rseq_syscall_enter_work(long syscall);
int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3);
#else /* CONFIG_RSEQ_SLICE_EXTENSION */
+static inline void rseq_syscall_enter_work(long syscall) { }
static inline int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3)
{
return -ENOTSUPP;
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index b40de9bab4b7..051e42902690 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -46,15 +46,17 @@ enum syscall_work_bit {
SYSCALL_WORK_BIT_SYSCALL_AUDIT,
SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH,
SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP,
+ SYSCALL_WORK_BIT_SYSCALL_RSEQ_SLICE,
};
-#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
-#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT)
-#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE)
-#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU)
-#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT)
-#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH)
-#define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP)
+#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
+#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT)
+#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE)
+#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU)
+#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT)
+#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH)
+#define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP)
+#define SYSCALL_WORK_SYSCALL_RSEQ_SLICE BIT(SYSCALL_WORK_BIT_SYSCALL_RSEQ_SLICE)
#endif
#include <asm/thread_info.h>