| author | Jinjie Ruan <ruanjinjie@huawei.com> | 2026-01-28 11:19:33 +0800 |
|---|---|---|
| committer | Thomas Gleixner <tglx@kernel.org> | 2026-01-30 15:38:10 +0100 |
| commit | 31c9387d0d84bc1d643a0c30155b6d92d05c92fc | |
| tree | 455198e1fcb8e9381fd02fc444a1113ddb0a4979 | /include/linux |
| parent | 578b21fd3ab2d9901ce40ed802e428a41a40610d | |
entry: Inline syscall_exit_work() and syscall_trace_enter()
After switching ARM64 to the generic entry code, syscall_exit_work()
showed up as a profiling hotspot because it is not inlined.
Inlining both syscall_trace_enter() and syscall_exit_work() provides a
performance gain when any of the work items is enabled. With audit enabled,
this results in a ~4% gain for perf bench syscall basic on
a kunpeng920 system:
| Metric | Baseline | Inlined | Change |
| ---------- | ----------- | ----------- | ------ |
| Total time | 2.353 [sec] | 2.264 [sec] | ↓3.8% |
| usecs/op | 0.235374 | 0.226472 | ↓3.8% |
| ops/sec | 4,248,588 | 4,415,554 | ↑3.9% |
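The gain comes from moving the definitions into the header; schematically, the change has the following shape (a condensed sketch for orientation only, the real bodies are in the hunks further down):

```c
/* Before: out-of-line helper, so a syscall with any exit work pending
 * pays a function call on top of the work itself. */
void syscall_exit_work(struct pt_regs *regs, unsigned long work);

/* After: defined __always_inline in <linux/entry-common.h>, so the work
 * handling is emitted directly into the (already inlined) exit path. */
static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
	/* audit, tracepoint and ptrace exit reporting, see the diff below */
}
```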
Small gains can be observed on x86 as well, though there the generated
code is optimized for the work case, which is counterproductive for high
performance scenarios where such entry/exit work is usually avoided.
Avoid this by marking the work check in syscall_enter_from_user_mode_work()
unlikely(), as the corresponding check in the exit path already does.
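That unlikely() hint is not visible in the hunks shown below (the diff view is limited to include/linux and the shown hunks only add the inlined helpers). As a rough sketch, assuming the helper keeps its existing shape with the syscall_work bits cached from thread_info and SYSCALL_WORK_ENTER as the enter-side mask, the hinted check looks something like this:

```c
/* Sketch only, not a hunk from this patch: the enter-side work check
 * with the unlikely() hint, mirroring the exit path. */
static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

	/* Keep the no-work case a straight-line fast path. */
	if (unlikely(work & SYSCALL_WORK_ENTER))
		syscall = syscall_trace_enter(regs, work);

	return syscall;
}
```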
[ tglx: Massage changelog and add the unlikely() ]
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260128031934.3906955-14-ruanjinjie@huawei.com
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/entry-common.h | 94 |
1 file changed, 92 insertions, 2 deletions
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index bea207e32c58..e67e3afe39ad 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_ENTRYCOMMON_H
 #define __LINUX_ENTRYCOMMON_H
 
+#include <linux/audit.h>
 #include <linux/irq-entry-common.h>
 #include <linux/livepatch.h>
 #include <linux/ptrace.h>
@@ -63,7 +64,58 @@ static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
 }
 #endif
 
-long syscall_trace_enter(struct pt_regs *regs, unsigned long work);
+bool syscall_user_dispatch(struct pt_regs *regs);
+long trace_syscall_enter(struct pt_regs *regs, long syscall);
+void trace_syscall_exit(struct pt_regs *regs, long ret);
+
+static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
+{
+	if (unlikely(audit_context())) {
+		unsigned long args[6];
+
+		syscall_get_arguments(current, regs, args);
+		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
+	}
+}
+
+static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
+{
+	long syscall, ret = 0;
+
+	/*
+	 * Handle Syscall User Dispatch. This must comes first, since
+	 * the ABI here can be something that doesn't make sense for
+	 * other syscall_work features.
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+		if (syscall_user_dispatch(regs))
+			return -1L;
+	}
+
+	/* Handle ptrace */
+	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
+		ret = arch_ptrace_report_syscall_entry(regs);
+		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
+			return -1L;
+	}
+
+	/* Do seccomp after ptrace, to catch any tracer changes. */
+	if (work & SYSCALL_WORK_SECCOMP) {
+		ret = __secure_computing();
+		if (ret == -1L)
+			return ret;
+	}
+
+	/* Either of the above might have changed the syscall number */
+	syscall = syscall_get_nr(current, regs);
+
+	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
+		syscall = trace_syscall_enter(regs, syscall);
+
+	syscall_enter_audit(regs, syscall);
+
+	return ret ? : syscall;
+}
 
 /**
  * syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -130,6 +182,19 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
 	return ret;
 }
 
+/*
+ * If SYSCALL_EMU is set, then the only reason to report is when
+ * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
+ * instruction has been already reported in syscall_enter_from_user_mode().
+ */
+static __always_inline bool report_single_step(unsigned long work)
+{
+	if (work & SYSCALL_WORK_SYSCALL_EMU)
+		return false;
+
+	return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
+}
+
 /**
  * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
  *
@@ -155,7 +220,32 @@ static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs
  *
  * Do one-time syscall specific work.
  */
-void syscall_exit_work(struct pt_regs *regs, unsigned long work);
+static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
+{
+	bool step;
+
+	/*
+	 * If the syscall was rolled back due to syscall user dispatching,
+	 * then the tracers below are not invoked for the same reason as
+	 * the entry side was not invoked in syscall_trace_enter(): The ABI
+	 * of these syscalls is unknown.
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+		if (unlikely(current->syscall_dispatch.on_dispatch)) {
+			current->syscall_dispatch.on_dispatch = false;
+			return;
+		}
+	}
+
+	audit_syscall_exit(regs);
+
+	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
+		trace_syscall_exit(regs, syscall_get_return_value(current, regs));
+
+	step = report_single_step(work);
+	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
+		arch_ptrace_report_syscall_exit(regs, step);
+}
 
 /**
  * syscall_exit_to_user_mode_work - Handle one time work before returning to user mode
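For context beyond these hunks, the consumer of the now-inlined syscall_exit_work() is the exit path in the same header. A condensed sketch of how it gates the work (assuming the existing syscall_exit_to_user_mode_work() structure; its lockdep/rseq prologue and the remaining exit-to-user-mode preparation are elided here):

```c
static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

	/* ... prologue (lockdep checks, rseq handling) elided ... */

	/*
	 * One-time syscall exit work is the slow path. With
	 * syscall_exit_work() inlined, enabled work (audit, tracepoints,
	 * ptrace reporting) no longer goes through an extra function call.
	 */
	if (unlikely(work & SYSCALL_WORK_EXIT))
		syscall_exit_work(regs, work);

	/* ... remaining exit-to-user-mode preparation elided ... */
}
```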
