From 83409986f49f17b14a675f9c598ad50d4c60191b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Oct 2025 09:44:28 +0100 Subject: rseq, virt: Retrigger RSEQ after vcpu_run() Hypervisors invoke resume_user_mode_work() before entering the guest, which clears TIF_NOTIFY_RESUME. The @regs argument is NULL as there is no user space context available to them, so the rseq notify handler skips inspecting the critical section, but updates the CPU/MM CID values unconditionally so that the eventual pending rseq event is not lost on the way to user space. This is a pointless exercise as the task might be rescheduled before actually returning to user space and it creates unnecessary work in the vcpu_run() loops. It's way more efficient to ignore that invocation based on @regs == NULL and let the hypervisors re-raise TIF_NOTIFY_RESUME after returning from the vcpu_run() loop before returning from the ioctl(). This ensures that a pending RSEQ update is not lost and the IDs are updated before returning to user space. Once the RSEQ handling is decoupled from TIF_NOTIFY_RESUME, this turns into a NOOP. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Mathieu Desnoyers Acked-by: Sean Christopherson Link: https://patch.msgid.link/20251027084306.399495855@linutronix.de --- include/linux/rseq.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 241067bf20db..c6267f70c746 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -37,6 +37,22 @@ static __always_inline void rseq_exit_to_user_mode(void) } } +/* + * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode, + * which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in + * that case just to do it eventually again before returning to user space, + * the entry resume_user_mode_work() invocation is ignored as the register + * argument is NULL. + * + * After returning from guest mode, they have to invoke this function to + * re-raise TIF_NOTIFY_RESUME if necessary. + */ +static inline void rseq_virt_userspace_exit(void) +{ + if (current->rseq_event_pending) + set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); +} + /* * If parent process has a registered restartable sequences area, the * child inherits. Unregister rseq for a clone with CLONE_VM set. @@ -68,6 +84,7 @@ static inline void rseq_execve(struct task_struct *t) static inline void rseq_handle_notify_resume(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_sched_switch_event(struct task_struct *t) { } +static inline void rseq_virt_userspace_exit(void) { } static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { } static inline void rseq_execve(struct task_struct *t) { } static inline void rseq_exit_to_user_mode(void) { } -- cgit v1.2.3