summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2025-10-27 09:44:28 +0100
committerIngo Molnar <mingo@kernel.org>2025-11-04 08:30:23 +0100
commit83409986f49f17b14a675f9c598ad50d4c60191b (patch)
tree59efe6d5d76c099ea7914d015a7f7e2e1185350e /include/linux
parentd923739e2e356424cc566143a3323c62cd6ed067 (diff)
rseq, virt: Retrigger RSEQ after vcpu_run()
Hypervisors invoke resume_user_mode_work() before entering the guest, which clears TIF_NOTIFY_RESUME. The @regs argument is NULL as there is no user space context available to them, so the rseq notify handler skips inspecting the critical section, but updates the CPU/MM CID values unconditionally so that the eventual pending rseq event is not lost on the way to user space. This is a pointless exercise as the task might be rescheduled before actually returning to user space and it creates unnecessary work in the vcpu_run() loops. It's way more efficient to ignore that invocation based on @regs == NULL and let the hypervisors re-raise TIF_NOTIFY_RESUME after returning from the vcpu_run() loop before returning from the ioctl(). This ensures that a pending RSEQ update is not lost and the IDs are updated before returning to user space. Once the RSEQ handling is decoupled from TIF_NOTIFY_RESUME, this turns into a NOOP. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Acked-by: Sean Christopherson <seanjc@google.com> Link: https://patch.msgid.link/20251027084306.399495855@linutronix.de
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/rseq.h17
1 files changed, 17 insertions, 0 deletions
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 241067bf20db..c6267f70c746 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -38,6 +38,22 @@ static __always_inline void rseq_exit_to_user_mode(void)
}
/*
+ * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
+ * which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in
+ * that case just to do it eventually again before returning to user space,
+ * the entry resume_user_mode_work() invocation is ignored as the register
+ * argument is NULL.
+ *
+ * After returning from guest mode, they have to invoke this function to
+ * re-raise TIF_NOTIFY_RESUME if necessary.
+ */
+static inline void rseq_virt_userspace_exit(void)
+{
+ if (current->rseq_event_pending)
+ set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+}
+
+/*
* If parent process has a registered restartable sequences area, the
* child inherits. Unregister rseq for a clone with CLONE_VM set.
*/
@@ -68,6 +84,7 @@ static inline void rseq_execve(struct task_struct *t)
static inline void rseq_handle_notify_resume(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
static inline void rseq_exit_to_user_mode(void) { }