 fs/exec.c                  |  2
 include/linux/rseq.h       | 81
 include/linux/rseq_types.h | 11
 kernel/rseq.c              |  2
 kernel/sched/core.c        |  7
 kernel/sched/sched.h       |  5
 6 files changed, 95 insertions(+), 13 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index e45b29890269..90e47eb156ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1775,7 +1775,7 @@ out:
force_fatal_sig(SIGSEGV);
sched_mm_cid_after_execve(current);
- rseq_sched_switch_event(current);
+ rseq_force_update();
current->in_execve = 0;
return retval;
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index f5a43188023f..abfbeb42d1a2 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -11,7 +11,8 @@ void __rseq_handle_notify_resume(struct pt_regs *regs);
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
- if (current->rseq.event.has_rseq)
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
__rseq_handle_notify_resume(regs);
}
@@ -33,12 +34,75 @@ static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *reg
}
}
-/* Raised from context switch and exevce to force evaluation on exit to user */
-static inline void rseq_sched_switch_event(struct task_struct *t)
+static inline void rseq_raise_notify_resume(struct task_struct *t)
{
- if (t->rseq.event.has_rseq) {
- t->rseq.event.sched_switch = true;
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+}
+
+/* Invoked from context switch to force evaluation on exit to user */
+static __always_inline void rseq_sched_switch_event(struct task_struct *t)
+{
+ struct rseq_event *ev = &t->rseq.event;
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /*
+ * Avoid a boat load of conditionals by using simple logic
+ * to determine whether NOTIFY_RESUME needs to be raised.
+ *
+ * It's required when the CPU or MM CID has changed or
+ * the entry was from user space.
+ */
+ bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+
+ if (raise) {
+ ev->sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
+ } else {
+ if (ev->has_rseq) {
+ ev->sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
+ }
+}
+
+/*
+ * Invoked from __set_task_cpu() when a task migrates to enforce an IDs
+ * update.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu)
+{
+ t->rseq.event.ids_changed = true;
+}
+
+/*
+ * Invoked from switch_mm_cid() in context switch when the task gets a MM
+ * CID assigned.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid)
+{
+ /*
+ * A comparison is needed here because switch_mm_cid() does not
+ * readily provide a conditional for it. This avoids excessive
+ * updates when nothing changes.
+ */
+ if (t->rseq.ids.mm_cid != cid)
+ t->rseq.event.ids_changed = true;
+}
+
+/* Enforce a full update after RSEQ registration and when execve() failed */
+static inline void rseq_force_update(void)
+{
+ if (current->rseq.event.has_rseq) {
+ current->rseq.event.ids_changed = true;
+ current->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(current);
}
}
@@ -55,7 +119,7 @@ static inline void rseq_sched_switch_event(struct task_struct *t)
static inline void rseq_virt_userspace_exit(void)
{
if (current->rseq.event.sched_switch)
- set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+ rseq_raise_notify_resume(current);
}
static inline void rseq_reset(struct task_struct *t)
@@ -91,6 +155,9 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
static inline void rseq_handle_notify_resume(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
+static inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid) { }
+static inline void rseq_force_update(void) { }
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
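
The '&' and '|' idiom used in rseq_handle_notify_resume() and rseq_sched_switch_event() above relies on the event fields being plain 0/1 bytes, so bitwise logic can replace short-circuit evaluation and its extra branches. A minimal user-space sketch of that idiom (illustrative only, not kernel code; struct event_flags and needs_notify() are stand-ins for the flag bytes of struct rseq_event):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the flag bytes of struct rseq_event; names mirror the patch. */
struct event_flags {
	uint8_t sched_switch;
	uint8_t ids_changed;
	uint8_t user_irq;
	uint8_t has_rseq;
};

/*
 * Mirrors the check in rseq_sched_switch_event(): OR the "work pending"
 * flags first, then AND with has_rseq. One test and one conditional
 * branch instead of up to three.
 */
static bool needs_notify(const struct event_flags *ev)
{
	return (ev->user_irq | ev->ids_changed) & ev->has_rseq;
}

int main(void)
{
	struct event_flags ev = { .ids_changed = 1, .has_rseq = 1 };

	printf("raise: %d\n", needs_notify(&ev));	/* 1 */
	ev.has_rseq = 0;
	printf("raise: %d\n", needs_notify(&ev));	/* 0 */
	return 0;
}
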
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index 7c123947bb98..a1389fff4fca 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -11,20 +11,27 @@ struct rseq;
* struct rseq_event - Storage for rseq related event management
* @all: Compound to initialize and clear the data efficiently
* @events: Compound to access events with a single load/store
- * @sched_switch: True if the task was scheduled out
+ * @sched_switch: True if the task was scheduled and needs update on
+ * exit to user
+ * @ids_changed: Indicator that IDs need to be updated
* @user_irq: True on interrupt entry from user mode
* @has_rseq: True if the task has a rseq pointer installed
* @error: Compound error code for the slow path to analyze
* @fatal: User space data corrupted or invalid
+ *
+ * @sched_switch and @ids_changed must be adjacent and the combo must be
+ * 16-bit aligned to allow a single store when both are set at the same
+ * time in the scheduler.
*/
struct rseq_event {
union {
u64 all;
struct {
union {
- u16 events;
+ u32 events;
struct {
u8 sched_switch;
+ u8 ids_changed;
u8 user_irq;
};
};
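
The kernel-doc comment above states the layout constraint that keeps the scheduler fast path cheap: @sched_switch and @ids_changed are adjacent u8 members with the pair 16-bit aligned, @events overlays all event bytes as one word, and @all covers the whole structure. A rough user-space model of those overlapping stores (illustrative only; the members past this hunk, such as has_rseq and the error bits, are omitted):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for the unions in struct rseq_event. */
struct event_words {
	union {
		uint64_t all;			/* init/clear everything at once */
		union {
			uint32_t events;	/* all event bytes as one word */
			struct {
				uint8_t sched_switch;
				uint8_t ids_changed;
				uint8_t user_irq;
			};
		};
	};
};

int main(void)
{
	struct event_words ev = { .all = 0 };
	uint16_t both = 0x0101;	/* byte 0 and byte 1 each set to 1 */

	/*
	 * sched_switch and ids_changed are adjacent and 16-bit aligned,
	 * so one 16-bit store sets both at the same time.
	 */
	memcpy(&ev.sched_switch, &both, sizeof(both));
	printf("%u %u %u\n", ev.sched_switch, ev.ids_changed, ev.user_irq);

	ev.events = 0;	/* a single 32-bit store clears all event bytes */
	return 0;
}
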
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 148fb2103023..183dde756808 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -464,7 +464,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
* are updated before returning to user-space.
*/
current->rseq.event.has_rseq = true;
- rseq_sched_switch_event(current);
+ rseq_force_update();
return 0;
efault:
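
The registration path now goes through rseq_force_update(), which guarantees that the ID fields are written before the rseq() syscall returns to user space. A minimal user-space registration sketch (assumes a reasonably recent <linux/rseq.h>; RSEQ_SIG is an arbitrary value chosen here, and on glibc 2.35+ the call typically fails with EBUSY because glibc already registered rseq for the thread):

#define _GNU_SOURCE
#include <linux/rseq.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define RSEQ_SIG	0x53053053	/* abort handler signature, picked by the program */

/* The ABI requires a 32-byte aligned struct rseq that stays valid for the thread. */
static struct rseq rs __attribute__((aligned(32)));

int main(void)
{
	if (syscall(__NR_rseq, &rs, sizeof(rs), 0, RSEQ_SIG)) {
		perror("rseq");	/* e.g. EBUSY when glibc already registered */
		return 1;
	}

	/*
	 * Registration forces an update before returning to user space,
	 * so cpu_id is already valid here rather than only after the
	 * next context switch.
	 */
	printf("cpu_id: %u\n", rs.cpu_id);
	return 0;
}
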
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b75e8e1eca4a..579a8e93578f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5118,7 +5118,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
kcov_prepare_switch(prev);
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
- rseq_sched_switch_event(prev);
fire_sched_out_preempt_notifiers(prev, next);
kmap_local_sched_out();
prepare_task(next);
@@ -5316,6 +5315,12 @@ context_switch(struct rq *rq, struct task_struct *prev,
/* switch_mm_cid() requires the memory barriers above. */
switch_mm_cid(rq, prev, next);
+ /*
+ * Tell rseq that the task was scheduled in. Must be after
+ * switch_mm_cid() to get the TIF flag set.
+ */
+ rseq_sched_switch_event(next);
+
prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index adfb6e3409d7..4838dda75b10 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2209,6 +2209,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
smp_wmb();
WRITE_ONCE(task_thread_info(p)->cpu, cpu);
p->wake_cpu = cpu;
+ rseq_sched_set_task_cpu(p, cpu);
#endif /* CONFIG_SMP */
}
@@ -3807,8 +3808,10 @@ static inline void switch_mm_cid(struct rq *rq,
mm_cid_put_lazy(prev);
prev->mm_cid = -1;
}
- if (next->mm_cid_active)
+ if (next->mm_cid_active) {
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm);
+ rseq_sched_set_task_mm_cid(next, next->mm_cid);
+ }
}
#else /* !CONFIG_SCHED_MM_CID: */
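
Taken together, the two scheduler hooks only record state: migration marks the IDs stale unconditionally, switch_mm_cid() marks them stale only when the CID actually changed, and the single rseq_sched_switch_event() call in context_switch() then raises TIF_NOTIFY_RESUME, which is why it has to run after switch_mm_cid(). A compressed user-space model of that ordering (names loosely follow the patch; this is an illustration, not kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct task {
	struct {
		uint8_t sched_switch;
		uint8_t ids_changed;
		uint8_t user_irq;
		uint8_t has_rseq;
	} event;
	int rseq_mm_cid;	/* CID last made visible to user space */
	bool notify_resume;	/* stands in for TIF_NOTIFY_RESUME */
};

/* Models rseq_sched_set_task_cpu(): migration just marks the IDs stale. */
static void hook_set_task_cpu(struct task *t, unsigned int cpu)
{
	(void)cpu;
	t->event.ids_changed = 1;
}

/* Models rseq_sched_set_task_mm_cid(): record only an actual change. */
static void hook_set_task_mm_cid(struct task *t, int cid)
{
	if (t->rseq_mm_cid != cid)
		t->event.ids_changed = 1;
}

/*
 * Models rseq_sched_switch_event(): runs last in context_switch(), sees
 * whatever the hooks above recorded and raises the notify flag once.
 */
static void hook_sched_switch_event(struct task *t)
{
	if ((t->event.user_irq | t->event.ids_changed) & t->event.has_rseq) {
		t->event.sched_switch = 1;
		t->notify_resume = true;
	}
}

int main(void)
{
	struct task t = { .event.has_rseq = 1, .rseq_mm_cid = 0 };

	hook_set_task_cpu(&t, 3);	/* migrated to another CPU */
	hook_set_task_mm_cid(&t, 0);	/* same CID: nothing extra recorded */
	hook_sched_switch_event(&t);	/* must come after the hooks above */

	printf("notify_resume: %d\n", t.notify_resume);	/* prints 1 */
	return 0;
}
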