-rw-r--r--  fs/exec.c                  |  2
-rw-r--r--  include/linux/rseq.h       | 81
-rw-r--r--  include/linux/rseq_types.h | 11
-rw-r--r--  kernel/rseq.c              |  2
-rw-r--r--  kernel/sched/core.c        |  7
-rw-r--r--  kernel/sched/sched.h       |  5
6 files changed, 95 insertions, 13 deletions
diff --git a/fs/exec.c b/fs/exec.c
index e45b29890269..90e47eb156ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1775,7 +1775,7 @@ out:
 	force_fatal_sig(SIGSEGV);
 
 	sched_mm_cid_after_execve(current);
-	rseq_sched_switch_event(current);
+	rseq_force_update();
 	current->in_execve = 0;
 
 	return retval;
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index f5a43188023f..abfbeb42d1a2 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -11,7 +11,8 @@ void __rseq_handle_notify_resume(struct pt_regs *regs);
 
 static inline void rseq_handle_notify_resume(struct pt_regs *regs)
 {
-	if (current->rseq.event.has_rseq)
+	/* '&' is intentional to spare one conditional branch */
+	if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
 		__rseq_handle_notify_resume(regs);
 }
 
@@ -33,12 +34,75 @@ static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *reg
 	}
 }
 
-/* Raised from context switch and exevce to force evaluation on exit to user */
-static inline void rseq_sched_switch_event(struct task_struct *t)
+static inline void rseq_raise_notify_resume(struct task_struct *t)
 {
-	if (t->rseq.event.has_rseq) {
-		t->rseq.event.sched_switch = true;
-		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+}
+
+/* Invoked from context switch to force evaluation on exit to user */
+static __always_inline void rseq_sched_switch_event(struct task_struct *t)
+{
+	struct rseq_event *ev = &t->rseq.event;
+
+	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+		/*
+		 * Avoid a boat load of conditionals by using simple logic
+		 * to determine whether NOTIFY_RESUME needs to be raised.
+		 *
+		 * It's required when the CPU or MM CID has changed or
+		 * the entry was from user space.
+		 */
+		bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+
+		if (raise) {
+			ev->sched_switch = true;
+			rseq_raise_notify_resume(t);
+		}
+	} else {
+		if (ev->has_rseq) {
+			t->rseq.event.sched_switch = true;
+			rseq_raise_notify_resume(t);
+		}
+	}
+}
+
+/*
+ * Invoked from __set_task_cpu() when a task migrates to enforce an IDs
+ * update.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu)
+{
+	t->rseq.event.ids_changed = true;
+}
+
+/*
+ * Invoked from switch_mm_cid() in context switch when the task gets a MM
+ * CID assigned.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid)
+{
+	/*
+	 * Requires a comparison as the switch_mm_cid() code does not
+	 * provide a conditional for it readily. So avoid excessive updates
+	 * when nothing changes.
+	 */
+	if (t->rseq.ids.mm_cid != cid)
+		t->rseq.event.ids_changed = true;
+}
+
+/* Enforce a full update after RSEQ registration and when execve() failed */
+static inline void rseq_force_update(void)
+{
+	if (current->rseq.event.has_rseq) {
+		current->rseq.event.ids_changed = true;
+		current->rseq.event.sched_switch = true;
+		rseq_raise_notify_resume(current);
 	}
 }
 
@@ -55,7 +119,7 @@ static inline void rseq_sched_switch_event(struct task_struct *t)
 static inline void rseq_virt_userspace_exit(void)
 {
 	if (current->rseq.event.sched_switch)
-		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+		rseq_raise_notify_resume(current);
 }
 
 static inline void rseq_reset(struct task_struct *t)
@@ -91,6 +155,9 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
 static inline void rseq_handle_notify_resume(struct pt_regs *regs) { }
 static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
 static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
+static inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid) { }
+static inline void rseq_force_update(void) { }
 static inline void rseq_virt_userspace_exit(void) { }
 static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
 static inline void rseq_execve(struct task_struct *t) { }
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index 7c123947bb98..a1389fff4fca 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -11,20 +11,27 @@ struct rseq;
  * struct rseq_event - Storage for rseq related event management
  * @all:		Compound to initialize and clear the data efficiently
  * @events:		Compound to access events with a single load/store
- * @sched_switch:	True if the task was scheduled out
+ * @sched_switch:	True if the task was scheduled and needs update on
+ *			exit to user
+ * @ids_changed:	Indicator that IDs need to be updated
  * @user_irq:		True on interrupt entry from user mode
  * @has_rseq:		True if the task has a rseq pointer installed
  * @error:		Compound error code for the slow path to analyze
  * @fatal:		User space data corrupted or invalid
+ *
+ * @sched_switch and @ids_changed must be adjacent and the combo must be
+ * 16bit aligned to allow a single store, when both are set at the same
+ * time in the scheduler.
  */
 struct rseq_event {
 	union {
 		u64			all;
 		struct {
 			union {
-				u16	events;
+				u32	events;
 				struct {
 					u8	sched_switch;
+					u8	ids_changed;
 					u8	user_irq;
 				};
 			};
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 148fb2103023..183dde756808 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -464,7 +464,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
 	 * are updated before returning to user-space.
 	 */
 	current->rseq.event.has_rseq = true;
-	rseq_sched_switch_event(current);
+	rseq_force_update();
 
 	return 0;
 
 efault:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b75e8e1eca4a..579a8e93578f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5118,7 +5118,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 	kcov_prepare_switch(prev);
 	sched_info_switch(rq, prev, next);
 	perf_event_task_sched_out(prev, next);
-	rseq_sched_switch_event(prev);
 	fire_sched_out_preempt_notifiers(prev, next);
 	kmap_local_sched_out();
 	prepare_task(next);
@@ -5316,6 +5315,12 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	/* switch_mm_cid() requires the memory barriers above. */
 	switch_mm_cid(rq, prev, next);
 
+	/*
+	 * Tell rseq that the task was scheduled in. Must be after
+	 * switch_mm_cid() to get the TIF flag set.
+	 */
+	rseq_sched_switch_event(next);
+
 	prepare_lock_switch(rq, next, rf);
 
 	/* Here we just switch the register state and the stack. */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index adfb6e3409d7..4838dda75b10 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2209,6 +2209,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 	smp_wmb();
 	WRITE_ONCE(task_thread_info(p)->cpu, cpu);
 	p->wake_cpu = cpu;
+	rseq_sched_set_task_cpu(p, cpu);
 #endif /* CONFIG_SMP */
 }
 
@@ -3807,8 +3808,10 @@ static inline void switch_mm_cid(struct rq *rq,
 		mm_cid_put_lazy(prev);
 		prev->mm_cid = -1;
 	}
-	if (next->mm_cid_active)
+	if (next->mm_cid_active) {
 		next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm);
+		rseq_sched_set_task_mm_cid(next, next->mm_cid);
+	}
 }
 
 #else /* !CONFIG_SCHED_MM_CID: */
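
The rseq_event layout in the rseq_types.h hunk above relies on two tricks that the helpers exploit: the event flags are adjacent u8 members overlaid by wider integers (@events, @all), so several flags can be inspected or cleared with a single load/store, and the flag bytes are combined with bitwise '|'/'&' rather than logical operators to avoid conditional branches. Below is a minimal standalone userspace sketch of that idea; the flag names mirror the patch, but the demo struct, the placement of has_rseq and the printed values are illustrative assumptions, not kernel code.

/* Userspace sketch (C11 anonymous unions) -- illustration only, not part of the patch */
#include <stdint.h>
#include <stdio.h>

struct rseq_event_demo {
	union {
		uint64_t all;			/* clear the whole event state in one 64-bit store */
		struct {
			union {
				uint32_t events;	/* all event bytes in a single load/store */
				struct {
					uint8_t sched_switch;
					uint8_t ids_changed;
					uint8_t user_irq;
				};
			};
			uint8_t has_rseq;	/* placement here is an assumption of the sketch */
		};
	};
};

int main(void)
{
	struct rseq_event_demo ev = { 0 };

	ev.has_rseq = 1;
	ev.user_irq = 1;

	/* Branch-free test, same shape as in rseq_sched_switch_event() */
	uint8_t raise = (ev.user_irq | ev.ids_changed) & ev.has_rseq;
	printf("raise notify_resume: %u\n", (unsigned int)raise);

	ev.sched_switch = 1;
	ev.ids_changed = 1;
	/* One 32-bit load observes all event flags at once */
	printf("events word: 0x%08x\n", (unsigned int)ev.events);

	/* One 64-bit store resets everything; that is what the @all member is for */
	ev.all = 0;
	printf("after reset: events=0x%x has_rseq=%u\n",
	       (unsigned int)ev.events, (unsigned int)ev.has_rseq);
	return 0;
}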
