summary refs log tree commit diff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-03 12:05:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-03 12:05:06 -0700
commit631919fb12fe7b1f0453fe1035e62ce704bc3923 (patch)
tree77e9b79bc365f83b980b26bf5e915ef2f3a25666 /kernel
parente41255ce7acc4a3412ecdaa74b32deee980d27f7 (diff)
parent7e0ffb72de8aa3b25989c2d980e81b829c577010 (diff)
Merge tag 'sched_ext-for-7.0-rc6-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:
 "These are late but both fix subtle yet critical problems and the
  blast radius is limited strictly to sched_ext.

  - Fix stale direct dispatch state in ddsp_dsq_id which can cause
    spurious warnings in mark_direct_dispatch() on task wakeup

  - Fix is_bpf_migration_disabled() false negative on non-PREEMPT_RCU
    configs which can lead to incorrectly dispatching
    migration-disabled tasks to remote CPUs"

* tag 'sched_ext-for-7.0-rc6-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix stale direct dispatch state in ddsp_dsq_id
  sched_ext: Fix is_bpf_migration_disabled() false negative on non-PREEMPT_RCU
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/ext.c       49
-rw-r--r--  kernel/sched/ext_idle.c  31
2 files changed, 54 insertions, 26 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index d5bdcdb3f700..064eaa76be4b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1110,15 +1110,6 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
p->scx.dsq = dsq;
/*
- * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
- * direct dispatch path, but we clear them here because the direct
- * dispatch verdict may be overridden on the enqueue path during e.g.
- * bypass.
- */
- p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
- p->scx.ddsp_enq_flags = 0;
-
- /*
* We're transitioning out of QUEUEING or DISPATCHING. store_release to
* match waiters' load_acquire.
*/
@@ -1283,12 +1274,34 @@ static void mark_direct_dispatch(struct scx_sched *sch,
p->scx.ddsp_enq_flags = enq_flags;
}
+/*
+ * Clear @p direct dispatch state when leaving the scheduler.
+ *
+ * Direct dispatch state must be cleared in the following cases:
+ * - direct_dispatch(): cleared on the synchronous enqueue path, deferred
+ * dispatch keeps the state until consumed
+ * - process_ddsp_deferred_locals(): cleared after consuming deferred state,
+ * - do_enqueue_task(): cleared on enqueue fallbacks where the dispatch
+ * verdict is ignored (local/global/bypass)
+ * - dequeue_task_scx(): cleared after dispatch_dequeue(), covering deferred
+ * cancellation and holding_cpu races
+ * - scx_disable_task(): cleared for queued wakeup tasks, which are excluded by
+ * the scx_bypass() loop, so that stale state is not reused by a subsequent
+ * scheduler instance
+ */
+static inline void clear_direct_dispatch(struct task_struct *p)
+{
+ p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
+ p->scx.ddsp_enq_flags = 0;
+}
+
static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
u64 enq_flags)
{
struct rq *rq = task_rq(p);
struct scx_dispatch_q *dsq =
find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
+ u64 ddsp_enq_flags;
touch_core_sched_dispatch(rq, p);
@@ -1329,8 +1342,10 @@ static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
return;
}
- dispatch_enqueue(sch, dsq, p,
- p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
+ ddsp_enq_flags = p->scx.ddsp_enq_flags;
+ clear_direct_dispatch(p);
+
+ dispatch_enqueue(sch, dsq, p, ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
}
static bool scx_rq_online(struct rq *rq)
@@ -1439,6 +1454,7 @@ enqueue:
*/
touch_core_sched(rq, p);
refill_task_slice_dfl(sch, p);
+ clear_direct_dispatch(p);
dispatch_enqueue(sch, dsq, p, enq_flags);
}
@@ -1610,6 +1626,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
sub_nr_running(rq, 1);
dispatch_dequeue(rq, p);
+ clear_direct_dispatch(p);
return true;
}
@@ -2293,13 +2310,15 @@ static void process_ddsp_deferred_locals(struct rq *rq)
struct task_struct, scx.dsq_list.node))) {
struct scx_sched *sch = scx_root;
struct scx_dispatch_q *dsq;
+ u64 dsq_id = p->scx.ddsp_dsq_id;
+ u64 enq_flags = p->scx.ddsp_enq_flags;
list_del_init(&p->scx.dsq_list.node);
+ clear_direct_dispatch(p);
- dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
+ dsq = find_dsq_for_dispatch(sch, rq, dsq_id, p);
if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
- dispatch_to_local_dsq(sch, rq, dsq, p,
- p->scx.ddsp_enq_flags);
+ dispatch_to_local_dsq(sch, rq, dsq, p, enq_flags);
}
}
@@ -3015,6 +3034,8 @@ static void scx_disable_task(struct task_struct *p)
lockdep_assert_rq_held(rq);
WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
+ clear_direct_dispatch(p);
+
if (SCX_HAS_OP(sch, disable))
SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p);
scx_set_task_state(p, SCX_TASK_READY);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 0ae93cd64004..44c3a50c542c 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -860,25 +860,32 @@ static bool check_builtin_idle_enabled(struct scx_sched *sch)
* code.
*
* We can't simply check whether @p->migration_disabled is set in a
- * sched_ext callback, because migration is always disabled for the current
- * task while running BPF code.
+ * sched_ext callback, because the BPF prolog (__bpf_prog_enter) may disable
+ * migration for the current task while running BPF code.
*
- * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) respectively
- * disable and re-enable migration. For this reason, the current task
- * inside a sched_ext callback is always a migration-disabled task.
+ * Since the BPF prolog calls migrate_disable() only when CONFIG_PREEMPT_RCU
+ * is enabled (via rcu_read_lock_dont_migrate()), migration_disabled == 1 for
+ * the current task is ambiguous only in that case: it could be from the BPF
+ * prolog rather than a real migrate_disable() call.
*
- * Therefore, when @p->migration_disabled == 1, check whether @p is the
- * current task or not: if it is, then migration was not disabled before
- * entering the callback, otherwise migration was disabled.
+ * Without CONFIG_PREEMPT_RCU, the BPF prolog never calls migrate_disable(),
+ * so migration_disabled == 1 always means the task is truly
+ * migration-disabled.
+ *
+ * Therefore, when migration_disabled == 1 and CONFIG_PREEMPT_RCU is enabled,
+ * check whether @p is the current task or not: if it is, then migration was
+ * not disabled before entering the callback, otherwise migration was disabled.
*
* Returns true if @p is migration-disabled, false otherwise.
*/
static bool is_bpf_migration_disabled(const struct task_struct *p)
{
- if (p->migration_disabled == 1)
- return p != current;
- else
- return p->migration_disabled;
+ if (p->migration_disabled == 1) {
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU))
+ return p != current;
+ return true;
+ }
+ return p->migration_disabled;
}
static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,