| author | Tejun Heo <tj@kernel.org> | 2026-04-24 14:31:35 -1000 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2026-04-24 14:31:35 -1000 |
| commit | 80afd4c84bc8f5e80145ce35279f5ce53f6043db | |
| tree | 592c4cbb256355b680e5d5d4b30dc1c8a5f473a1 /kernel | |
| parent | 21a5a97ba47842ef0c52d6c89e501dce27806550 | |
sched_ext: Read scx_root under scx_cgroup_ops_rwsem in cgroup setters
scx_group_set_{weight,idle,bandwidth}() cache scx_root before acquiring
scx_cgroup_ops_rwsem, so the pointer can be stale by the time the op runs.
If the loaded scheduler is disabled and freed (via RCU work) and another is
enabled between the naked load and the rwsem acquire, the reader sees
scx_cgroup_enabled=true (set by the new scheduler) but dereferences the freed
one: a use-after-free in SCX_HAS_OP(sch, ...) / SCX_CALL_OP(sch, ...).
scx_cgroup_enabled is toggled only with scx_cgroup_ops_rwsem held for write
(scx_cgroup_{init,exit}), so reading scx_root inside the rwsem read section
keeps @sch consistent with the scx_cgroup_enabled snapshot.
Fixes: a5bd6ba30b33 ("sched_ext: Use cgroup_lock/unlock() to synchronize against cgroup operations")
Cc: stable@vger.kernel.org # v6.18+
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/ext.c | 9 |
1 file changed, 6 insertions(+), 3 deletions(-)
```diff
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index dd0539ab9ba8..f6d22636a4de 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4343,9 +4343,10 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
 
 void scx_group_set_weight(struct task_group *tg, unsigned long weight)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch;
 
 	percpu_down_read(&scx_cgroup_ops_rwsem);
+	sch = scx_root;
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
 	    tg->scx.weight != weight)
@@ -4358,9 +4359,10 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
 
 void scx_group_set_idle(struct task_group *tg, bool idle)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch;
 
 	percpu_down_read(&scx_cgroup_ops_rwsem);
+	sch = scx_root;
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_idle))
 		SCX_CALL_OP(sch, cgroup_set_idle, NULL, tg_cgrp(tg), idle);
@@ -4374,9 +4376,10 @@ void scx_group_set_idle(struct task_group *tg, bool idle)
 void scx_group_set_bandwidth(struct task_group *tg,
 			     u64 period_us, u64 quota_us, u64 burst_us)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch;
 
 	percpu_down_read(&scx_cgroup_ops_rwsem);
+	sch = scx_root;
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) &&
 	    (tg->scx.bw_period_us != period_us ||
```
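To make the locking rule concrete, here is a minimal userspace sketch of the same pattern, assuming a pthreads rwlock in place of the kernel's percpu_rwsem and RCU-deferred free; every identifier in it (struct sched, root, ops_rwsem, set_weight(), swap_sched()) is hypothetical, not taken from the kernel. It illustrates the invariant the patch restores: the shared root pointer must be loaded inside the read-side critical section, so it cannot name an object the write side has already freed.

```c
/* Hypothetical userspace sketch of the "load the pointer under the lock"
 * pattern; not kernel code. Compile with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct sched { bool has_weight_op; };

static pthread_rwlock_t ops_rwsem = PTHREAD_RWLOCK_INITIALIZER;
static struct sched *root;      /* replaced only under ops_rwsem write */
static bool cgroup_enabled;     /* toggled only under ops_rwsem write  */

/* Reader, mirroring the fixed scx_group_set_weight() shape: the load of
 * @root happens after taking the lock, so @sch is guaranteed to be the
 * scheduler that set the cgroup_enabled flag we test. */
static void set_weight(unsigned long weight)
{
	struct sched *sch;

	pthread_rwlock_rdlock(&ops_rwsem);
	sch = root;
	if (cgroup_enabled && sch->has_weight_op) {
		/* ... apply @weight via the op on @sch ... */
		(void)weight;
	}
	pthread_rwlock_unlock(&ops_rwsem);
}

/* Writer: swap schedulers. Freeing the old one after the write section
 * is safe here because no reader can still hold a pointer to it. */
static void swap_sched(struct sched *new_sch)
{
	struct sched *old;

	pthread_rwlock_wrlock(&ops_rwsem);
	old = root;
	root = new_sch;
	cgroup_enabled = (new_sch != NULL);
	pthread_rwlock_unlock(&ops_rwsem);
	free(old);
}

int main(void)
{
	struct sched *s = calloc(1, sizeof(*s));

	s->has_weight_op = true;
	swap_sched(s);          /* enable a scheduler  */
	set_weight(100);
	swap_sched(NULL);       /* disable and free it */
	return 0;
}
```

The buggy shape would load sch = root on the line before pthread_rwlock_rdlock(); a concurrent swap_sched() landing in that window frees the object @sch still points at while leaving cgroup_enabled set, which is exactly the use-after-free described above.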
