From 9311e6c29b348b005e79228ef6facd38ebcc73f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Nov 2025 08:12:36 -1000 Subject: cgroup: Fix sleeping from invalid context warning on PREEMPT_RT cgroup_task_dead() is called from finish_task_switch() which runs with preemption disabled and doesn't allow scheduling even on PREEMPT_RT. The function needs to acquire css_set_lock which is a regular spinlock that can sleep on RT kernels, leading to "sleeping function called from invalid context" warnings. css_set_lock is too large in scope to convert to a raw_spinlock. However, the unlinking operations don't need to run synchronously - they just need to complete after the task is done running. On PREEMPT_RT, defer the work through irq_work. While the work doesn't need to happen immediately, it can't be delayed indefinitely either as the dead task pins the cgroup and task_struct can be pinned indefinitely. Use the lazy version of irq_work to allow batching and lower impact while ensuring timely completion. v2: Use IRQ_WORK_INIT_LAZY instead of immediate irq_work and add explanation for why the work can't be delayed indefinitely (Sebastian Andrzej Siewior). Fixes: d245698d727a ("cgroup: Defer task cgroup unlink until after the task is done switching out") Reported-by: Calvin Owens Link: https://lore.kernel.org/r/20251104181114.489391-1-calvin@wbinvd.org Signed-off-by: Tejun Heo --- include/linux/sched.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cbb7340c5866..5e80d48488ef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1324,7 +1324,10 @@ struct task_struct { struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; -#endif +#ifdef CONFIG_PREEMPT_RT + struct llist_node cg_dead_lnode; +#endif /* CONFIG_PREEMPT_RT */ +#endif /* CONFIG_CGROUPS */ #ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; -- cgit v1.2.3