summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorJuri Lelli <juri.lelli@redhat.com>2026-03-02 16:45:40 +0100
committerPeter Zijlstra <peterz@infradead.org>2026-03-04 17:06:08 +0100
commitd658686a1331db3bb108ca079d76deb3208ed949 (patch)
tree4c2574c84f16a523df3b1885999a68da177b4ae5 /kernel
parent11439c4635edd669ae435eec308f4ab8a0804808 (diff)
sched/deadline: Fix missing ENQUEUE_REPLENISH during PI de-boosting
Running stress-ng --schedpolicy 0 on an RT kernel on a big machine might lead to the following WARNINGs (edited). sched: DL de-boosted task PID 22725: REPLENISH flag missing WARNING: CPU: 93 PID: 0 at kernel/sched/deadline.c:239 dequeue_task_dl+0x15c/0x1f8 ... (running_bw underflow) Call trace: dequeue_task_dl+0x15c/0x1f8 (P) dequeue_task+0x80/0x168 deactivate_task+0x24/0x50 push_dl_task+0x264/0x2e0 dl_task_timer+0x1b0/0x228 __hrtimer_run_queues+0x188/0x378 hrtimer_interrupt+0xfc/0x260 ... The problem is that when a SCHED_DEADLINE task (lock holder) is changed to a lower priority class via sched_setscheduler(), it may fail to properly inherit the parameters of potential DEADLINE donors if it didn't already inherit them in the past (shorter deadline than donor's at that time). This might lead to bandwidth accounting corruption, as enqueue_task_dl() won't recognize the lock holder as boosted. The scenario occurs when: 1. A DEADLINE task (donor) blocks on a PI mutex held by another DEADLINE task (holder), but the holder doesn't inherit parameters (e.g., it already has a shorter deadline) 2. sched_setscheduler() changes the holder from DEADLINE to a lower class while still holding the mutex 3. The holder should now inherit DEADLINE parameters from the donor and be enqueued with ENQUEUE_REPLENISH, but this doesn't happen Fix the issue by introducing __setscheduler_dl_pi(), which detects when a DEADLINE (proper or boosted) task gets setscheduled to a lower priority class. In that case, the function makes the task inherit DEADLINE parameters of the donor (pi_se) and sets the ENQUEUE_REPLENISH flag to ensure proper bandwidth accounting during the next enqueue operation. 
Fixes: 2279f540ea7d ("sched/deadline: Fix priority inheritance with multiple scheduling classes") Reported-by: Bruno Goncalves <bgoncalv@redhat.com> Signed-off-by: Juri Lelli <juri.lelli@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://patch.msgid.link/20260302-upstream-fix-deadline-piboost-b4-v3-1-6ba32184a9e0@redhat.com
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/syscalls.c30
1 files changed, 30 insertions, 0 deletions
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 6f10db3646e7..cadb0e9fe19b 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -284,6 +284,35 @@ static bool check_same_owner(struct task_struct *p)
uid_eq(cred->euid, pcred->uid));
}
+#ifdef CONFIG_RT_MUTEXES
+static inline void __setscheduler_dl_pi(int newprio, int policy,
+ struct task_struct *p,
+ struct sched_change_ctx *scope)
+{
+ /*
+ * In case a DEADLINE task (either proper or boosted) gets
+ * setscheduled to a lower priority class, check if it needs to
+ * inherit parameters (pi_se) from its top pi_task. If there is one,
+ * set ENQUEUE_REPLENISH so replenishment happens with the next enqueue.
+ */
+
+ if (dl_prio(newprio) && !dl_policy(policy)) {
+ struct task_struct *pi_task = rt_mutex_get_top_task(p);
+
+ if (pi_task) {
+ p->dl.pi_se = pi_task->dl.pi_se;
+ scope->flags |= ENQUEUE_REPLENISH;
+ }
+ }
+}
+#else /* !CONFIG_RT_MUTEXES */
+static inline void __setscheduler_dl_pi(int newprio, int policy,
+ struct task_struct *p,
+ struct sched_change_ctx *scope)
+{
+}
+#endif /* !CONFIG_RT_MUTEXES */
+
#ifdef CONFIG_UCLAMP_TASK
static int uclamp_validate(struct task_struct *p,
@@ -655,6 +684,7 @@ change:
__setscheduler_params(p, attr);
p->sched_class = next_class;
p->prio = newprio;
+ __setscheduler_dl_pi(newprio, policy, p, scope);
}
__setscheduler_uclamp(p, attr);