From e9139f765ac7048cadc9981e962acdf8b08eabf3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 30 Oct 2024 13:43:43 +0100
Subject: sched: Employ sched_change guards

As proposed a long while ago -- and half done by scx -- wrap the
scheduler's 'change' pattern in a guard helper.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Juri Lelli
Acked-by: Tejun Heo
Acked-by: Vincent Guittot
---
 include/linux/cleanup.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 2573585b7f06..ae381675455d 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -340,6 +340,11 @@ _label:						\
 #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond)	\
 static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
 
+#define DEFINE_CLASS_IS_UNCONDITIONAL(_name)		\
+	__DEFINE_CLASS_IS_CONDITIONAL(_name, false);	\
+	static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
+	{ return (void *)1; }
+
 #define __GUARD_IS_ERR(_ptr)				\
 ({							\
 	unsigned long _rc = (__force unsigned long)(_ptr); \
-- 
cgit v1.2.3

From b079d93796528053cde322f2ca838c2d21c297e7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 10 Sep 2025 10:08:05 +0200
Subject: sched: Rename do_set_cpus_allowed()

Hopefully saner naming.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Juri Lelli
Acked-by: Tejun Heo
Acked-by: Vincent Guittot
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbb7340c5866..77426c347cff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1861,8 +1861,8 @@ extern int task_can_attach(struct task_struct *p);
 extern int dl_bw_alloc(int cpu, u64 dl_bw);
 extern void dl_bw_free(int cpu, u64 dl_bw);
 
-/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */
-extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
+/* set_cpus_allowed_force() - consider using set_cpus_allowed_ptr() instead */
+extern void set_cpus_allowed_force(struct task_struct *p, const struct cpumask *new_mask);
 
 /**
  * set_cpus_allowed_ptr - set CPU affinity mask of a task
-- 
cgit v1.2.3

From 50653216e4ff7a74c95b2ee9ec439916875556ec Mon Sep 17 00:00:00 2001
From: Joel Fernandes
Date: Sat, 9 Aug 2025 14:47:50 -0400
Subject: sched: Add support to pick functions to take rf

Some pick functions, like the internal pick_next_task_fair(), already
take rf, but some others don't. We need this for scx's server pick
function. Prepare for this by having pick functions accept it.

[peterz:
  - added RETRY_TASK handling
  - removed pick_next_task_fair indirection]

Signed-off-by: Joel Fernandes
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Tejun Heo
---
 include/linux/sched.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 77426c347cff..07576479c0ed 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -637,8 +637,8 @@ struct sched_rt_entity {
 #endif
 } __randomize_layout;
 
-typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
-typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
+struct rq_flags;
+typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *, struct rq_flags *rf);
 
 struct sched_dl_entity {
 	struct rb_node			rb_node;
@@ -730,9 +730,6 @@ struct sched_dl_entity {
 	 *
	 * dl_server_update().
 	 *
 	 * @rq the runqueue this server is for
-	 *
-	 * @server_has_tasks() returns true if @server_pick return a
-	 * runnable task.
 	 */
 	struct rq			*rq;
 	dl_server_pick_f		server_pick_task;
-- 
cgit v1.2.3

From f5a538c07df26f5c601e41f7b9c7ade3e1e75803 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 31 Oct 2025 13:54:24 +0100
Subject: sched/deadline: Fix dl_server stop condition

Gabriel reported that the dl_server doesn't stop as expected. The
problem was found to be the fact that idle time and fair runtime are
treated equally. Both will count towards dl_server runtime and push
the activation forwards when it is in the zero-laxity wait state.

Notably:

  dl_server_update_idle()
    update_curr_dl_se()
      if (dl_defer && dl_throttled && dl_runtime_exceeded())
        hrtimer_try_to_cancel();		// stop timer
        replenish_dl_new_period()
          deadline = now + dl_deadline;		// fwd period
          runtime = dl_runtime;
        start_dl_timer();			// restart timer

And while we do want idle time accounted towards the *current*
activation of the dl_server -- after all, a fair task could've run if
we had any -- we don't necessarily want idle time to cause or push
forward an activation.

Introduce dl_defer_idle to make this distinction. It will be set once
idle time pushes the activation forward; once set, idle time is only
allowed to consume runtime, not push the activation forward again.
This will then cause dl_server_timer() to fire, which will stop the
dl_server.

Any non-idle time accounting during this phase will clear
dl_defer_idle, so only a full period of idle will cause the dl_server
to stop.

Reported-by: Gabriele Monaco
Signed-off-by: Peter Zijlstra (Intel)
Link: https://patch.msgid.link/20251101000057.GA2184199@noisy.programming.kicks-ass.net
---
 include/linux/sched.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 07576479c0ed..bb436ee1942d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -685,20 +685,22 @@ struct sched_dl_entity {
 	 *
 	 * @dl_server tells if this is a server entity.
 	 *
-	 * @dl_defer tells if this is a deferred or regular server. For
-	 * now only defer server exists.
-	 *
-	 * @dl_defer_armed tells if the deferrable server is waiting
-	 * for the replenishment timer to activate it.
-	 *
 	 * @dl_server_active tells if the dlserver is active(started).
 	 * dlserver is started on first cfs enqueue on an idle runqueue
 	 * and is stopped when a dequeue results in 0 cfs tasks on the
 	 * runqueue. In other words, dlserver is active only when cpu's
 	 * runqueue has atleast one cfs task.
 	 *
+	 * @dl_defer tells if this is a deferred or regular server. For
+	 * now only defer server exists.
+	 *
+	 * @dl_defer_armed tells if the deferrable server is waiting
+	 * for the replenishment timer to activate it.
+	 *
 	 * @dl_defer_running tells if the deferrable server is actually
 	 * running, skipping the defer phase.
+	 *
+	 * @dl_defer_idle tracks idle state
 	 */
 	unsigned int			dl_throttled      : 1;
 	unsigned int			dl_yielded        : 1;
@@ -709,6 +711,7 @@ struct sched_dl_entity {
 	unsigned int			dl_defer	  : 1;
 	unsigned int			dl_defer_armed	  : 1;
 	unsigned int			dl_defer_running  : 1;
+	unsigned int			dl_defer_idle	  : 1;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
-- 
cgit v1.2.3

From 33cf66d88306663d16e4759e9d24766b0aaa2e17 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 7 Nov 2025 17:01:31 +0100
Subject: sched/fair: Proportional newidle balance

Add a randomized algorithm that runs newidle balancing in proportion
to its success rate.
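The kernel/sched/fair.c half of the commit is not visible in this
include-only excerpt, so as a rough sketch only of how the three new
sched_domain counters could drive the decision -- the helper names,
the fold interval, the 1024 scale and the floor below are assumptions,
not the actual implementation; get_random_u32() is <linux/random.h>,
max() is <linux/minmax.h>:

	/* Fold the raw counters into a success ratio out of 1024. */
	static inline void newidle_update_stats(struct sched_domain *sd, bool pulled)
	{
		sd->newidle_call++;
		if (pulled)
			sd->newidle_success++;

		if (sd->newidle_call >= 1024) {
			sd->newidle_ratio = sd->newidle_success;
			sd->newidle_call = 0;
			sd->newidle_success = 0;
		}
	}

	/* Attempt newidle balance with probability ~ its success ratio. */
	static inline bool newidle_should_balance(struct sched_domain *sd)
	{
		/* keep a floor so a low ratio can still recover */
		unsigned int p = max(sd->newidle_ratio, 64U);

		return (get_random_u32() & 1023) < p;
	}

Read this way, newidle balancing keeps running on domains where it
tends to find work and is mostly skipped where it doesn't, which lines
up with the benchmark numbers that follow.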
This improves schbench significantly:

  6.18-rc4:               2.22 Mrps/s
  6.18-rc4+revert:        2.04 Mrps/s
  6.18-rc4+revert+random: 2.18 Mrps/s

Conversely, per Adam Li this affects SpecJBB slightly, reducing it
by 1%:

  6.17:               -6%
  6.17+revert:         0%
  6.17+revert+random: -1%

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Dietmar Eggemann
Tested-by: Dietmar Eggemann
Tested-by: Chris Mason
Link: https://lkml.kernel.org/r/6825c50d-7fa7-45d8-9b81-c6e7e25738e2@meta.com
Link: https://patch.msgid.link/20251107161739.770122091@infradead.org
---
 include/linux/sched/topology.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index bbcfdf12aa6e..45c0022b91ce 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -92,6 +92,9 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
 	/* idle_balance() stats */
+	unsigned int newidle_call;
+	unsigned int newidle_success;
+	unsigned int newidle_ratio;
 	u64 max_newidle_lb_cost;
 	unsigned long last_decay_max_lb_cost;
-- 
cgit v1.2.3

From c04507ac500e2cc8048000c2a849588227554e06 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sun, 16 Nov 2025 21:51:07 +0100
Subject: sched: Provide and use set_need_resched_current()

set_tsk_need_resched(current) requires a matching
set_preempt_need_resched() to work correctly outside of the
scheduler.

Provide set_need_resched_current() which wraps this correctly and
replace all the open-coded instances.

Signed-off-by: Peter Zijlstra
Signed-off-by: Thomas Gleixner
Link: https://patch.msgid.link/20251116174750.665769842@linutronix.de
---
 include/linux/sched.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index bb436ee1942d..021d05aa941a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2058,6 +2058,13 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+static inline void set_need_resched_current(void)
+{
+	lockdep_assert_irqs_disabled();
+	set_tsk_need_resched(current);
+	set_preempt_need_resched();
+}
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
-- 
cgit v1.2.3
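The conversion this last patch performs at its (not shown) call sites
is mechanical; the call site below is illustrative, not taken from the
patch:

	/* before: open-coded pair; callers must have IRQs disabled */
	set_tsk_need_resched(current);
	set_preempt_need_resched();

	/* after: one helper, which also lockdep-asserts IRQs are off */
	set_need_resched_current();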