path: root/kernel/sched/syscalls.c
author     Linus Torvalds <torvalds@linux-foundation.org>  2025-12-01 21:04:45 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-12-01 21:04:45 -0800
commit     6d2c10e889db596938bb7b4d3cdd42d67208439a (patch)
tree       c8adec6da2f3c89e60bf30397c7ebff7ee85e288 /kernel/sched/syscalls.c
parent     6c26fbe8c9d3e932dce6afe2505b19b4b261cae9 (diff)
parent     c04507ac500e2cc8048000c2a849588227554e06 (diff)
Merge tag 'sched-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "Scalability and load-balancing improvements:

   - Enable scheduler feature NEXT_BUDDY (Mel Gorman)
   - Reimplement NEXT_BUDDY to align with EEVDF goals (Mel Gorman)
   - Skip sched_balance_running cmpxchg when balance is not due (Tim Chen)
   - Implement generic code for architecture-specific sched domain NUMA
     distances (Tim Chen)
   - Optimize the NUMA distances of the sched-domain builds of Intel
     Granite Rapids (GNR) and Clearwater Forest (CWF) platforms (Tim Chen)
   - Implement proportional newidle balance: a randomized algorithm that
     runs newidle balancing proportional to its success rate (Peter Zijlstra)

  Scheduler infrastructure changes:

   - Implement the 'sched_change' scoped_guard() pattern for the entire
     scheduler (Peter Zijlstra); see the sketch following this message
   - More broadly utilize the sched_change guard (Peter Zijlstra)
   - Add support for pick functions to take runqueue flags (Joel Fernandes)
   - Provide and use set_need_resched_current() (Peter Zijlstra)

  Fair scheduling enhancements:

   - Forfeit vruntime on yield (Fernand Sieber)
   - Only update stats for allowed CPUs when looking for dst group (Adam Li)

  CPU-core scheduling enhancements:

   - Optimize core cookie matching check (Fernand Sieber)

  Deadline scheduler fixes:

   - Only set free_cpus for online runqueues (Doug Berger)
   - Fix dl_server time accounting (Peter Zijlstra)
   - Fix dl_server stop condition (Peter Zijlstra)

  Proxy scheduling fixes:

   - Yield the donor task (Fernand Sieber)

  Fixes and cleanups:

   - Fix do_set_cpus_allowed() locking (Peter Zijlstra)
   - Fix migrate_disable_switch() locking (Peter Zijlstra)
   - Remove double update_rq_clock() in __set_cpus_allowed_ptr_locked()
     (Hao Jia)
   - Increase sched_tick_remote timeout (Phil Auld)
   - sched/deadline: Use cpumask_weight_and() in dl_bw_cpus() (Shrikanth Hegde)
   - sched/deadline: Clean up select_task_rq_dl() (Shrikanth Hegde)"

* tag 'sched-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  sched: Provide and use set_need_resched_current()
  sched/fair: Proportional newidle balance
  sched/fair: Small cleanup to update_newidle_cost()
  sched/fair: Small cleanup to sched_balance_newidle()
  sched/fair: Revert max_newidle_lb_cost bump
  sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals
  sched/fair: Enable scheduler feature NEXT_BUDDY
  sched: Increase sched_tick_remote timeout
  sched/fair: Have SD_SERIALIZE affect newidle balancing
  sched/fair: Skip sched_balance_running cmpxchg when balance is not due
  sched/deadline: Minor cleanup in select_task_rq_dl()
  sched/deadline: Use cpumask_weight_and() in dl_bw_cpus
  sched/deadline: Document dl_server
  sched/deadline: Fix dl_server stop condition
  sched/deadline: Fix dl_server time accounting
  sched/core: Remove double update_rq_clock() in __set_cpus_allowed_ptr_locked()
  sched/eevdf: Fix min_vruntime vs avg_vruntime
  sched/core: Add comment explaining force-idle vruntime snapshots
  sched/core: Optimize core cookie matching check
  sched/proxy: Yield the donor task
  ...
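The recurring change in the diff below is the 'sched_change' scoped guard mentioned above: the open-coded dequeue_task()/put_prev_task() ... enqueue_task()/set_next_task() sequences in set_user_nice() and __sched_setscheduler() are replaced by a scoped_guard (sched_change, p, flags) block that performs that bookkeeping on scope entry and exit. The following is a conceptual sketch only, reconstructed from the removed lines in the hunks that follow; the names sched_change_ctx, sched_change_begin() and sched_change_end() are illustrative assumptions, not the kernel's actual guard implementation, and details such as DEQUEUE_SAVE/ENQUEUE_RESTORE flag translation, clock handling and class-change checks are elided.

/*
 * Conceptual sketch, not the real guard: it shows the bookkeeping that
 * the sched_change guard takes over from the open-coded sequences
 * removed in the hunks below.  Names are made up for illustration.
 */
struct sched_change_ctx {
        struct task_struct      *p;
        int                     flags;  /* may be adjusted inside the scope */
        bool                    queued;
        bool                    running;
};

/* Scope entry: take the task out of the runqueue while it is modified. */
static struct sched_change_ctx sched_change_begin(struct rq *rq,
                                                  struct task_struct *p,
                                                  int flags)
{
        struct sched_change_ctx ctx = {
                .p       = p,
                .flags   = flags,
                .queued  = task_on_rq_queued(p),
                .running = task_current_donor(rq, p),
        };

        if (ctx.queued)
                dequeue_task(rq, p, flags);
        if (ctx.running)
                put_prev_task(rq, p);

        return ctx;
}

/* Scope exit: put the task back with whatever parameters were set. */
static void sched_change_end(struct rq *rq, struct sched_change_ctx *ctx)
{
        if (ctx->queued)
                enqueue_task(rq, ctx->p, ctx->flags);
        if (ctx->running)
                set_next_task(rq, ctx->p);

        /*
         * The real guard also lets the scheduling class react to the
         * change (compare the prio_changed() call removed from
         * set_user_nice() below); that notification is elided here.
         */
}

With the guard in place, callers only update the task's fields inside the scope (static_prio, prio, sched_class, uclamp settings), and may tweak the saved enqueue flags via scope->flags (for example adding ENQUEUE_HEAD); the dequeue/re-enqueue and the follow-up notifications happen automatically at scope entry and exit, which is why the hunks below can drop the queued/running bookkeeping entirely.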
Diffstat (limited to 'kernel/sched/syscalls.c')
-rw-r--r--  kernel/sched/syscalls.c  87
1 file changed, 28 insertions, 59 deletions
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 77ae87f36e84..807879131add 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -64,8 +64,6 @@ static int effective_prio(struct task_struct *p)
void set_user_nice(struct task_struct *p, long nice)
{
- bool queued, running;
- struct rq *rq;
int old_prio;
if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
@@ -74,10 +72,7 @@ void set_user_nice(struct task_struct *p, long nice)
* We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU.
*/
- CLASS(task_rq_lock, rq_guard)(p);
- rq = rq_guard.rq;
-
- update_rq_clock(rq);
+ guard(task_rq_lock)(p);
/*
* The RT priorities are set via sched_setscheduler(), but we still
@@ -90,28 +85,12 @@ void set_user_nice(struct task_struct *p, long nice)
return;
}
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
- if (running)
- put_prev_task(rq, p);
-
- p->static_prio = NICE_TO_PRIO(nice);
- set_load_weight(p, true);
- old_prio = p->prio;
- p->prio = effective_prio(p);
-
- if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
- if (running)
- set_next_task(rq, p);
-
- /*
- * If the task increased its priority or is running and
- * lowered its priority, then reschedule its CPU:
- */
- p->sched_class->prio_changed(rq, p, old_prio);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE) {
+ p->static_prio = NICE_TO_PRIO(nice);
+ set_load_weight(p, true);
+ old_prio = p->prio;
+ p->prio = effective_prio(p);
+ }
}
EXPORT_SYMBOL(set_user_nice);
@@ -515,7 +494,7 @@ int __sched_setscheduler(struct task_struct *p,
bool user, bool pi)
{
int oldpolicy = -1, policy = attr->sched_policy;
- int retval, oldprio, newprio, queued, running;
+ int retval, oldprio, newprio;
const struct sched_class *prev_class, *next_class;
struct balance_callback *head;
struct rq_flags rf;
@@ -695,38 +674,27 @@ change:
prev_class = p->sched_class;
next_class = __setscheduler_class(policy, newprio);
- if (prev_class != next_class && p->se.sched_delayed)
- dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, queue_flags);
- if (running)
- put_prev_task(rq, p);
+ if (prev_class != next_class)
+ queue_flags |= DEQUEUE_CLASS;
- if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
- __setscheduler_params(p, attr);
- p->sched_class = next_class;
- p->prio = newprio;
- }
- __setscheduler_uclamp(p, attr);
- check_class_changing(rq, p, prev_class);
+ scoped_guard (sched_change, p, queue_flags) {
- if (queued) {
- /*
- * We enqueue to tail when the priority of a task is
- * increased (user space view).
- */
- if (oldprio < p->prio)
- queue_flags |= ENQUEUE_HEAD;
+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
+ __setscheduler_params(p, attr);
+ p->sched_class = next_class;
+ p->prio = newprio;
+ }
+ __setscheduler_uclamp(p, attr);
- enqueue_task(rq, p, queue_flags);
+ if (scope->queued) {
+ /*
+ * We enqueue to tail when the priority of a task is
+ * increased (user space view).
+ */
+ if (oldprio < p->prio)
+ scope->flags |= ENQUEUE_HEAD;
+ }
}
- if (running)
- set_next_task(rq, p);
-
- check_class_changed(rq, p, prev_class, oldprio);
/* Avoid rq from going away on us: */
preempt_disable();
@@ -1351,7 +1319,7 @@ static void do_sched_yield(void)
rq = this_rq_lock_irq(&rf);
schedstat_inc(rq->yld_count);
- current->sched_class->yield_task(rq);
+ rq->donor->sched_class->yield_task(rq);
preempt_disable();
rq_unlock_irq(rq, &rf);
@@ -1420,12 +1388,13 @@ EXPORT_SYMBOL(yield);
*/
int __sched yield_to(struct task_struct *p, bool preempt)
{
- struct task_struct *curr = current;
+ struct task_struct *curr;
struct rq *rq, *p_rq;
int yielded = 0;
scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
rq = this_rq();
+ curr = rq->donor;
again:
p_rq = task_rq(p);