author    Linus Torvalds <torvalds@linux-foundation.org>  2025-12-01 21:04:45 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2025-12-01 21:04:45 -0800
commit    6d2c10e889db596938bb7b4d3cdd42d67208439a (patch)
tree      c8adec6da2f3c89e60bf30397c7ebff7ee85e288 /kernel/sched/syscalls.c
parent    6c26fbe8c9d3e932dce6afe2505b19b4b261cae9 (diff)
parent    c04507ac500e2cc8048000c2a849588227554e06 (diff)
Merge tag 'sched-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"Scalability and load-balancing improvements:
- Enable scheduler feature NEXT_BUDDY (Mel Gorman)
- Reimplement NEXT_BUDDY to align with EEVDF goals (Mel Gorman)
- Skip sched_balance_running cmpxchg when balance is not due (Tim
Chen)
- Implement generic code for architecture specific sched domain NUMA
distances (Tim Chen)
- Optimize the NUMA distances of the sched-domains builds of Intel
Granite Rapids (GNR) and Clearwater Forest (CWF) platforms (Tim
Chen)
- Implement proportional newidle balance: a randomized algorithm that
runs newidle balancing proportional to its success rate. (Peter
Zijlstra)
Scheduler infrastructure changes:
- Implement the 'sched_change' scoped_guard() pattern for the entire
scheduler (Peter Zijlstra)
- More broadly utilize the sched_change guard (Peter Zijlstra)
- Add support to pick functions to take runqueue-flags (Joel
Fernandes)
- Provide and use set_need_resched_current() (Peter Zijlstra)
Fair scheduling enhancements:
- Forfeit vruntime on yield (Fernand Sieber)
- Only update stats for allowed CPUs when looking for dst group (Adam
Li)
CPU-core scheduling enhancements:
- Optimize core cookie matching check (Fernand Sieber)
Deadline scheduler fixes:
- Only set free_cpus for online runqueues (Doug Berger)
- Fix dl_server time accounting (Peter Zijlstra)
- Fix dl_server stop condition (Peter Zijlstra)
Proxy scheduling fixes:
- Yield the donor task (Fernand Sieber)
Fixes and cleanups:
- Fix do_set_cpus_allowed() locking (Peter Zijlstra)
- Fix migrate_disable_switch() locking (Peter Zijlstra)
- Remove double update_rq_clock() in __set_cpus_allowed_ptr_locked()
(Hao Jia)
- Increase sched_tick_remote timeout (Phil Auld)
- sched/deadline: Use cpumask_weight_and() in dl_bw_cpus() (Shrikanth
Hegde)
- sched/deadline: Clean up select_task_rq_dl() (Shrikanth Hegde)"
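The "proportional newidle balance" item above is the most algorithmic change in the batch: newidle balancing is attempted with a probability tied to how often it has recently succeeded, so a balancer that keeps coming up empty throttles itself. The standalone C sketch below illustrates one way such a scheme can be built; it is an illustration under assumptions, not the kernel implementation — the helper names, the 1024-based fixed point, the 1/8 EWMA weight, and the attempt floor are all invented for the example.

```c
#include <stdbool.h>
#include <stdlib.h>

/* Running estimate of how often a newidle balance pass pulls a task. */
struct newidle_stats {
	int success_rate;	/* fixed point: 0..1024 ~ 0%..100% */
};

/* Attempt the expensive balance pass with probability ~ success rate. */
static bool should_attempt_balance(struct newidle_stats *st)
{
	/* A small floor keeps a trickle of attempts so the rate can recover. */
	int p = st->success_rate > 64 ? st->success_rate : 64;

	return (rand() & 1023) < p;
}

/* Fold an attempt's outcome back into the estimate (EWMA, 1/8 weight). */
static void record_attempt(struct newidle_stats *st, bool pulled_task)
{
	int sample = pulled_task ? 1024 : 0;

	st->success_rate += (sample - st->success_rate) / 8;
}
```

A caller would gate each newidle pass on should_attempt_balance() and feed the outcome back via record_attempt(), so the attempt rate tracks the observed success rate over time.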
* tag 'sched-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
sched: Provide and use set_need_resched_current()
sched/fair: Proportional newidle balance
sched/fair: Small cleanup to update_newidle_cost()
sched/fair: Small cleanup to sched_balance_newidle()
sched/fair: Revert max_newidle_lb_cost bump
sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals
sched/fair: Enable scheduler feature NEXT_BUDDY
sched: Increase sched_tick_remote timeout
sched/fair: Have SD_SERIALIZE affect newidle balancing
sched/fair: Skip sched_balance_running cmpxchg when balance is not due
sched/deadline: Minor cleanup in select_task_rq_dl()
sched/deadline: Use cpumask_weight_and() in dl_bw_cpus
sched/deadline: Document dl_server
sched/deadline: Fix dl_server stop condition
sched/deadline: Fix dl_server time accounting
sched/core: Remove double update_rq_clock() in __set_cpus_allowed_ptr_locked()
sched/eevdf: Fix min_vruntime vs avg_vruntime
sched/core: Add comment explaining force-idle vruntime snapshots
sched/core: Optimize core cookie matching check
sched/proxy: Yield the donor task
...
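One entry above, "sched: Provide and use set_need_resched_current()", replaces a recurring two-call sequence with a named helper. The sketch below shows a plausible shape for it, assuming it merely wraps the existing set_tsk_need_resched() and set_preempt_need_resched() primitives; the exact upstream definition may differ.

```c
/*
 * Hypothetical sketch, not the verified upstream definition: mark the
 * currently running task as needing a reschedule and fold that into
 * the local CPU's preempt machinery. Both steps act on the local CPU,
 * so the helper insists on running with interrupts disabled.
 */
static inline void set_need_resched_current(void)
{
	lockdep_assert_irqs_disabled();
	set_tsk_need_resched(current);
	set_preempt_need_resched();
}
```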
Diffstat (limited to 'kernel/sched/syscalls.c')
-rw-r--r--  kernel/sched/syscalls.c | 87
1 file changed, 28 insertions, 59 deletions
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 77ae87f36e84..807879131add 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -64,8 +64,6 @@ static int effective_prio(struct task_struct *p)
 void set_user_nice(struct task_struct *p, long nice)
 {
-	bool queued, running;
-	struct rq *rq;
 	int old_prio;
 
 	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
 		return;
@@ -74,10 +72,7 @@ void set_user_nice(struct task_struct *p, long nice)
 	 * We have to be careful, if called from sys_setpriority(),
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
-	CLASS(task_rq_lock, rq_guard)(p);
-	rq = rq_guard.rq;
-
-	update_rq_clock(rq);
+	guard(task_rq_lock)(p);
 
 	/*
 	 * The RT priorities are set via sched_setscheduler(), but we still
@@ -90,28 +85,12 @@ void set_user_nice(struct task_struct *p, long nice)
 		return;
 	}
 
-	queued = task_on_rq_queued(p);
-	running = task_current_donor(rq, p);
-	if (queued)
-		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
-	if (running)
-		put_prev_task(rq, p);
-
-	p->static_prio = NICE_TO_PRIO(nice);
-	set_load_weight(p, true);
-	old_prio = p->prio;
-	p->prio = effective_prio(p);
-
-	if (queued)
-		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
-	if (running)
-		set_next_task(rq, p);
-
-	/*
-	 * If the task increased its priority or is running and
-	 * lowered its priority, then reschedule its CPU:
-	 */
-	p->sched_class->prio_changed(rq, p, old_prio);
+	scoped_guard (sched_change, p, DEQUEUE_SAVE) {
+		p->static_prio = NICE_TO_PRIO(nice);
+		set_load_weight(p, true);
+		old_prio = p->prio;
+		p->prio = effective_prio(p);
+	}
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -515,7 +494,7 @@ int __sched_setscheduler(struct task_struct *p,
 			 bool user, bool pi)
 {
 	int oldpolicy = -1, policy = attr->sched_policy;
-	int retval, oldprio, newprio, queued, running;
+	int retval, oldprio, newprio;
 	const struct sched_class *prev_class, *next_class;
 	struct balance_callback *head;
 	struct rq_flags rf;
@@ -695,38 +674,27 @@ change:
 	prev_class = p->sched_class;
 	next_class = __setscheduler_class(policy, newprio);
 
-	if (prev_class != next_class && p->se.sched_delayed)
-		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
-	queued = task_on_rq_queued(p);
-	running = task_current_donor(rq, p);
-	if (queued)
-		dequeue_task(rq, p, queue_flags);
-	if (running)
-		put_prev_task(rq, p);
+	if (prev_class != next_class)
+		queue_flags |= DEQUEUE_CLASS;
 
-	if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
-		__setscheduler_params(p, attr);
-		p->sched_class = next_class;
-		p->prio = newprio;
-	}
-	__setscheduler_uclamp(p, attr);
-	check_class_changing(rq, p, prev_class);
+	scoped_guard (sched_change, p, queue_flags) {
 
-	if (queued) {
-		/*
-		 * We enqueue to tail when the priority of a task is
-		 * increased (user space view).
-		 */
-		if (oldprio < p->prio)
-			queue_flags |= ENQUEUE_HEAD;
+		if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
+			__setscheduler_params(p, attr);
+			p->sched_class = next_class;
+			p->prio = newprio;
+		}
+		__setscheduler_uclamp(p, attr);
 
-		enqueue_task(rq, p, queue_flags);
+		if (scope->queued) {
+			/*
+			 * We enqueue to tail when the priority of a task is
+			 * increased (user space view).
+			 */
+			if (oldprio < p->prio)
+				scope->flags |= ENQUEUE_HEAD;
+		}
 	}
-	if (running)
-		set_next_task(rq, p);
-
-	check_class_changed(rq, p, prev_class, oldprio);
 
 	/* Avoid rq from going away on us: */
 	preempt_disable();
@@ -1351,7 +1319,7 @@ static void do_sched_yield(void)
 	rq = this_rq_lock_irq(&rf);
 
 	schedstat_inc(rq->yld_count);
-	current->sched_class->yield_task(rq);
+	rq->donor->sched_class->yield_task(rq);
 
 	preempt_disable();
 	rq_unlock_irq(rq, &rf);
@@ -1420,12 +1388,13 @@ EXPORT_SYMBOL(yield);
  */
 int __sched yield_to(struct task_struct *p, bool preempt)
 {
-	struct task_struct *curr = current;
+	struct task_struct *curr;
 	struct rq *rq, *p_rq;
 	int yielded = 0;
 
 	scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
 		rq = this_rq();
+		curr = rq->donor;
 
 again:
 		p_rq = task_rq(p);
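The converted hunks above all lean on the new sched_change scoped_guard() pattern from this merge: the guard dequeues the task and drops it as the current pick on entry, then re-enqueues it and reinstalls it on exit, with the guard's context visible inside the block as `scope` (hence `scope->queued` and `scope->flags`). Below is a rough sketch of that shape, with the struct layout and helper names assumed for illustration rather than copied from the kernel source; the enqueue/dequeue helpers it calls are the ones visible in the diff.

```c
/*
 * Rough sketch of the sched_change guard shape; names and details are
 * assumptions for illustration, not the kernel's actual implementation.
 */
struct sched_change_ctx {
	struct task_struct	*p;
	unsigned int		flags;	/* dequeue/enqueue flags */
	bool			queued;
	bool			running;
};

/* Entry half: take @p out of its runqueue while its state is changed. */
static inline struct sched_change_ctx
sched_change_begin(struct rq *rq, struct task_struct *p, unsigned int flags)
{
	struct sched_change_ctx ctx = {
		.p	 = p,
		.flags	 = flags,
		.queued	 = task_on_rq_queued(p),
		.running = task_current(rq, p),
	};

	if (ctx.queued)
		dequeue_task(rq, p, flags);
	if (ctx.running)
		put_prev_task(rq, p);

	return ctx;
}

/* Exit half: re-insert @p, honouring flags the body added (e.g. ENQUEUE_HEAD). */
static inline void
sched_change_end(struct rq *rq, struct sched_change_ctx *ctx)
{
	if (ctx->queued)
		enqueue_task(rq, ctx->p, ctx->flags);
	if (ctx->running)
		set_next_task(rq, ctx->p);
}
```

With the begin/end pair bound to a scope via the linux/cleanup.h class machinery, every exit path out of a scoped_guard (sched_change, ...) block runs the enqueue side automatically, which is what lets set_user_nice() and __sched_setscheduler() drop their hand-rolled queued/running bookkeeping in the diff above.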
