From e3dfd64c1f344ebec9397719244c27b360255855 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 13 Sep 2024 09:23:40 -0700 Subject: perf: Fix missing RCU reader protection in perf_event_clear_cpumask() Running rcutorture scenario TREE05, the below warning is triggered. [ 32.604594] WARNING: suspicious RCU usage [ 32.605928] 6.11.0-rc5-00040-g4ba4f1afb6a9 #55238 Not tainted [ 32.607812] ----------------------------- [ 32.609140] kernel/events/core.c:13946 RCU-list traversed in non-reader section!! [ 32.611595] other info that might help us debug this: [ 32.614247] rcu_scheduler_active = 2, debug_locks = 1 [ 32.616392] 3 locks held by cpuhp/4/35: [ 32.617687] #0: ffffffffb666a650 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x4e/0x200 [ 32.620563] #1: ffffffffb666cd20 (cpuhp_state-down){+.+.}-{0:0}, at: cpuhp_thread_fun+0x4e/0x200 [ 32.623412] #2: ffffffffb677c288 (pmus_lock){+.+.}-{3:3}, at: perf_event_exit_cpu_context+0x32/0x2f0 In perf_event_clear_cpumask(), uses list_for_each_entry_rcu() without an obvious RCU read-side critical section. Either pmus_srcu or pmus_lock is good enough to protect the pmus list. In the current context, pmus_lock is already held. The list_for_each_entry_rcu() is not required. Fixes: 4ba4f1afb6a9 ("perf: Generic hotplug support for a PMU with a scope") Closes: https://lore.kernel.org/lkml/2b66dff8-b827-494b-b151-1ad8d56f13e6@paulmck-laptop/ Closes: https://lore.kernel.org/oe-lkp/202409131559.545634cc-oliver.sang@intel.com Reported-by: "Paul E. McKenney" Reported-by: kernel test robot Suggested-by: Peter Zijlstra Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Paul E. McKenney" Link: https://lore.kernel.org/r/20240913162340.2142976-1-kan.liang@linux.intel.com --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index cdd09769e6c5..df27d08a7232 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -13959,7 +13959,7 @@ static void perf_event_clear_cpumask(unsigned int cpu) } /* migrate */ - list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) { + list_for_each_entry(pmu, &pmus, entry) { if (pmu->scope == PERF_PMU_SCOPE_NONE || WARN_ON_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE)) continue; -- cgit v1.2.3 From b55945c500c5723992504aa03b362fab416863a6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Oct 2024 11:36:41 +0200 Subject: sched: Fix pick_next_task_fair() vs try_to_wake_up() race Syzkaller robot reported KCSAN tripping over the ASSERT_EXCLUSIVE_WRITER(p->on_rq) in __block_task(). The report noted that both pick_next_task_fair() and try_to_wake_up() were concurrently trying to write to the same p->on_rq, violating the assertion -- even though both paths hold rq->__lock. The logical consequence is that both code paths end up holding a different rq->__lock. And looking through ttwu(), this is possible when the __block_task() 'p->on_rq = 0' store is visible to the ttwu() 'p->on_rq' load, which then assumes the task is not queued and continues to migrate it. Rearrange things such that __block_task() releases @p with the store and no code thereafter will use @p again. Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") Reported-by: syzbot+0ec1e96c2cdf5c0e512a@syzkaller.appspotmail.com Reported-by: Kent Overstreet Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20241023093641.GE16066@noisy.programming.kicks-ass.net --- kernel/sched/fair.c | 21 ++++++++++++++------- kernel/sched/sched.h | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c157d4860a3b..879614686188 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5625,8 +5625,9 @@ pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq) struct sched_entity *se = pick_eevdf(cfs_rq); if (se->sched_delayed) { dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED); - SCHED_WARN_ON(se->sched_delayed); - SCHED_WARN_ON(se->on_rq); + /* + * Must not reference @se again, see __block_task(). + */ return NULL; } return se; @@ -7176,7 +7177,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) /* Fix-up what dequeue_task_fair() skipped */ hrtick_update(rq); - /* Fix-up what block_task() skipped. */ + /* + * Fix-up what block_task() skipped. + * + * Must be last, @p might not be valid after this. + */ __block_task(rq, p); } @@ -7193,12 +7198,14 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE)))) util_est_dequeue(&rq->cfs, p); - if (dequeue_entities(rq, &p->se, flags) < 0) { - util_est_update(&rq->cfs, p, DEQUEUE_SLEEP); + util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); + if (dequeue_entities(rq, &p->se, flags) < 0) return false; - } - util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); + /* + * Must not reference @p after dequeue_entities(DEQUEUE_DELAYED). + */ + hrtick_update(rq); return true; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 081519ffab46..9f9d1cc390b1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2769,8 +2769,6 @@ static inline void sub_nr_running(struct rq *rq, unsigned count) static inline void __block_task(struct rq *rq, struct task_struct *p) { - WRITE_ONCE(p->on_rq, 0); - ASSERT_EXCLUSIVE_WRITER(p->on_rq); if (p->sched_contributes_to_load) rq->nr_uninterruptible++; @@ -2778,6 +2776,38 @@ static inline void __block_task(struct rq *rq, struct task_struct *p) atomic_inc(&rq->nr_iowait); delayacct_blkio_start(); } + + ASSERT_EXCLUSIVE_WRITER(p->on_rq); + + /* + * The moment this write goes through, ttwu() can swoop in and migrate + * this task, rendering our rq->__lock ineffective. + * + * __schedule() try_to_wake_up() + * LOCK rq->__lock LOCK p->pi_lock + * pick_next_task() + * pick_next_task_fair() + * pick_next_entity() + * dequeue_entities() + * __block_task() + * RELEASE p->on_rq = 0 if (p->on_rq && ...) + * break; + * + * ACQUIRE (after ctrl-dep) + * + * cpu = select_task_rq(); + * set_task_cpu(p, cpu); + * ttwu_queue() + * ttwu_do_activate() + * LOCK rq->__lock + * activate_task() + * STORE p->on_rq = 1 + * UNLOCK rq->__lock + * + * Callers must ensure to not reference @p after this -- we no longer + * own it. + */ + smp_store_release(&p->on_rq, 0); } extern void activate_task(struct rq *rq, struct task_struct *p, int flags); -- cgit v1.2.3 From 9c70b2a33cd2aa6a5a59c5523ef053bd42265209 Mon Sep 17 00:00:00 2001 From: Shawn Wang Date: Fri, 25 Oct 2024 10:22:08 +0800 Subject: sched/numa: Fix the potential null pointer dereference in task_numa_work() When running stress-ng-vm-segv test, we found a null pointer dereference error in task_numa_work(). Here is the backtrace: [323676.066985] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ...... [323676.067108] CPU: 35 PID: 2694524 Comm: stress-ng-vm-se ...... [323676.067113] pstate: 23401009 (nzCv daif +PAN -UAO +TCO +DIT +SSBS BTYPE=--) [323676.067115] pc : vma_migratable+0x1c/0xd0 [323676.067122] lr : task_numa_work+0x1ec/0x4e0 [323676.067127] sp : ffff8000ada73d20 [323676.067128] x29: ffff8000ada73d20 x28: 0000000000000000 x27: 000000003e89f010 [323676.067130] x26: 0000000000080000 x25: ffff800081b5c0d8 x24: ffff800081b27000 [323676.067133] x23: 0000000000010000 x22: 0000000104d18cc0 x21: ffff0009f7158000 [323676.067135] x20: 0000000000000000 x19: 0000000000000000 x18: ffff8000ada73db8 [323676.067138] x17: 0001400000000000 x16: ffff800080df40b0 x15: 0000000000000035 [323676.067140] x14: ffff8000ada73cc8 x13: 1fffe0017cc72001 x12: ffff8000ada73cc8 [323676.067142] x11: ffff80008001160c x10: ffff000be639000c x9 : ffff8000800f4ba4 [323676.067145] x8 : ffff000810375000 x7 : ffff8000ada73974 x6 : 0000000000000001 [323676.067147] x5 : 0068000b33e26707 x4 : 0000000000000001 x3 : ffff0009f7158000 [323676.067149] x2 : 0000000000000041 x1 : 0000000000004400 x0 : 0000000000000000 [323676.067152] Call trace: [323676.067153] vma_migratable+0x1c/0xd0 [323676.067155] task_numa_work+0x1ec/0x4e0 [323676.067157] task_work_run+0x78/0xd8 [323676.067161] do_notify_resume+0x1ec/0x290 [323676.067163] el0_svc+0x150/0x160 [323676.067167] el0t_64_sync_handler+0xf8/0x128 [323676.067170] el0t_64_sync+0x17c/0x180 [323676.067173] Code: d2888001 910003fd f9000bf3 aa0003f3 (f9401000) [323676.067177] SMP: stopping secondary CPUs [323676.070184] Starting crashdump kernel... stress-ng-vm-segv in stress-ng is used to stress test the SIGSEGV error handling function of the system, which tries to cause a SIGSEGV error on return from unmapping the whole address space of the child process. Normally this program will not cause kernel crashes. But before the munmap system call returns to user mode, a potential task_numa_work() for numa balancing could be added and executed. In this scenario, since the child process has no vma after munmap, the vma_next() in task_numa_work() will return a null pointer even if the vma iterator restarts from 0. Recheck the vma pointer before dereferencing it in task_numa_work(). Fixes: 214dbc428137 ("sched: convert to vma iterator") Signed-off-by: Shawn Wang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org # v6.2+ Link: https://lkml.kernel.org/r/20241025022208.125527-1-shawnwang@linux.alibaba.com --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 879614686188..2d16c8545c71 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3369,7 +3369,7 @@ retry_pids: vma = vma_next(&vmi); } - do { + for (; vma; vma = vma_next(&vmi)) { if (!vma_migratable(vma) || !vma_policy_mof(vma) || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) { trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UNSUITABLE); @@ -3491,7 +3491,7 @@ retry_pids: */ if (vma_pids_forced) break; - } for_each_vma(vmi, vma); + } /* * If no VMAs are remaining and VMAs were skipped due to the PID -- cgit v1.2.3 From b5413156bad91dc2995a5c4eab1b05e56914638a Mon Sep 17 00:00:00 2001 From: Benjamin Segall Date: Fri, 25 Oct 2024 18:35:35 -0700 Subject: posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone When cloning a new thread, its posix_cputimers are not inherited, and are cleared by posix_cputimers_init(). However, this does not clear the tick dependency it creates in tsk->tick_dep_mask, and the handler does not reach the code to clear the dependency if there were no timers to begin with. Thus if a thread has a cputimer running before clone/fork, all descendants will prevent nohz_full unless they create a cputimer of their own. Fix this by entirely clearing the tick_dep_mask in copy_process(). (There is currently no inherited state that needs a tick dependency) Process-wide timers do not have this problem because fork does not copy signal_struct as a baseline, it creates one from scratch. Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model") Signed-off-by: Ben Segall Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com --- kernel/fork.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 89ceb4a68af2..6fa9fe62e01e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -105,6 +105,7 @@ #include #include #include +#include #include #include @@ -2292,6 +2293,7 @@ __latent_entropy struct task_struct *copy_process( acct_clear_integrals(p); posix_cputimers_init(&p->posix_cputimers); + tick_dep_init_task(p); p->io_context = NULL; audit_set_context(p, NULL); -- cgit v1.2.3 From 5f994f534120f47432092fb36f5cb0c7a80ed2bf Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Sat, 26 Oct 2024 14:36:39 +0800 Subject: genirq/msi: Fix off-by-one error in msi_domain_alloc() The error path in msi_domain_alloc(), frees the already allocated MSI interrupts in a loop, but the loop condition terminates when the index reaches zero, which fails to free the first allocated MSI interrupt at index zero. Check for >= 0 so that msi[0] is freed as well. Fixes: f3cf8bb0d6c3 ("genirq: Add generic msi irq domain support") Signed-off-by: Jinjie Ruan Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241026063639.10711-1-ruanjinjie@huawei.com --- kernel/irq/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 3a24d6b5f559..396a067a8a56 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -718,7 +718,7 @@ static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq, ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg); if (ret < 0) { if (ops->msi_free) { - for (i--; i > 0; i--) + for (i--; i >= 0; i--) ops->msi_free(domain, info, virq + i); } irq_domain_free_irqs_top(domain, virq, nr_irqs); -- cgit v1.2.3 From 5db91545ef8150c45a526675ef99e8998b648a41 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Sat, 26 Oct 2024 00:20:20 +0530 Subject: sched: Pass correct scheduling policy to __setscheduler_class Commit 98442f0ccd82 ("sched: Fix delayed_dequeue vs switched_from_fair()") overlooked that __setscheduler_prio(), now __setscheduler_class() relies on p->policy for task_should_scx(), and moved the call before __setscheduler_params() updates it, causing it to be using the old p->policy value. Resolve this by changing task_should_scx() to take the policy itself instead of a task pointer, such that __sched_setscheduler() can pass in the updated policy. Fixes: 98442f0ccd82 ("sched: Fix delayed_dequeue vs switched_from_fair()") Signed-off-by: Aboorva Devarajan Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo --- kernel/sched/core.c | 8 ++++---- kernel/sched/ext.c | 8 ++++---- kernel/sched/ext.h | 2 +- kernel/sched/sched.h | 2 +- kernel/sched/syscalls.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dbfb5717d6af..719e0ed1e976 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4711,7 +4711,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) if (rt_prio(p->prio)) { p->sched_class = &rt_sched_class; #ifdef CONFIG_SCHED_CLASS_EXT - } else if (task_should_scx(p)) { + } else if (task_should_scx(p->policy)) { p->sched_class = &ext_sched_class; #endif } else { @@ -7025,7 +7025,7 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag } EXPORT_SYMBOL(default_wake_function); -const struct sched_class *__setscheduler_class(struct task_struct *p, int prio) +const struct sched_class *__setscheduler_class(int policy, int prio) { if (dl_prio(prio)) return &dl_sched_class; @@ -7034,7 +7034,7 @@ const struct sched_class *__setscheduler_class(struct task_struct *p, int prio) return &rt_sched_class; #ifdef CONFIG_SCHED_CLASS_EXT - if (task_should_scx(p)) + if (task_should_scx(policy)) return &ext_sched_class; #endif @@ -7142,7 +7142,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) queue_flag &= ~DEQUEUE_MOVE; prev_class = p->sched_class; - next_class = __setscheduler_class(p, prio); + next_class = __setscheduler_class(p->policy, prio); if (prev_class != next_class && p->se.sched_delayed) dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 5900b06fd036..40bdfe84e4f0 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4256,14 +4256,14 @@ static const struct kset_uevent_ops scx_uevent_ops = { * Used by sched_fork() and __setscheduler_prio() to pick the matching * sched_class. dl/rt are already handled. */ -bool task_should_scx(struct task_struct *p) +bool task_should_scx(int policy) { if (!scx_enabled() || unlikely(scx_ops_enable_state() == SCX_OPS_DISABLING)) return false; if (READ_ONCE(scx_switching_all)) return true; - return p->policy == SCHED_EXT; + return policy == SCHED_EXT; } /** @@ -4493,7 +4493,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work) sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); - p->sched_class = __setscheduler_class(p, p->prio); + p->sched_class = __setscheduler_class(p->policy, p->prio); check_class_changing(task_rq(p), p, old_class); sched_enq_and_set_task(&ctx); @@ -5204,7 +5204,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); p->scx.slice = SCX_SLICE_DFL; - p->sched_class = __setscheduler_class(p, p->prio); + p->sched_class = __setscheduler_class(p->policy, p->prio); check_class_changing(task_rq(p), p, old_class); sched_enq_and_set_task(&ctx); diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 246019519231..b1675bb59fc4 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -18,7 +18,7 @@ bool scx_can_stop_tick(struct rq *rq); void scx_rq_activate(struct rq *rq); void scx_rq_deactivate(struct rq *rq); int scx_check_setscheduler(struct task_struct *p, int policy); -bool task_should_scx(struct task_struct *p); +bool task_should_scx(int policy); void init_sched_ext_class(void); static inline u32 scx_cpuperf_target(s32 cpu) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9f9d1cc390b1..6c54a57275cc 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3830,7 +3830,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio) extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi); extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx); -extern const struct sched_class *__setscheduler_class(struct task_struct *p, int prio); +extern const struct sched_class *__setscheduler_class(int policy, int prio); extern void set_load_weight(struct task_struct *p, bool update_load); extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags); extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags); diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 0470bcc3d204..24f9f90b6574 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -707,7 +707,7 @@ change: } prev_class = p->sched_class; - next_class = __setscheduler_class(p, newprio); + next_class = __setscheduler_class(policy, newprio); if (prev_class != next_class && p->se.sched_delayed) dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK); -- cgit v1.2.3 From 69d5e722be949a1e2409c3f2865ba6020c279db6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2024 11:49:34 +0100 Subject: sched/ext: Fix scx vs sched_delayed Commit 98442f0ccd82 ("sched: Fix delayed_dequeue vs switched_from_fair()") forgot about scx :/ Fixes: 98442f0ccd82 ("sched: Fix delayed_dequeue vs switched_from_fair()") Reported-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Link: https://lkml.kernel.org/r/20241030104934.GK14555@noisy.programming.kicks-ass.net --- kernel/sched/ext.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 40bdfe84e4f0..721a75489ff5 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4489,11 +4489,16 @@ static void scx_ops_disable_workfn(struct kthread_work *work) scx_task_iter_start(&sti); while ((p = scx_task_iter_next_locked(&sti))) { const struct sched_class *old_class = p->sched_class; + const struct sched_class *new_class = + __setscheduler_class(p->policy, p->prio); struct sched_enq_and_set_ctx ctx; + if (old_class != new_class && p->se.sched_delayed) + dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); + sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); - p->sched_class = __setscheduler_class(p->policy, p->prio); + p->sched_class = new_class; check_class_changing(task_rq(p), p, old_class); sched_enq_and_set_task(&ctx); @@ -5199,12 +5204,17 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) scx_task_iter_start(&sti); while ((p = scx_task_iter_next_locked(&sti))) { const struct sched_class *old_class = p->sched_class; + const struct sched_class *new_class = + __setscheduler_class(p->policy, p->prio); struct sched_enq_and_set_ctx ctx; + if (old_class != new_class && p->se.sched_delayed) + dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); + sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); p->scx.slice = SCX_SLICE_DFL; - p->sched_class = __setscheduler_class(p->policy, p->prio); + p->sched_class = new_class; check_class_changing(task_rq(p), p, old_class); sched_enq_and_set_task(&ctx); -- cgit v1.2.3 From fa17cb4b3b42618aeed1e0bce80cc55106561718 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 30 Oct 2024 10:17:49 -0700 Subject: tracing: Document tracefs gid mount option Commit ee7f3666995d ("tracefs: Have new files inherit the ownership of their parent") and commit 48b27b6b5191 ("tracefs: Set all files to the same group ownership as the mount option") introduced a new gid mount option that allows specifying a group to apply to all entries in tracefs. Document this in the tracing readme. Cc: Eric Sandeen Cc: Mathieu Desnoyers Cc: Shuah Khan Cc: Ali Zahraee Cc: Christian Brauner Cc: David Howells Cc: Masami Hiramatsu Link: https://lore.kernel.org/20241030171928.4168869-3-kaleshsingh@google.com Signed-off-by: Kalesh Singh Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a8f52b6527ca..2b64b3ec67d9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5501,6 +5501,10 @@ static const struct file_operations tracing_iter_fops = { static const char readme_msg[] = "tracing mini-HOWTO:\n\n" + "By default tracefs removes all OTH file permission bits.\n" + "When mounting tracefs an optional group id can be specified\n" + "which adds the group to every directory and file in tracefs:\n\n" + "\t e.g. mount -t tracefs [-o [gid=]] nodev /sys/kernel/tracing\n\n" "# echo 0 > tracing_on : quick way to disable tracing\n" "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" " Important files:\n" -- cgit v1.2.3