From faab3ae329a6efb96995aeb72a68a99f664fed38 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 23 Jun 2025 12:39:55 -0700
Subject: rcu: Document that rcu_barrier() hurries lazy callbacks

This commit adds a statement to the rcu_barrier() kerneldoc header
noting that this function hurries lazy callbacks and that it does not
normally result in additional RCU grace periods.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8eff357b0436..1291e0761d70 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3800,6 +3800,11 @@ static void rcu_barrier_handler(void *cpu_in)
  * to complete. For example, if there are no RCU callbacks queued anywhere
  * in the system, then rcu_barrier() is within its rights to return
  * immediately, without waiting for anything, much less an RCU grace period.
+ * In fact, rcu_barrier() will normally not result in any RCU grace periods
+ * beyond those that were already destined to be executed.
+ *
+ * In kernels built with CONFIG_RCU_LAZY=y, this function also hurries all
+ * pending lazy RCU callbacks.
  */
 void rcu_barrier(void)
 {
--
cgit v1.2.3
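
For context, the pattern this kerneldoc update speaks to is the classic
unload-time flush. The following is a minimal hypothetical sketch (the
"foo" names are invented, not part of the patch): with CONFIG_RCU_LAZY=y,
callbacks posted via call_rcu() may be batched lazily, and rcu_barrier()
hurries them rather than forcing extra grace periods.

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        struct rcu_head rh;
        int data;
};

static void foo_reclaim(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct foo, rh));
}

/* Retire an element; with CONFIG_RCU_LAZY=y this callback may be lazy. */
static void foo_retire(struct foo *fp)
{
        call_rcu(&fp->rh, foo_reclaim);
}

static void __exit foo_exit(void)
{
        /*
         * Wait for all callbacks queued by foo_retire(), hurrying any
         * lazy ones.  This normally adds no grace periods beyond those
         * already destined to happen.
         */
        rcu_barrier();
}
module_exit(foo_exit);
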
McKenney" --- kernel/cgroup/dmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 10b63433f057..e12b946278b6 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -14,6 +14,7 @@ #include #include #include +#include #include struct dmem_cgroup_region { -- cgit v1.2.3 From 143ddfa169bbb733275b1a720bafd5a4366b3d89 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 19 Sep 2025 16:50:37 +0200 Subject: rcu: replace use of system_wq with system_percpu_wq Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. system_wq is a per-CPU worqueue, yet nothing in its name tells about that CPU affinity constraint, which is very often not required by users. Make it clear by adding a system_percpu_wq. The old wq will be kept for a few release cylces. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index f92443561d36..2dc044fd126e 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -553,13 +553,13 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu rtpcp_next = rtp->rtpcp_array[index]; if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) { cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND; - queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work); + queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work); index++; if (index < num_possible_cpus()) { rtpcp_next = rtp->rtpcp_array[index]; if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) { cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND; - queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work); + queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work); } } } -- cgit v1.2.3 From 499d48f75b230522f4aa5aa4b9cc3c5b1594e1af Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 19 Sep 2025 16:50:38 +0200 Subject: rcu: WQ_PERCPU added to alloc_workqueue users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This patch adds a new WQ_PERCPU flag to explicitly request the use of the per-CPU behavior. Both flags coexist for one release cycle to allow callers to transition their calls. 
From 143ddfa169bbb733275b1a720bafd5a4366b3d89 Mon Sep 17 00:00:00 2001
From: Marco Crivellari
Date: Fri, 19 Sep 2025 16:50:37 +0200
Subject: rcu: replace use of system_wq with system_percpu_wq

Currently, if a user enqueues a work item using schedule_delayed_work(),
the workqueue used is "system_wq" (a per-CPU workqueue), while
queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not
specified). The same applies to schedule_work(), which uses system_wq,
while queue_work() again makes use of WORK_CPU_UNBOUND. This lack of
consistency cannot be addressed without refactoring the API.

system_wq is a per-CPU workqueue, yet nothing in its name indicates that
CPU-affinity constraint, which is very often not required by users. Make
it clear by adding a system_percpu_wq. The old wq will be kept for a few
release cycles.

Suggested-by: Tejun Heo
Signed-off-by: Marco Crivellari
Reviewed-by: Frederic Weisbecker
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tasks.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index f92443561d36..2dc044fd126e 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -553,13 +553,13 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
                 rtpcp_next = rtp->rtpcp_array[index];
                 if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
                         cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND;
-                        queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work);
+                        queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work);
                         index++;
                         if (index < num_possible_cpus()) {
                                 rtpcp_next = rtp->rtpcp_array[index];
                                 if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
                                         cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND;
-                                        queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work);
+                                        queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work);
                                 }
                         }
                 }
--
cgit v1.2.3
From 499d48f75b230522f4aa5aa4b9cc3c5b1594e1af Mon Sep 17 00:00:00 2001
From: Marco Crivellari
Date: Fri, 19 Sep 2025 16:50:38 +0200
Subject: rcu: WQ_PERCPU added to alloc_workqueue users
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, if a user enqueues a work item using schedule_delayed_work(),
the workqueue used is "system_wq" (a per-CPU workqueue), while
queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not
specified). The same applies to schedule_work(), which uses system_wq,
while queue_work() again makes use of WORK_CPU_UNBOUND. This lack of
consistency cannot be addressed without refactoring the API.

alloc_workqueue() treats all queues as per-CPU by default, while unbound
workqueues must opt in via WQ_UNBOUND. This default is suboptimal: most
workloads benefit from unbound queues, allowing the scheduler to place
worker threads where they’re needed and reducing noise when CPUs are
isolated.

This patch adds a new WQ_PERCPU flag to explicitly request the per-CPU
behavior. Both flags coexist for one release cycle to allow callers to
transition their calls. Once migration is complete, WQ_UNBOUND can be
removed and unbound will become the implicit default.

With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND),
any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND
must now use WQ_PERCPU. All existing users have been updated accordingly.

Suggested-by: Tejun Heo
Signed-off-by: Marco Crivellari
Reviewed-by: Frederic Weisbecker
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1291e0761d70..01e76c6b2c13 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4890,7 +4890,7 @@ void __init rcu_init(void)
                 rcutree_online_cpu(cpu);
 
         /* Create workqueue for Tree SRCU and for expedited GPs. */
-        rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
+        rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         WARN_ON(!rcu_gp_wq);
         sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM, 0);
--
cgit v1.2.3
From 82c427bc935aa5b91d0cabbbc062e71132be2bb8 Mon Sep 17 00:00:00 2001
From: Marco Crivellari
Date: Fri, 19 Sep 2025 16:50:39 +0200
Subject: rcu: WQ_UNBOUND added to sync_wq workqueue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, if a user enqueues a work item using schedule_delayed_work(),
the workqueue used is "system_wq" (a per-CPU workqueue), while
queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not
specified). The same applies to schedule_work(), which uses system_wq,
while queue_work() again makes use of WORK_CPU_UNBOUND. This lack of
consistency cannot be addressed without refactoring the API.

alloc_workqueue() treats all queues as per-CPU by default, while unbound
workqueues must opt in via WQ_UNBOUND. This default is suboptimal: most
workloads benefit from unbound queues, allowing the scheduler to place
worker threads where they’re needed and reducing noise when CPUs are
isolated.

This change adds the WQ_UNBOUND flag to sync_wq, to make explicit that
this workqueue can be unbound and that it does not benefit from per-CPU
work. Once migration is complete, WQ_UNBOUND can be removed and unbound
will become the implicit default.

With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND),
any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND
must now use WQ_PERCPU.

Suggested-by: Tejun Heo
Signed-off-by: Marco Crivellari
Reviewed-by: Frederic Weisbecker
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 01e76c6b2c13..31690ffa452a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4893,7 +4893,7 @@ void __init rcu_init(void)
         rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         WARN_ON(!rcu_gp_wq);
-        sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM, 0);
+        sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
         WARN_ON(!sync_wq);
 
         /* Respect if explicitly disabled via a boot parameter. */
--
cgit v1.2.3
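
Taken together, the three workqueue patches above make queue placement
explicit at each call site. A hypothetical driver-style sketch (all names
invented, not from the patches) of the resulting conventions:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_percpu_wq;
static struct workqueue_struct *my_unbound_wq;

static void my_work_fn(struct work_struct *work)
{
        /* ... */
}
static DECLARE_WORK(my_work, my_work_fn);

static int __init example_init(void)
{
        /* Explicitly per-CPU, matching what rcu_gp_wq now declares. */
        my_percpu_wq = alloc_workqueue("ex_percpu",
                                       WQ_MEM_RECLAIM | WQ_PERCPU, 0);
        /* Explicitly unbound, matching what sync_wq now declares. */
        my_unbound_wq = alloc_workqueue("ex_unbound",
                                        WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
        if (!my_percpu_wq || !my_unbound_wq)
                return -ENOMEM;

        /* The system per-CPU workqueue under its new, explicit name. */
        queue_work_on(WORK_CPU_UNBOUND, system_percpu_wq, &my_work);
        return 0;
}
module_init(example_init);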