From 0998f0ac308c14f7a0750e9c24fe2083a817d8fb Mon Sep 17 00:00:00 2001
From: "Yury Norov [NVIDIA]"
Date: Wed, 4 Jun 2025 20:21:37 -0400
Subject: workqueue: fix opencoded cpumask_next_and_wrap() in wq_select_unbound_cpu()

Using the dedicated helper is less verbose and more efficient than
open-coding it with cpumask_next_and() followed by cpumask_first_and().

Signed-off-by: Yury Norov [NVIDIA]
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 97f37b5bae66..d9de0f2a2e00 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2221,12 +2221,9 @@ static int wq_select_unbound_cpu(int cpu)
 	}
 
 	new_cpu = __this_cpu_read(wq_rr_cpu_last);
-	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
-	if (unlikely(new_cpu >= nr_cpu_ids)) {
-		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
-		if (unlikely(new_cpu >= nr_cpu_ids))
-			return cpu;
-	}
+	new_cpu = cpumask_next_and_wrap(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+	if (unlikely(new_cpu >= nr_cpu_ids))
+		return cpu;
 	__this_cpu_write(wq_rr_cpu_last, new_cpu);
 
 	return new_cpu;
--
cgit v1.2.3
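Note: the helper consolidates exactly the search the removed lines
open-coded: find the next set bit in the AND of two masks, wrap around
once, and return nr_cpu_ids when the intersection is empty (hence the
single range check that remains). A minimal userspace model of that
semantic, using plain unsigned longs in place of struct cpumask;
next_and_wrap() and NBITS are illustrative stand-ins, not the kernel API:

	#include <stdio.h>

	#define NBITS 8	/* plays the role of nr_cpu_ids */

	/*
	 * Find the next set bit in (a & b) after start, wrapping around
	 * once (in this model the start bit itself is checked last);
	 * returns NBITS if the intersection is empty.
	 */
	static int next_and_wrap(int start, unsigned long a, unsigned long b)
	{
		unsigned long both = a & b;

		for (int i = 1; i <= NBITS; i++) {
			int bit = (start + i) % NBITS;

			if (both & (1UL << bit))
				return bit;
		}
		return NBITS;
	}

	int main(void)
	{
		unsigned long online = 0x42;	/* bits 1 and 6 */
		unsigned long allowed = 0xff;

		printf("%d\n", next_and_wrap(1, allowed, online));	/* 6 */
		printf("%d\n", next_and_wrap(6, allowed, online));	/* wraps to 1 */
		printf("%d\n", next_and_wrap(0, 0, online));		/* 8 == NBITS */
		return 0;
	}

This is why one call can replace the cpumask_next_and() plus
cpumask_first_and() pair along with one of the two range checks.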
From fda6add9243867486f8cd456d7b05395d2132e0a Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Mon, 16 Jun 2025 13:59:20 -0400
Subject: workqueue: Basic memory allocation profiling support

Hook alloc_workqueue() and alloc_workqueue_attrs() so that their
allocations are accounted to the callsite. Since these allocations are
made on behalf of other subsystems, this helps memory allocation
profiling track down leaks.

Cc: Tejun Heo
Cc: Lai Jiangshan
Signed-off-by: Kent Overstreet
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d9de0f2a2e00..c24844afaa98 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4626,7 +4626,7 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
  *
  * Return: The allocated new workqueue_attr on success. %NULL on failure.
  */
-struct workqueue_attrs *alloc_workqueue_attrs(void)
+struct workqueue_attrs *alloc_workqueue_attrs_noprof(void)
 {
 	struct workqueue_attrs *attrs;
 
@@ -5679,12 +5679,12 @@ static struct workqueue_struct *__alloc_workqueue(const char *fmt,
 	else
 		wq_size = sizeof(*wq);
 
-	wq = kzalloc(wq_size, GFP_KERNEL);
+	wq = kzalloc_noprof(wq_size, GFP_KERNEL);
 	if (!wq)
 		return NULL;
 
 	if (flags & WQ_UNBOUND) {
-		wq->unbound_attrs = alloc_workqueue_attrs();
+		wq->unbound_attrs = alloc_workqueue_attrs_noprof();
 		if (!wq->unbound_attrs)
 			goto err_free_wq;
 	}
@@ -5774,9 +5774,9 @@ err_destroy:
 }
 
 __printf(1, 4)
-struct workqueue_struct *alloc_workqueue(const char *fmt,
-					 unsigned int flags,
-					 int max_active, ...)
+struct workqueue_struct *alloc_workqueue_noprof(const char *fmt,
+						unsigned int flags,
+						int max_active, ...)
 {
 	struct workqueue_struct *wq;
 	va_list args;
@@ -5791,7 +5791,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
 
 	return wq;
 }
-EXPORT_SYMBOL_GPL(alloc_workqueue);
+EXPORT_SYMBOL_GPL(alloc_workqueue_noprof);
 
 #ifdef CONFIG_LOCKDEP
 __printf(1, 5)
--
cgit v1.2.3
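Note: under memory allocation profiling, the _noprof suffix marks the
function that performs the raw allocation, while a wrapper macro under
the original name tags the allocation with the caller's location. A
userspace sketch of that pattern; my_alloc() and my_alloc_noprof() are
hypothetical names, not the kernel's alloc_hooks() machinery:

	#include <stdio.h>
	#include <stdlib.h>

	/* does the real allocation, records nothing */
	static void *my_alloc_noprof(size_t size)
	{
		return malloc(size);
	}

	/*
	 * Wrapper under the "public" name: records the callsite, so
	 * allocations made on behalf of a caller are charged to that
	 * caller rather than to the allocating helper.
	 */
	#define my_alloc(size)						\
	({								\
		printf("alloc %zu bytes at %s:%d\n",			\
		       (size_t)(size), __FILE__, __LINE__);		\
		my_alloc_noprof(size);					\
	})

	int main(void)
	{
		void *p = my_alloc(128);	/* accounted to this line */

		free(p);
		return 0;
	}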
From 128ea9f6ccfb6960293ae4212f4f97165e42222d Mon Sep 17 00:00:00 2001
From: Marco Crivellari
Date: Sat, 14 Jun 2025 15:35:29 +0200
Subject: workqueue: Add system_percpu_wq and system_dfl_wq

Currently, if a user enqueues a work item with schedule_delayed_work(),
the workqueue used is system_wq (a per-CPU workqueue), while
queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not
specified). The same applies to schedule_work(), which uses system_wq,
versus queue_work(), which again uses WORK_CPU_UNBOUND. This lack of
consistency cannot be addressed without refactoring the API.

system_wq is a per-CPU workqueue, yet nothing in its name conveys that
CPU affinity constraint, which very often is not required by users.
Make the constraint explicit by adding system_percpu_wq.

system_unbound_wq should be the default workqueue so as not to enforce
locality constraints on random work whenever they are not required. Add
system_dfl_wq to encourage its use when unbound work should be used.

Suggested-by: Tejun Heo
Signed-off-by: Marco Crivellari
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cf6203282737..71a4dd59977c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -505,12 +505,16 @@ static struct kthread_worker *pwq_release_worker __ro_after_init;
 
 struct workqueue_struct *system_wq __ro_after_init;
 EXPORT_SYMBOL(system_wq);
+struct workqueue_struct *system_percpu_wq __ro_after_init;
+EXPORT_SYMBOL(system_percpu_wq);
 struct workqueue_struct *system_highpri_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_highpri_wq);
 struct workqueue_struct *system_long_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_long_wq);
 struct workqueue_struct *system_unbound_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
+struct workqueue_struct *system_dfl_wq __ro_after_init;
+EXPORT_SYMBOL_GPL(system_dfl_wq);
 struct workqueue_struct *system_freezable_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
 struct workqueue_struct *system_power_efficient_wq __ro_after_init;
@@ -7816,10 +7820,11 @@ void __init workqueue_init_early(void)
 	}
 
 	system_wq = alloc_workqueue("events", 0, 0);
+	system_percpu_wq = alloc_workqueue("events", 0, 0);
 	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
 	system_long_wq = alloc_workqueue("events_long", 0, 0);
-	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
-					    WQ_MAX_ACTIVE);
+	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
+	system_dfl_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
 	system_freezable_wq = alloc_workqueue("events_freezable",
 					      WQ_FREEZABLE, 0);
 	system_power_efficient_wq = alloc_workqueue("events_power_efficient",
@@ -7830,8 +7835,8 @@ void __init workqueue_init_early(void)
 	system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0);
 	system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
 					       WQ_BH | WQ_HIGHPRI, 0);
-	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
-	       !system_unbound_wq || !system_freezable_wq ||
+	BUG_ON(!system_wq || !system_percpu_wq || !system_highpri_wq || !system_long_wq ||
+	       !system_unbound_wq || !system_freezable_wq || !system_dfl_wq ||
 	       !system_power_efficient_wq ||
 	       !system_freezable_power_efficient_wq ||
 	       !system_bh_wq || !system_bh_highpri_wq);
--
cgit v1.2.3
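Note: a minimal usage sketch for the two new workqueues, assuming a
kernel-module context; my_work_fn() and the two work items are
illustrative, not part of the patch:

	#include <linux/workqueue.h>

	static void my_work_fn(struct work_struct *work)
	{
		/* work body */
	}

	static DECLARE_WORK(percpu_work, my_work_fn);
	static DECLARE_WORK(unbound_work, my_work_fn);

	static void example_queue(void)
	{
		/* the per-CPU affinity constraint is now explicit in the name */
		queue_work(system_percpu_wq, &percpu_work);

		/* no locality requirement: prefer the unbound default */
		queue_work(system_dfl_wq, &unbound_work);
	}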
From 261dce3d64021e7ec828a17b4975ce9182e54ceb Mon Sep 17 00:00:00 2001
From: Chuyi Zhou
Date: Tue, 17 Jun 2025 12:42:16 +0800
Subject: workqueue: Initialize wq_isolated_cpumask in workqueue_init_early()

Currently, when isolcpus is enabled via the cmdline, wq_isolated_cpumask
does not include those isolated CPUs, even though wq_unbound_cpumask has
already excluded them. Only when an isolated cpuset partition is
successfully configured does workqueue_unbound_exclude_cpumask()
overwrite wq_isolated_cpumask to include both the cmdline-specified
isolated CPUs and the isolated CPUs within the cpuset partitions.

Fix this issue by initializing wq_isolated_cpumask properly in
workqueue_init_early().

Fixes: fe28f631fa94 ("workqueue: Add workqueue_unbound_exclude_cpumask() to exclude CPUs from wq_unbound_cpumask")
Signed-off-by: Chuyi Zhou
Reviewed-by: Waiman Long
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 97f37b5bae66..9f9148075828 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -7767,7 +7767,8 @@ void __init workqueue_init_early(void)
 		restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
 
 	cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask);
-
+	cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask,
+		       housekeeping_cpumask(HK_TYPE_DOMAIN));
 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
 
 	unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs();
--
cgit v1.2.3

From fc2898ea793a48bc4b74b61cde2d8656f20efdf4 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert"
Date: Mon, 23 Jun 2025 01:30:49 +0100
Subject: workqueue: Remove unused work_on_cpu_safe

The last use of the work_on_cpu_safe() macro was removed recently by
commit 9cda46babdfe ("crypto: n2 - remove Niagara2 SPU driver").

Remove it, and the work_on_cpu_safe_key() function it calls.

Signed-off-by: Dr. David Alan Gilbert
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 25 -------------------------
 1 file changed, 25 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 086452e339c4..12c3080332bd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -6771,31 +6771,6 @@ long work_on_cpu_key(int cpu, long (*fn)(void *),
 	return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu_key);
-
-/**
- * work_on_cpu_safe_key - run a function in thread context on a particular cpu
- * @cpu: the cpu to run on
- * @fn: the function to run
- * @arg: the function argument
- * @key: The lock class key for lock debugging purposes
- *
- * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
- * any locks which would prevent @fn from completing.
- *
- * Return: The value @fn returns.
- */
-long work_on_cpu_safe_key(int cpu, long (*fn)(void *),
-			  void *arg, struct lock_class_key *key)
-{
-	long ret = -ENODEV;
-
-	cpus_read_lock();
-	if (cpu_online(cpu))
-		ret = work_on_cpu_key(cpu, fn, arg, key);
-	cpus_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL_GPL(work_on_cpu_safe_key);
 
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_FREEZER
--
cgit v1.2.3

From df316ab3d4440177e322a2847969d356c29b0eef Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 9 Jul 2025 15:19:03 +0200
Subject: workqueue: Use atomic_try_cmpxchg_relaxed() in tryinc_node_nr_active()

Use the try_cmpxchg() family of locking primitives instead of the
cmpxchg(*ptr, old, new) == old pattern.

The x86 CMPXCHG instruction returns success in the ZF flag, so this
change saves a compare after CMPXCHG (and a related move instruction
in front of CMPXCHG). Also, try_cmpxchg() implicitly assigns the old
*ptr value to "old" when CMPXCHG fails, so there is no need to re-read
the value in the loop.

The generated assembly improves from:

     3f7:	44 8b 0a             	mov    (%rdx),%r9d
     3fa:	eb 12                	jmp    40e <...>
     3fc:	8d 79 01             	lea    0x1(%rcx),%edi
     3ff:	89 c8                	mov    %ecx,%eax
     401:	f0 0f b1 7a 04       	lock cmpxchg %edi,0x4(%rdx)
     406:	39 c1                	cmp    %eax,%ecx
     408:	0f 84 83 00 00 00    	je     491 <...>
     40e:	8b 4a 04             	mov    0x4(%rdx),%ecx
     411:	41 39 c9             	cmp    %ecx,%r9d
     414:	7f e6                	jg     3fc <...>

to:

    256b:	45 8b 08             	mov    (%r8),%r9d
    256e:	41 8b 40 04          	mov    0x4(%r8),%eax
    2572:	41 39 c1             	cmp    %eax,%r9d
    2575:	7e 10                	jle    2587 <...>
    2577:	8d 78 01             	lea    0x1(%rax),%edi
    257a:	f0 41 0f b1 78 04    	lock cmpxchg %edi,0x4(%r8)
    2580:	75 f0                	jne    2572 <...>

No functional change intended.

Signed-off-by: Uros Bizjak
Cc: Tejun Heo
Cc: Lai Jiangshan
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 12c3080332bd..ad83662a0533 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1690,17 +1690,14 @@ static void __pwq_activate_work(struct pool_workqueue *pwq,
 static bool tryinc_node_nr_active(struct wq_node_nr_active *nna)
 {
 	int max = READ_ONCE(nna->max);
+	int old = atomic_read(&nna->nr);
 
-	while (true) {
-		int old, tmp;
-
-		old = atomic_read(&nna->nr);
+	do {
 		if (old >= max)
 			return false;
-		tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1);
-		if (tmp == old)
-			return true;
-	}
+	} while (!atomic_try_cmpxchg_relaxed(&nna->nr, &old, old + 1));
+
+	return true;
 }
 
 /**
--
cgit v1.2.3
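Note: the same transformation expressed with C11 atomics, whose
atomic_compare_exchange_weak() behaves like try_cmpxchg(): on failure it
writes the observed value back into "old", so the loop needs no explicit
re-read. A standalone sketch; tryinc_capped() is an illustrative
stand-in for tryinc_node_nr_active(), not kernel code:

	#include <stdatomic.h>
	#include <stdbool.h>

	/* increment *nr unless it has reached max; true on success */
	static bool tryinc_capped(atomic_int *nr, int max)
	{
		int old = atomic_load(nr);

		do {
			if (old >= max)
				return false;
			/* on failure, "old" is refreshed automatically */
		} while (!atomic_compare_exchange_weak(nr, &old, old + 1));

		return true;
	}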