From 4457265c61f304ebe1e732ec1b729315b379972b Mon Sep 17 00:00:00 2001 From: zhang jiao Date: Mon, 10 Nov 2025 09:26:07 +0800 Subject: workqueue: Remove unused assert_rcu_or_wq_mutex_or_pool_mutex assert_rcu_or_wq_mutex_or_pool_mutex is never referenced in the code. Just remove it. Signed-off-by: zhang jiao Reviewed-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 45320e27a16c..c6e52f3862a5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -541,12 +541,6 @@ static void show_one_worker_pool(struct worker_pool *pool); !lockdep_is_held(&wq_pool_mutex), \ "RCU or wq_pool_mutex should be held") -#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() && \ - !lockdep_is_held(&wq->mutex) && \ - !lockdep_is_held(&wq_pool_mutex), \ - "RCU, wq->mutex or wq_pool_mutex should be held") - #define for_each_bh_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(bh_worker_pools, cpu)[0]; \ (pool) < &per_cpu(bh_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ -- cgit v1.2.3 From e36bce4466d7807a40720abd277803fcad823c08 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 17 Nov 2025 11:09:11 +0800 Subject: workqueue: Update the rescuer's affinity only when it is detached When a rescuer is attached to a pool, its affinity should be only managed by the pool. But updating the detached rescuer's affinity is still meaningful so that it will not disrupt isolated CPUs when it is to be waken up. But the commit d64f2fa064f8 ("kernel/workqueue: Let rescuers follow unbound wq cpumask changes") updates the affinity unconditionally, and causes some issues 1) it also changes the affinity when the rescuer is already attached to a pool, which violates the affinity management. 
2) the said commit tries to update the affinity of the rescuers, but it misses the rescuers of the PERCPU workqueues, and isolated CPUs can be possibly disrupted by these rescuers when they are summoned. 3) The affinity to set to the rescuers should be consistent in all paths when a rescuer is in detached state. The affinity could be either wq_unbound_cpumask or unbound_effective_cpumask(wq). Related paths: rescuer's worker_detach_from_pool() update wq_unbound_cpumask update wq's cpumask init_rescuer() Both affinities are Ok as long as they are consistent in all paths. But using unbound_effective_cpumask(wq) requires much more code to maintain the consistency, and it doesn't make much sense since the affinity is only effective when the rescuer is not processing works. wq_unbound_cpumask is more favorable. Fix the 1) issue by testing rescuer->pool before updating with wq_pool_attach_mutex held. Fix the 2) issue by moving the rescuer's affinity updating code to the place updating wq_unbound_cpumask and make it also update for PERCPU workqueues. Partially cleanup the 3) consistency issue by using wq_unbound_cpumask. So that the path of "updating wq's cpumask" doesn't need to maintain it, and both the paths of "updating wq_unbound_cpumask" and "rescuer's worker_detach_from_pool()" use wq_unbound_cpumask. Cleanup for init_rescuer()'s consistency for affinity can be done in future. 
Cc: Juri Lelli Cc: Waiman Long Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c6e52f3862a5..bc673ceaac55 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5370,11 +5370,6 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx) /* update node_nr_active->max */ wq_update_node_max_active(ctx->wq, -1); - /* rescuer needs to respect wq cpumask changes */ - if (ctx->wq->rescuer) - set_cpus_allowed_ptr(ctx->wq->rescuer->task, - unbound_effective_cpumask(ctx->wq)); - mutex_unlock(&ctx->wq->mutex); } @@ -6933,6 +6928,11 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) if (!ret) { mutex_lock(&wq_pool_attach_mutex); cpumask_copy(wq_unbound_cpumask, unbound_cpumask); + /* rescuer needs to respect cpumask changes when it is not attached */ + list_for_each_entry(wq, &workqueues, list) { + if (wq->rescuer && !wq->rescuer->pool) + unbind_worker(wq->rescuer); + } mutex_unlock(&wq_pool_attach_mutex); } return ret; -- cgit v1.2.3 From 8ac4dbe7dd05f44121da120e480239dc89c3b496 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 17 Nov 2025 11:09:12 +0800 Subject: workqueue: Let DISASSOCIATED workers follow unbound wq cpumask changes When workqueue cpumask changes are committed, the DISASSOCIATED workers affinity is not touched and this might be a problem down the line for isolated setups when the DISASSOCIATED pools still have works to run after the cpu is offline. Make sure the workers' affinity is updated every time a workqueue cpumask changes, so these workers can't break isolation. 
Cc: Juri Lelli Cc: Waiman Long Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bc673ceaac55..5916342ba6e3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -6926,6 +6926,10 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) } if (!ret) { + int cpu; + struct worker_pool *pool; + struct worker *worker; + mutex_lock(&wq_pool_attach_mutex); cpumask_copy(wq_unbound_cpumask, unbound_cpumask); /* rescuer needs to respect cpumask changes when it is not attached */ @@ -6933,6 +6937,15 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) if (wq->rescuer && !wq->rescuer->pool) unbind_worker(wq->rescuer); } + /* DISASSOCIATED worker needs to respect wq_unbound_cpumask */ + for_each_possible_cpu(cpu) { + for_each_cpu_worker_pool(pool, cpu) { + if (!(pool->flags & POOL_DISASSOCIATED)) + continue; + for_each_pool_worker(worker, pool) + unbind_worker(worker); + } + } mutex_unlock(&wq_pool_attach_mutex); } return ret; -- cgit v1.2.3 From c9c19e8bbc1ebc4e74140f7f9acbd90dcdf36748 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 17 Nov 2025 11:09:13 +0800 Subject: workqueue: Init rescuer's affinities as wq_unbound_cpumask The affinity to set to the rescuers should be consistent in all paths when a rescuer is in detached state. The affinity could be either wq_unbound_cpumask or unbound_effective_cpumask(wq). Related paths: rescuer's worker_detach_from_pool() update wq_unbound_cpumask update wq's cpumask init_rescuer() Both affinities are Ok as long as they are consistent in all paths. In the commit 449b31ad2937 ("workqueue: Init rescuer's affinities as the wq's effective cpumask") makes init_rescuer use unbound_effective_cpumask(wq) which is consistent with then apply_wqattrs_commit(). 
But using unbound_effective_cpumask(wq) requres much more code to maintain the consistency, and it doesn't make much sense since the affinity is only effective when the rescuer is not processing works. wq_unbound_cpumask is more favorable. So apply_wqattrs_commit() and the path of "updating wq's cpumask" had been changed to not update the rescuer's affinity, and both the paths of "updating wq_unbound_cpumask" and "rescuer's worker_detach_from_pool()" had been changed to use wq_unbound_cpumask. Now, make init_rescuer() use wq_unbound_cpumask for rescuer's affinity and make all the paths consistent. Cc: Juri Lelli Cc: Waiman Long Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5916342ba6e3..d9ce3ab0ec23 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5603,10 +5603,13 @@ static int init_rescuer(struct workqueue_struct *wq) } wq->rescuer = rescuer; - if (wq->flags & WQ_UNBOUND) - kthread_bind_mask(rescuer->task, unbound_effective_cpumask(wq)); + + /* initial cpumask is consistent with the detached rescuer and unbind_worker() */ + if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask)) + kthread_bind_mask(rescuer->task, wq_unbound_cpumask); else kthread_bind_mask(rescuer->task, cpu_possible_mask); + wake_up_process(rescuer->task); return 0; -- cgit v1.2.3 From 99ed6f62a46e91dc796b785618d646eeded1b230 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 21 Nov 2025 22:57:14 +0800 Subject: workqueue: Factor out assign_rescuer_work() Move the code to assign work to rescuer and assign_rescuer_work(). 
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d9ce3ab0ec23..0e5ec6e002b1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3437,6 +3437,23 @@ sleep: goto woke_up; } +static bool assign_rescuer_work(struct pool_workqueue *pwq, struct worker *rescuer) +{ + struct worker_pool *pool = pwq->pool; + struct work_struct *work, *n; + + /* + * Slurp in all works issued via this workqueue and + * process'em. + */ + list_for_each_entry_safe(work, n, &pool->worklist, entry) { + if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n)) + pwq->stats[PWQ_STAT_RESCUED]++; + } + + return !list_empty(&rescuer->scheduled); +} + /** * rescuer_thread - the rescuer thread function * @__rescuer: self @@ -3491,7 +3508,6 @@ repeat: struct pool_workqueue *pwq = list_first_entry(&wq->maydays, struct pool_workqueue, mayday_node); struct worker_pool *pool = pwq->pool; - struct work_struct *work, *n; __set_current_state(TASK_RUNNING); list_del_init(&pwq->mayday_node); @@ -3502,18 +3518,9 @@ repeat: raw_spin_lock_irq(&pool->lock); - /* - * Slurp in all works issued via this workqueue and - * process'em. 
- */ WARN_ON_ONCE(!list_empty(&rescuer->scheduled)); - list_for_each_entry_safe(work, n, &pool->worklist, entry) { - if (get_work_pwq(work) == pwq && - assign_work(work, rescuer, &n)) - pwq->stats[PWQ_STAT_RESCUED]++; - } - if (!list_empty(&rescuer->scheduled)) { + if (assign_rescuer_work(pwq, rescuer)) { process_scheduled_works(rescuer); /* -- cgit v1.2.3 From 7b05c90b3302cf3d830dfa6f8961376bcaf43b94 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 21 Nov 2025 22:57:15 +0800 Subject: workqueue: Only assign rescuer work when really needed If the pwq does not need rescue (normal workers have been created or become available), the rescuer can immediately move on to other stalled pwqs. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0e5ec6e002b1..656715d216b4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3442,6 +3442,10 @@ static bool assign_rescuer_work(struct pool_workqueue *pwq, struct worker *rescu struct worker_pool *pool = pwq->pool; struct work_struct *work, *n; + /* need rescue? */ + if (!pwq->nr_active || !need_to_create_worker(pool)) + return false; + /* * Slurp in all works issued via this workqueue and * process'em. -- cgit v1.2.3 From 6d90215dc015f7630589b29fc2c771bce16a4c96 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 21 Nov 2025 22:57:16 +0800 Subject: workqueue: Don't rely on wq->rescuer to stop rescuer The commit1 def98c84b6cd ("workqueue: Fix spurious sanity check failures in destroy_workqueue()") tries to fix spurious sanity check failures by stopping send_mayday() via setting wq->rescuer to NULL. But it fails to stop the pwq->mayday_node requeuing in the rescuer, and the commit2 e66b39af00f4 ("workqueue: Fix pwq ref leak in rescuer_thread()") fixes it by checking wq->rescuer which is the result of commit1. 
Both commits together really fix spurious sanity check failures caused by the rescuer, but they both use a convoluted method by relying on wq->rescuer state rather than the real count of work items. Actually __WQ_DESTROYING and drain_workqueue() together already stop send_mayday() by draining all the work items and ensuring no new work item requeuing. And the more proper fix to stop the pwq->mayday_node requeuing in the rescuer is from commit3 4f3f4cf388f8 ("workqueue: avoid unneeded requeuing the pwq in rescuer thread") and renders the checking of wq->rescuer in commit2 unnecessary. So __WQ_DESTROYING, drain_workqueue() and commit3 together fix spurious sanity check failures introduced by the rescuer. Just remove the convoluted code of using wq->rescuer. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 656715d216b4..253311af47c6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3539,10 +3539,9 @@ repeat: if (pwq->nr_active && need_to_create_worker(pool)) { raw_spin_lock(&wq_mayday_lock); /* - * Queue iff we aren't racing destruction - * and somebody else hasn't queued it already. + * Queue iff somebody else hasn't queued it already. 
*/ - if (wq->rescuer && list_empty(&pwq->mayday_node)) { + if (list_empty(&pwq->mayday_node)) { get_pwq(pwq); list_add_tail(&pwq->mayday_node, &wq->maydays); } @@ -5905,16 +5904,10 @@ void destroy_workqueue(struct workqueue_struct *wq) /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ if (wq->rescuer) { - struct worker *rescuer = wq->rescuer; - - /* this prevents new queueing */ - raw_spin_lock_irq(&wq_mayday_lock); - wq->rescuer = NULL; - raw_spin_unlock_irq(&wq_mayday_lock); - /* rescuer will empty maydays list before exiting */ - kthread_stop(rescuer->task); - kfree(rescuer); + kthread_stop(wq->rescuer->task); + kfree(wq->rescuer); + wq->rescuer = NULL; } /* -- cgit v1.2.3