diff options
| author | Tejun Heo <tj@kernel.org> | 2026-04-20 20:03:26 -1000 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2026-04-20 20:10:50 -1000 |
| commit | 4fe985292709eeb6a4653c71660f893e26c2f2dd (patch) | |
| tree | 4fa69c4095a04f119e6919dca823e10756110891 /lib | |
| parent | 5897ca15d2c444af95eaae5f0a384401765afa00 (diff) | |
rhashtable: Bounce deferred worker kick through irq_work
Inserts past 75% load call schedule_work(&ht->run_work) to kick an
async resize. If a caller holds a raw spinlock (e.g. an
insecure_elasticity user), schedule_work() under that lock records
caller_lock -> pool->lock -> pi_lock -> rq->__lock
A cycle forms if any of these locks is acquired in the reverse
direction elsewhere. sched_ext, the only current insecure_elasticity
user, hits this: it holds scx_sched_lock across rhashtable inserts of
sub-schedulers, while scx_bypass() takes rq->__lock -> scx_sched_lock.
Exercising the resize path produces:
Chain exists of:
&pool->lock --> &rq->__lock --> scx_sched_lock
Bounce the kick from the insert paths through irq_work so
schedule_work() runs from hard IRQ context with the caller's lock no
longer held. rht_deferred_worker()'s self-rearm on error stays on
schedule_work(&ht->run_work) - the worker runs in process context with
no caller lock held, and keeping the self-requeue on @run_work lets
cancel_work_sync() in rhashtable_free_and_destroy() drain it.
v3: Keep rht_deferred_worker()'s self-rearm on schedule_work(&run_work).
Routing it through irq_work in v2 broke cancel_work_sync()'s
self-requeue handling - an irq_work queued after irq_work_sync()
returned but while cancel_work_sync() was still waiting could fire
post-teardown.
v2: Bounce unconditionally instead of gating on insecure_elasticity,
as suggested by Herbert.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/rhashtable.c | 31 |
1 file changed, 28 insertions, 3 deletions
diff --git a/lib/rhashtable.c b/lib/rhashtable.c index fb2b7bc137ba..7a67ef5b67b6 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -441,10 +441,33 @@ static void rht_deferred_worker(struct work_struct *work) mutex_unlock(&ht->mutex); + /* + * Re-arm via @run_work, not @run_irq_work. + * rhashtable_free_and_destroy() drains async work as irq_work_sync() + * followed by cancel_work_sync(). If this site queued irq_work while + * cancel_work_sync() was waiting for us, irq_work_sync() would already + * have returned and the stale irq_work could fire post-teardown. + * cancel_work_sync() natively handles self-requeue on @run_work. + */ if (err) schedule_work(&ht->run_work); } +/* + * Insert-path callers can run under a raw spinlock (e.g. an insecure_elasticity + * user). Calling schedule_work() under that lock records caller_lock -> + * pool->lock -> pi_lock -> rq->__lock, closing a locking cycle if any of + * these is acquired in the reverse direction elsewhere. Bounce through + * irq_work so the schedule_work() runs with the caller's lock no longer held. + */ +static void rht_deferred_irq_work(struct irq_work *irq_work) +{ + struct rhashtable *ht = container_of(irq_work, struct rhashtable, + run_irq_work); + + schedule_work(&ht->run_work); +} + static int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl) { @@ -477,7 +500,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, if (err == -EEXIST) err = 0; } else - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); return err; @@ -488,7 +511,7 @@ fail: /* Schedule async rehash to retry allocation in process context. 
*/ if (err == -ENOMEM) - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); return err; } @@ -630,7 +653,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, rht_unlock(tbl, bkt, flags); if (inserted && rht_grow_above_75(ht, tbl)) - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); } } while (!IS_ERR_OR_NULL(new_tbl)); @@ -1085,6 +1108,7 @@ int rhashtable_init_noprof(struct rhashtable *ht, RCU_INIT_POINTER(ht->tbl, tbl); INIT_WORK(&ht->run_work, rht_deferred_worker); + init_irq_work(&ht->run_irq_work, rht_deferred_irq_work); return 0; } @@ -1150,6 +1174,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, struct bucket_table *tbl, *next_tbl; unsigned int i; + irq_work_sync(&ht->run_irq_work); cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); |
