diff options
Diffstat (limited to 'kernel/bpf')
| -rw-r--r-- | kernel/bpf/rqspinlock.c | 187 |
1 files changed, 173 insertions, 14 deletions
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index 361d452f027c..bddbcc47d38f 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -31,6 +31,7 @@ */ #include "../locking/qspinlock.h" #include "../locking/lock_events.h" +#include "rqspinlock.h" /* * The basic principle of a queue-based spinlock can best be understood @@ -74,16 +75,147 @@ struct rqspinlock_timeout { u64 timeout_end; u64 duration; + u64 cur; u16 spin; }; #define RES_TIMEOUT_VAL 2 -static noinline int check_timeout(struct rqspinlock_timeout *ts) +DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks); +EXPORT_SYMBOL_GPL(rqspinlock_held_locks); + +static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts) +{ + if (!(atomic_read_acquire(&lock->val) & (mask))) + return true; + return false; +} + +static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask, + struct rqspinlock_timeout *ts) +{ + struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); + int cnt = min(RES_NR_HELD, rqh->cnt); + + /* + * Return an error if we hold the lock we are attempting to acquire. + * We'll iterate over max 32 locks; no need to do is_lock_released. + */ + for (int i = 0; i < cnt - 1; i++) { + if (rqh->locks[i] == lock) + return -EDEADLK; + } + return 0; +} + +/* + * This focuses on the most common case of ABBA deadlocks (or ABBA involving + * more locks, which reduce to ABBA). This is not exhaustive, and we rely on + * timeouts as the final line of defense. + */ +static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask, + struct rqspinlock_timeout *ts) +{ + struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); + int rqh_cnt = min(RES_NR_HELD, rqh->cnt); + void *remote_lock; + int cpu; + + /* + * Find the CPU holding the lock that we want to acquire. If there is a + * deadlock scenario, we will read a stable set on the remote CPU and + * find the target. This would be a constant time operation instead of + * O(NR_CPUS) if we could determine the owning CPU from a lock value, but + * that requires increasing the size of the lock word. + */ + for_each_possible_cpu(cpu) { + struct rqspinlock_held *rqh_cpu = per_cpu_ptr(&rqspinlock_held_locks, cpu); + int real_cnt = READ_ONCE(rqh_cpu->cnt); + int cnt = min(RES_NR_HELD, real_cnt); + + /* + * Let's ensure to break out of this loop if the lock is available for + * us to potentially acquire. + */ + if (is_lock_released(lock, mask, ts)) + return 0; + + /* + * Skip ourselves, and CPUs whose count is less than 2, as they need at + * least one held lock and one acquisition attempt (reflected as top + * most entry) to participate in an ABBA deadlock. + * + * If cnt is more than RES_NR_HELD, it means the current lock being + * acquired won't appear in the table, and other locks in the table are + * already held, so we can't determine ABBA. + */ + if (cpu == smp_processor_id() || real_cnt < 2 || real_cnt > RES_NR_HELD) + continue; + + /* + * Obtain the entry at the top, this corresponds to the lock the + * remote CPU is attempting to acquire in a deadlock situation, + * and would be one of the locks we hold on the current CPU. + */ + remote_lock = READ_ONCE(rqh_cpu->locks[cnt - 1]); + /* + * If it is NULL, we've raced and cannot determine a deadlock + * conclusively, skip this CPU. + */ + if (!remote_lock) + continue; + /* + * Find if the lock we're attempting to acquire is held by this CPU. + * Don't consider the topmost entry, as that must be the latest lock + * being held or acquired. For a deadlock, the target CPU must also + * attempt to acquire a lock we hold, so for this search only 'cnt - 1' + * entries are important. + */ + for (int i = 0; i < cnt - 1; i++) { + if (READ_ONCE(rqh_cpu->locks[i]) != lock) + continue; + /* + * We found our lock as held on the remote CPU. Is the + * acquisition attempt on the remote CPU for a lock held + * by us? If so, we have a deadlock situation, and need + * to recover. + */ + for (int i = 0; i < rqh_cnt - 1; i++) { + if (rqh->locks[i] == remote_lock) + return -EDEADLK; + } + /* + * Inconclusive; retry again later. + */ + return 0; + } + } + return 0; +} + +static noinline int check_deadlock(rqspinlock_t *lock, u32 mask, + struct rqspinlock_timeout *ts) +{ + int ret; + + ret = check_deadlock_AA(lock, mask, ts); + if (ret) + return ret; + ret = check_deadlock_ABBA(lock, mask, ts); + if (ret) + return ret; + + return 0; +} + +static noinline int check_timeout(rqspinlock_t *lock, u32 mask, + struct rqspinlock_timeout *ts) { u64 time = ktime_get_mono_fast_ns(); + u64 prev = ts->cur; if (!ts->timeout_end) { + ts->cur = time; ts->timeout_end = time + ts->duration; return 0; } @@ -91,6 +223,15 @@ static noinline int check_timeout(struct rqspinlock_timeout *ts) if (time > ts->timeout_end) return -ETIMEDOUT; + /* + * A millisecond interval passed from last time? Trigger deadlock + * checks. + */ + if (prev + NSEC_PER_MSEC < time) { + ts->cur = time; + return check_deadlock(lock, mask, ts); + } + return 0; } @@ -99,21 +240,22 @@ static noinline int check_timeout(struct rqspinlock_timeout *ts) * as the macro does internal amortization for us. */ #ifndef res_smp_cond_load_acquire -#define RES_CHECK_TIMEOUT(ts, ret) \ - ({ \ - if (!(ts).spin++) \ - (ret) = check_timeout(&(ts)); \ - (ret); \ +#define RES_CHECK_TIMEOUT(ts, ret, mask) \ + ({ \ + if (!(ts).spin++) \ + (ret) = check_timeout((lock), (mask), &(ts)); \ + (ret); \ }) #else -#define RES_CHECK_TIMEOUT(ts, ret, mask) \ +#define RES_CHECK_TIMEOUT(ts, ret, mask) \ ({ (ret) = check_timeout(&(ts)); }) #endif /* * Initialize the 'spin' member. + * Set spin member to 0 to trigger AA/ABBA checks immediately. */ -#define RES_INIT_TIMEOUT(ts) ({ (ts).spin = 1; }) +#define RES_INIT_TIMEOUT(ts) ({ (ts).spin = 0; }) /* * We only need to reset 'timeout_end', 'spin' will just wrap around as necessary. @@ -142,6 +284,7 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]); * * Return: * * 0 - Lock was acquired successfully. + * * -EDEADLK - Lock acquisition failed because of AA/ABBA deadlock. * * -ETIMEDOUT - Lock acquisition failed because of timeout. * * (queue tail, pending bit, lock value) @@ -213,6 +356,11 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) } /* + * Grab an entry in the held locks array, to enable deadlock detection. + */ + grab_held_lock_entry(lock); + + /* * We're pending, wait for the owner to go away. * * 0,1,1 -> *,1,0 @@ -225,7 +373,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) */ if (val & _Q_LOCKED_MASK) { RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); - res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret)); + res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK)); } if (ret) { @@ -240,7 +388,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) */ clear_pending(lock); lockevent_inc(rqspinlock_lock_timeout); - return ret; + goto err_release_entry; } /* @@ -258,6 +406,11 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) */ queue: lockevent_inc(lock_slowpath); + /* + * Grab deadlock detection entry for the queue path. + */ + grab_held_lock_entry(lock); + node = this_cpu_ptr(&rqnodes[0].mcs); idx = node->count++; tail = encode_tail(smp_processor_id(), idx); @@ -277,9 +430,9 @@ queue: lockevent_inc(lock_no_node); RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); while (!queued_spin_trylock(lock)) { - if (RES_CHECK_TIMEOUT(ts, ret)) { + if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) { lockevent_inc(rqspinlock_lock_timeout); - break; + goto err_release_node; } cpu_relax(); } @@ -375,7 +528,7 @@ queue: */ RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2); val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) || - RES_CHECK_TIMEOUT(ts, ret)); + RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK)); waitq_timeout: if (ret) { @@ -408,7 +561,7 @@ waitq_timeout: WRITE_ONCE(next->locked, RES_TIMEOUT_VAL); } lockevent_inc(rqspinlock_lock_timeout); - goto release; + goto err_release_node; } /* @@ -455,5 +608,11 @@ release: */ __this_cpu_dec(rqnodes[0].mcs.count); return ret; +err_release_node: + trace_contention_end(lock, ret); + __this_cpu_dec(rqnodes[0].mcs.count); +err_release_entry: + release_held_lock_entry(); + return ret; } EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath); |
