Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/rqspinlock.c | 187
1 file changed, 173 insertions(+), 14 deletions(-)
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index 361d452f027c..bddbcc47d38f 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -31,6 +31,7 @@
*/
#include "../locking/qspinlock.h"
#include "../locking/lock_events.h"
+#include "rqspinlock.h"
/*
* The basic principle of a queue-based spinlock can best be understood
@@ -74,16 +75,147 @@
struct rqspinlock_timeout {
u64 timeout_end;
u64 duration;
+ u64 cur;
u16 spin;
};
#define RES_TIMEOUT_VAL 2
-static noinline int check_timeout(struct rqspinlock_timeout *ts)
+DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks);
+EXPORT_SYMBOL_GPL(rqspinlock_held_locks);
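
For context, the per-CPU table walked by the deadlock checks below comes from the rqspinlock.h header included above. A minimal sketch of the shape this file relies on, with names inferred from the usage in this patch (the real header may differ in the exact RES_NR_HELD value and in how overflow is handled):

struct rqspinlock_held {
	int	cnt;			/* grab_held_lock_entry() calls on this CPU */
	void	*locks[RES_NR_HELD];	/* held locks; topmost entry is the lock being acquired */
};

/* Record the lock we are about to wait on in this CPU's table (sketch). */
static __always_inline void grab_held_lock_entry(void *lock)
{
	int cnt = this_cpu_inc_return(rqspinlock_held_locks.cnt);

	if (unlikely(cnt > RES_NR_HELD))
		return;	/* table overflowed; cnt still tracks nesting depth */
	this_cpu_write(rqspinlock_held_locks.locks[cnt - 1], lock);
}

/* Drop the topmost entry again; used on the error paths below (sketch). */
static __always_inline void release_held_lock_entry(void)
{
	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);

	if (likely(rqh->cnt <= RES_NR_HELD))
		WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
	this_cpu_dec(rqspinlock_held_locks.cnt);
}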
+
+static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts)
+{
+ if (!(atomic_read_acquire(&lock->val) & (mask)))
+ return true;
+ return false;
+}
+
+static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
+ struct rqspinlock_timeout *ts)
+{
+ struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
+ int cnt = min(RES_NR_HELD, rqh->cnt);
+
+ /*
+ * Return an error if we hold the lock we are attempting to acquire.
+ * We'll iterate over at most 32 locks; no need to call is_lock_released().
+ */
+ for (int i = 0; i < cnt - 1; i++) {
+ if (rqh->locks[i] == lock)
+ return -EDEADLK;
+ }
+ return 0;
+}
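
As a caller-level illustration of the AA case this catches; the raw_res_spin_lock() wrapper name and the surrounding structure are assumptions for illustration, not part of this patch:

int ret;

raw_res_spin_lock(&obj->lock);		/* recorded in this CPU's held-locks table */

/* A re-entrant path on the same CPU (e.g. a tracing program attached inside
 * the critical section) tries to take the same lock again:
 */
ret = raw_res_spin_lock(&obj->lock);	/* check_deadlock_AA() returns -EDEADLK */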
+
+/*
+ * This focuses on the most common case of ABBA deadlocks (or ABBA involving
+ * more locks, which reduce to ABBA). This is not exhaustive, and we rely on
+ * timeouts as the final line of defense.
+ */
+static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
+ struct rqspinlock_timeout *ts)
+{
+ struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
+ int rqh_cnt = min(RES_NR_HELD, rqh->cnt);
+ void *remote_lock;
+ int cpu;
+
+ /*
+ * Find the CPU holding the lock that we want to acquire. If there is a
+ * deadlock scenario, we will read a stable set on the remote CPU and
+ * find the target. This would be a constant time operation instead of
+ * O(NR_CPUS) if we could determine the owning CPU from a lock value, but
+ * that requires increasing the size of the lock word.
+ */
+ for_each_possible_cpu(cpu) {
+ struct rqspinlock_held *rqh_cpu = per_cpu_ptr(&rqspinlock_held_locks, cpu);
+ int real_cnt = READ_ONCE(rqh_cpu->cnt);
+ int cnt = min(RES_NR_HELD, real_cnt);
+
+ /*
+ * Bail out of this loop early if the lock has become available
+ * for us to acquire.
+ */
+ if (is_lock_released(lock, mask, ts))
+ return 0;
+
+ /*
+ * Skip ourselves, and CPUs whose count is less than 2, as they need at
+ * least one held lock and one acquisition attempt (reflected as the
+ * topmost entry) to participate in an ABBA deadlock.
+ *
+ * If real_cnt is more than RES_NR_HELD, the lock the remote CPU is
+ * currently acquiring won't appear in its table, and the locks that do
+ * appear are already held, so we can't determine ABBA.
+ */
+ if (cpu == smp_processor_id() || real_cnt < 2 || real_cnt > RES_NR_HELD)
+ continue;
+
+ /*
+ * Obtain the entry at the top; in a deadlock situation this is the lock
+ * the remote CPU is attempting to acquire, and it would be one of the
+ * locks we hold on the current CPU.
+ */
+ remote_lock = READ_ONCE(rqh_cpu->locks[cnt - 1]);
+ /*
+ * If it is NULL, we've raced and cannot determine a deadlock
+ * conclusively, skip this CPU.
+ */
+ if (!remote_lock)
+ continue;
+ /*
+ * Find if the lock we're attempting to acquire is held by this CPU.
+ * Don't consider the topmost entry, as that must be the latest lock
+ * being held or acquired. For a deadlock, the target CPU must also
+ * attempt to acquire a lock we hold, so for this search only 'cnt - 1'
+ * entries are important.
+ */
+ for (int i = 0; i < cnt - 1; i++) {
+ if (READ_ONCE(rqh_cpu->locks[i]) != lock)
+ continue;
+ /*
+ * We found our lock as held on the remote CPU. Is the
+ * acquisition attempt on the remote CPU for a lock held
+ * by us? If so, we have a deadlock situation, and need
+ * to recover.
+ */
+ for (int i = 0; i < rqh_cnt - 1; i++) {
+ if (rqh->locks[i] == remote_lock)
+ return -EDEADLK;
+ }
+ /*
+ * Inconclusive; retry again later.
+ */
+ return 0;
+ }
+ }
+ return 0;
+}
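
A concrete two-CPU shape of the ABBA case the scan above is looking for; the lock names and the raw_res_spin_lock() wrapper are illustrative only:

/*
 *   CPU 0                         CPU 1
 *   raw_res_spin_lock(&A);        raw_res_spin_lock(&B);
 *   raw_res_spin_lock(&B);        raw_res_spin_lock(&A);
 *     spins, table: {A, B}          spins, table: {B, A}
 *
 * CPU 0 is acquiring B and finds B among CPU 1's held entries; CPU 1's
 * topmost entry (A, the lock it is acquiring) is held locally on CPU 0,
 * so one of the two waiters reports -EDEADLK and backs off, letting the
 * other make progress.
 */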
+
+static noinline int check_deadlock(rqspinlock_t *lock, u32 mask,
+ struct rqspinlock_timeout *ts)
+{
+ int ret;
+
+ ret = check_deadlock_AA(lock, mask, ts);
+ if (ret)
+ return ret;
+ ret = check_deadlock_ABBA(lock, mask, ts);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
+ struct rqspinlock_timeout *ts)
{
u64 time = ktime_get_mono_fast_ns();
+ u64 prev = ts->cur;
if (!ts->timeout_end) {
+ ts->cur = time;
ts->timeout_end = time + ts->duration;
return 0;
}
@@ -91,6 +223,15 @@ static noinline int check_timeout(struct rqspinlock_timeout *ts)
if (time > ts->timeout_end)
return -ETIMEDOUT;
+ /*
+ * Has a millisecond passed since the last check? Trigger the deadlock
+ * checks.
+ */
+ if (prev + NSEC_PER_MSEC < time) {
+ ts->cur = time;
+ return check_deadlock(lock, mask, ts);
+ }
+
return 0;
}
@@ -99,21 +240,22 @@ static noinline int check_timeout(struct rqspinlock_timeout *ts)
* as the macro does internal amortization for us.
*/
#ifndef res_smp_cond_load_acquire
-#define RES_CHECK_TIMEOUT(ts, ret) \
- ({ \
- if (!(ts).spin++) \
- (ret) = check_timeout(&(ts)); \
- (ret); \
+#define RES_CHECK_TIMEOUT(ts, ret, mask) \
+ ({ \
+ if (!(ts).spin++) \
+ (ret) = check_timeout((lock), (mask), &(ts)); \
+ (ret); \
})
#else
-#define RES_CHECK_TIMEOUT(ts, ret, mask) \
-	({ (ret) = check_timeout(&(ts)); })
+#define RES_CHECK_TIMEOUT(ts, ret, mask) \
+	({ (ret) = check_timeout((lock), (mask), &(ts)); })
#endif
/*
* Initialize the 'spin' member.
+ * Set spin member to 0 to trigger AA/ABBA checks immediately.
*/
-#define RES_INIT_TIMEOUT(ts) ({ (ts).spin = 1; })
+#define RES_INIT_TIMEOUT(ts) ({ (ts).spin = 0; })
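
The two amortization layers work together as sketched below: the u16 'spin' counter limits how often check_timeout() is entered at all, and ts->cur limits how often the comparatively expensive deadlock scan runs within that; starting 'spin' at 0 simply makes the very first loop iteration take the check_timeout() path so the timestamps are set up right away. The loop shape here is illustrative (try_acquire() is a hypothetical helper), not the literal slowpath code:

while (!try_acquire(lock)) {
	if (!ts.spin++) {		/* true on the first pass, then once per u16 wrap */
		ret = check_timeout(lock, mask, &ts);	/* deadlock scan at most once/ms */
		if (ret)
			break;		/* -EDEADLK or -ETIMEDOUT */
	}
	cpu_relax();
}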
/*
* We only need to reset 'timeout_end', 'spin' will just wrap around as necessary.
@@ -142,6 +284,7 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);
*
* Return:
* * 0 - Lock was acquired successfully.
+ * * -EDEADLK - Lock acquisition failed because of AA/ABBA deadlock.
* * -ETIMEDOUT - Lock acquisition failed because of timeout.
*
* (queue tail, pending bit, lock value)
@@ -213,6 +356,11 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
}
/*
+ * Grab an entry in the held locks array, to enable deadlock detection.
+ */
+ grab_held_lock_entry(lock);
+
+ /*
* We're pending, wait for the owner to go away.
*
* 0,1,1 -> *,1,0
@@ -225,7 +373,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
*/
if (val & _Q_LOCKED_MASK) {
RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
- res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret));
+ res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK));
}
if (ret) {
@@ -240,7 +388,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
*/
clear_pending(lock);
lockevent_inc(rqspinlock_lock_timeout);
- return ret;
+ goto err_release_entry;
}
/*
@@ -258,6 +406,11 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
*/
queue:
lockevent_inc(lock_slowpath);
+ /*
+ * Grab deadlock detection entry for the queue path.
+ */
+ grab_held_lock_entry(lock);
+
node = this_cpu_ptr(&rqnodes[0].mcs);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
@@ -277,9 +430,9 @@ queue:
lockevent_inc(lock_no_node);
RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
while (!queued_spin_trylock(lock)) {
- if (RES_CHECK_TIMEOUT(ts, ret)) {
+ if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) {
lockevent_inc(rqspinlock_lock_timeout);
- break;
+ goto err_release_node;
}
cpu_relax();
}
@@ -375,7 +528,7 @@ queue:
*/
RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2);
val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
- RES_CHECK_TIMEOUT(ts, ret));
+ RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK));
waitq_timeout:
if (ret) {
@@ -408,7 +561,7 @@ waitq_timeout:
WRITE_ONCE(next->locked, RES_TIMEOUT_VAL);
}
lockevent_inc(rqspinlock_lock_timeout);
- goto release;
+ goto err_release_node;
}
/*
@@ -455,5 +608,11 @@ release:
*/
__this_cpu_dec(rqnodes[0].mcs.count);
return ret;
+err_release_node:
+ trace_contention_end(lock, ret);
+ __this_cpu_dec(rqnodes[0].mcs.count);
+err_release_entry:
+ release_held_lock_entry();
+ return ret;
}
EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath);
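
For completeness, a sketch of how these error returns surface to users of the lock; the raw_res_spin_lock_irqsave()/raw_res_spin_unlock_irqrestore() wrapper names and the caller structure are assumptions for illustration, not part of this patch:

static int update_bucket(struct bucket *b)	/* hypothetical caller */
{
	unsigned long flags;
	int ret;

	/* Acquisition can now fail: -EDEADLK on AA/ABBA, -ETIMEDOUT otherwise. */
	ret = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (ret)
		return ret;	/* propagate the error instead of hanging the CPU */

	/* critical section */

	raw_res_spin_unlock_irqrestore(&b->lock, flags);
	return 0;
}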