summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2026-03-06 16:56:24 -0500
committerTrond Myklebust <trond.myklebust@hammerspace.com>2026-04-13 11:56:20 -0700
commit765bde47fe7f197dabeb12da76831f40d0b20377 (patch)
treed5b0a0d233d854429857ea20090c2872ad3f6642
parent100142093e22b3f7741ac88e94878bb3694e306f (diff)
xprtrdma: Close lost-wakeup race in xprt_rdma_alloc_slot
xprt_rdma_alloc_slot() and xprt_rdma_free_slot() lack serialization between the buffer pool and the backlog queue. A buffer freed after rpcrdma_buffer_get() finds the pool empty but before rpc_sleep_on() places the task on the backlog is returned to the pool with no waiter to wake, leaving the task stuck on the backlog indefinitely.

After joining the backlog, re-check the pool and route any recovered buffer through xprt_wake_up_backlog(), whose queue lock serializes with concurrent wakeups and avoids double-assignment of slots.

Because xprt_rdma_free_slot() does not hold reserve_lock, the XPRT_CONGESTED double-check in xprt_throttle_congested() is ineffective: a task can join the backlog through that path after free_slot has already found it empty and cleared the bit. Avoid this by using xprt_add_backlog_noncongested(), which queues the task without setting XPRT_CONGESTED, so every allocation reaches xprt_rdma_alloc_slot() and its post-sleep re-check.

Fixes: edb41e61a54e ("xprtrdma: Make rpc_rqst part of rpcrdma_req")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
-rw-r--r--include/linux/sunrpc/xprt.h2
-rw-r--r--net/sunrpc/xprt.c16
-rw-r--r--net/sunrpc/xprtrdma/transport.c15
3 files changed, 32 insertions, 1 deletion
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f46d1fb8f71a..a82045804d34 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -404,6 +404,8 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
unsigned int max_req);
void xprt_free(struct rpc_xprt *);
void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
+void xprt_add_backlog_noncongested(struct rpc_xprt *xprt,
+ struct rpc_task *task);
bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
void xprt_cleanup_ids(void);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4fbb57a29704..48a3618cbb29 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1663,6 +1663,22 @@ void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(xprt_add_backlog);
+/**
+ * xprt_add_backlog_noncongested - queue task on backlog
+ * @xprt: transport whose backlog queue receives the task
+ * @task: task to queue
+ *
+ * Like xprt_add_backlog, but does not set XPRT_CONGESTED.
+ * For transports whose free_slot path does not synchronize
+ * with xprt_throttle_congested via reserve_lock.
+ */
+void xprt_add_backlog_noncongested(struct rpc_xprt *xprt,
+ struct rpc_task *task)
+{
+ rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
+}
+EXPORT_SYMBOL_GPL(xprt_add_backlog_noncongested);
+
static bool __xprt_set_rq(struct rpc_task *task, void *data)
{
struct rpc_rqst *req = data;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ca079439f9cc..61706df5e485 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -511,7 +511,20 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
out_sleep:
task->tk_status = -EAGAIN;
- xprt_add_backlog(xprt, task);
+ xprt_add_backlog_noncongested(xprt, task);
+ /* A buffer freed between buffer_get and rpc_sleep_on
+ * goes back to the pool with no waiter to wake.
+ * Re-check after joining the backlog to close that gap.
+ */
+ req = rpcrdma_buffer_get(&r_xprt->rx_buf);
+ if (req) {
+ struct rpc_rqst *rqst = &req->rl_slot;
+
+ if (!xprt_wake_up_backlog(xprt, rqst)) {
+ memset(rqst, 0, sizeof(*rqst));
+ rpcrdma_buffer_put(&r_xprt->rx_buf, req);
+ }
+ }
}
/**