summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author: Edward Srouji <edwards@nvidia.com> 2026-04-27 14:02:32 +0300
committer: Jason Gunthorpe <jgg@nvidia.com> 2026-04-29 16:37:11 -0300
commit: 38694f4639c45599161860e828dc4ac77abf8cea (patch)
tree: e2d68a7d908753b2073363864e4a87e7ebe772ba /drivers
parent: c488df06bd552bb8b6e14fa0cfd5ad986c6e9525 (diff)
RDMA/mlx5: Fix UAF in SRQ destroy due to race with create
A race condition exists between mlx5_cmd_destroy_srq() and mlx5_cmd_create_srq() that can lead to a use-after-free (UAF) [1].

After destroy_srq_split() releases the SRQ to firmware, the SRQN can be immediately reallocated for a new SRQ being created concurrently. If the create path stores the new SRQ in the xarray before the destroy path erases it, the destroy will incorrectly delete the new SRQ's entry. Later accesses then hit freed memory.

Fix by replacing the unconditional xa_erase_irq() with xa_cmpxchg_irq() that only erases the entry if it hasn't already been replaced (still contains XA_ZERO_ENTRY), preserving any newly created SRQ.

[1]
RIP: 0010:mlx5_cmd_destroy_srq+0xd8/0x110 [mlx5_ib]
Code: 89 e1 ba 06 04 00 00 4c 89 f6 48 89 ef e8 80 19 70 e1 c6 83 a0 0f 00 00 00 fb 5b 44 89 e8 5d 41 5c 41 5d 41 5e c3 cc cc cc cc <0f> 0b 48 89 c2 83 e2 03 48 83 fa 02 75 08 48 3d 05 c0 ff ff 77 08
RSP: 0018:ff110001037b7d08 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ff1100010bb9c000 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ff110001037b7c90
RBP: ff1100010bb9cfa0 R08: 0000000000000000 R09: 0000000000000000
R10: ff110001037b7da0 R11: ff11000104f29580 R12: ff1100010e2ac090
R13: 000000000000000d R14: 0000000000000001 R15: ff11000105336300
FS: 00007fa24787c740(0000) GS:ff1100046eb8d000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa247984e90 CR3: 0000000109d59005 CR4: 0000000000373eb0
Call Trace:
 <TASK>
 mlx5_ib_destroy_srq+0x25/0xa0 [mlx5_ib]
 ib_destroy_srq_user+0x21/0x90 [ib_core]
 uverbs_free_srq+0x1b/0x50 [ib_uverbs]
 destroy_hw_idr_uobject+0x1e/0x50 [ib_uverbs]
 uverbs_destroy_uobject+0x35/0x180 [ib_uverbs]
 __uverbs_cleanup_ufile+0xdd/0x140 [ib_uverbs]
 uverbs_destroy_ufile_hw+0x38/0xf0 [ib_uverbs]
 ib_uverbs_close+0x17/0xa0 [ib_uverbs]
 __fput+0xe0/0x2a0
 __x64_sys_close+0x3a/0x80
 do_syscall_64+0x55/0xac0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7fa247984ea4
Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d a5 51 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 3c c3 0f 1f 00 55 48 89 e5 48 83 ec 10 89 7d
RSP: 002b:00007ffecfa79498 EFLAGS: 00000202 ORIG_RAX: 0000000000000003
RAX: ffffffffffffffda RBX: 0000200000000080 RCX: 00007fa247984ea4
RDX: 0000000000000040 RSI: 0000200000000200 RDI: 0000000000000003
RBP: 00007ffecfa794e0 R08: 00007ffecfa794e0 R09: 00007ffecfa794e0
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000001
R13: 0000000000000000 R14: 0000200000000000 R15: 0000200000000009
 </TASK>
---[ end trace 0000000000000000 ]---

Fixes: fd89099d635e ("RDMA/mlx5: Issue FW command to destroy SRQ on reentry")
Link: https://patch.msgid.link/r/20260427-security-bug-fixes-v3-1-4621fa52de0e@nvidia.com
Signed-off-by: Edward Srouji <edwards@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/infiniband/hw/mlx5/srq_cmd.c | 9
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 8b3385396599..c1a088120915 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -683,7 +683,14 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
return err;
}
- xa_erase_irq(&table->array, srq->srqn);
+
+ /*
+ * A race can occur where a concurrent create gets the same srqn
+ * (after hardware released it) and overwrites XA_ZERO_ENTRY with
+ * its new SRQ before we reach here. In that case, we must not erase
+ * the entry as it now belongs to the new SRQ.
+ */
+ xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, NULL, 0);
mlx5_core_res_put(&srq->common);
wait_for_completion(&srq->common.free);