From 6b395d31146a3fae775823ea8570a37b922f6685 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 24 Mar 2025 09:39:35 +0530 Subject: RDMA/bnxt_re: Fix budget handling of notification queue The cited commit in Fixes tag introduced a bug which can cause hang of completion queue processing because of notification queue budget goes to zero. Found while doing nfs over rdma mount and umount. Below message is noticed because of the existing bug. kernel: cm_destroy_id_wait_timeout: cm_id=00000000ff6c6cc6 timed out. state 11 -> 0, refcnt=1 Fix to handle this issue - Driver will not change nq->budget upon create and destroy of cq and srq rdma resources. Fixes: cb97b377a135 ("RDMA/bnxt_re: Refurbish CQ to NQ hash calculation") Link: https://patch.msgid.link/r/20250324040935.90182-1-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Kashyap Desai Signed-off-by: Kalesh AP Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9082b3fd2b47..e14b05cd089a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1785,8 +1785,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); ib_umem_release(srq->umem); atomic_dec(&rdev->stats.res.srq_count); - if (nq) - nq->budget--; return 0; } @@ -1908,8 +1906,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, goto fail; } } - if (nq) - nq->budget++; active_srqs = atomic_inc_return(&rdev->stats.res.srq_count); if (active_srqs > rdev->stats.res.srq_watermark) rdev->stats.res.srq_watermark = active_srqs; @@ -3079,7 +3075,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ib_umem_release(cq->umem); atomic_dec(&rdev->stats.res.cq_count); - nq->budget--; kfree(cq->cql); return 0; } -- cgit v1.2.3 From 62dd71e691109bb44ba8ad7f58b3a2ac6b69d496 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 16:57:15 +0100 Subject: RDMA/ucaps: Avoid format-security warning Passing a non-literal format string to dev_set_name causes a warning: drivers/infiniband/core/ucaps.c:173:33: error: format string is not a string literal (potentially insecure) [-Werror,-Wformat-security] 173 | ret = dev_set_name(&ucap->dev, ucap_names[type]); | ^~~~~~~~~~~~~~~~ drivers/infiniband/core/ucaps.c:173:33: note: treat the string as an argument to avoid this 173 | ret = dev_set_name(&ucap->dev, ucap_names[type]); | ^ | "%s", Turn the name into the %s argument as suggested by gcc. Fixes: 61e51682816d ("RDMA/uverbs: Introduce UCAP (User CAPabilities) API") Link: https://patch.msgid.link/r/20250314155721.264083-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Zhu Yanjun Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucaps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c index 6853c6d078f9..de5cb8bf0a61 100644 --- a/drivers/infiniband/core/ucaps.c +++ b/drivers/infiniband/core/ucaps.c @@ -170,7 +170,7 @@ int ib_create_ucap(enum rdma_user_cap type) ucap->dev.class = &ucaps_class; ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type); ucap->dev.release = ucap_dev_release; - ret = dev_set_name(&ucap->dev, ucap_names[type]); + ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]); if (ret) goto err_device; -- cgit v1.2.3 From 95ba3850fed03e01b422ab5d7943aeba130c9723 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 24 Mar 2025 20:31:32 +0800 Subject: RDMA/usnic: Fix passing zero to PTR_ERR in usnic_ib_pci_probe() drivers/infiniband/hw/usnic/usnic_ib_main.c:590 usnic_ib_pci_probe() warn: passing zero to 'PTR_ERR' Make usnic_ib_device_add() return NULL on fail path, also remove useless NULL check for usnic_ib_discover_pf() Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Link: https://patch.msgid.link/r/20250324123132.2392077-1-yuehaibing@huawei.com Signed-off-by: Yue Haibing Reviewed-by: Zhu Yanjun Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 4ddcd5860e0f..11eca39b73a9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -397,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(-EFAULT); + return NULL; } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); @@ -517,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) } us_ibdev = usnic_ib_device_add(parent_pci); - if (IS_ERR_OR_NULL(us_ibdev)) { - us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT); + if (!us_ibdev) { + us_ibdev = ERR_PTR(-EFAULT); goto out; } @@ -586,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, } pf = usnic_ib_discover_pf(vf->vnic); - if (IS_ERR_OR_NULL(pf)) { - usnic_err("Failed to discover pf of vnic %s with err%ld\n", - pci_name(pdev), PTR_ERR(pf)); - err = pf ? PTR_ERR(pf) : -EFAULT; + if (IS_ERR(pf)) { + err = PTR_ERR(pf); + usnic_err("Failed to discover pf of vnic %s with err%d\n", + pci_name(pdev), err); goto out_clean_vnic; } -- cgit v1.2.3 From d247667ecd6411ec5bec9a38db7feaa599ce3ee2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 2 Apr 2025 10:09:44 +0300 Subject: RDMA/mlx5: Fix compilation warning when USER_ACCESS isn't set The cited commit made fs.c always compile, even when INFINIBAND_USER_ACCESS isn't set. This results in a compilation warning about an unused object when compiling with W=1 and USER_ACCESS is unset. Fix this by defining uverbs_destroy_def_handler() even when USER_ACCESS isn't set. Fixes: 36e0d433672f ("RDMA/mlx5: Compile fs.c regardless of INFINIBAND_USER_ACCESS config") Link: https://patch.msgid.link/r/20250402070944.1022093-1-mbloch@nvidia.com Signed-off-by: Mark Bloch Tested-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/fs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 251246c73b33..0ff9f18a71e8 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -3461,7 +3461,6 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY)); const struct uapi_definition mlx5_ib_flow_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( @@ -3472,7 +3471,6 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_STEERING_ANCHOR, UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)), -#endif {}, }; -- cgit v1.2.3 From 1b2fe85f3cf19026a0e9037242bcbf7e736b22e3 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 2 Apr 2025 11:26:57 +0800 Subject: RDMA/rxe: Fix null pointer dereference in ODP MR check The blktests/rnbd reported a null pointer dereference as following. Similar to the mlx5, introduce a is_odp_mr() to check if the odp is enabled in this mr. Workqueue: rxe_wq do_work [rdma_rxe] RIP: 0010:rxe_mr_copy+0x57/0x210 [rdma_rxe] Code: 7c 04 48 89 f3 48 89 d5 41 89 cf 45 89 c4 0f 84 dc 00 00 00 89 ca e8 f8 f8 ff ff 85 c0 0f 85 75 01 00 00 49 8b 86 f0 00 00 00 40 28 02 0f 85 98 01 00 00 41 8b 46 78 41 8b 8e 10 01 00 00 8d RSP: 0018:ffffa0aac02cfcf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9079cd440024 RCX: 0000000000000000 RDX: 000000000000003c RSI: ffff9079cd440060 RDI: ffff9079cd665600 RBP: ffff9079c0e5e45a R08: 0000000000000000 R09: 0000000000000000 R10: 000000003c000000 R11: 0000000000225510 R12: 0000000000000000 R13: 0000000000000000 R14: ffff9079cd665600 R15: 000000000000003c FS: 0000000000000000(0000) GS:ffff907ccfa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 0000000119498001 CR4: 00000000001726f0 Call Trace: ? __die_body+0x1e/0x60 ? page_fault_oops+0x14f/0x4c0 ? rxe_mr_copy+0x57/0x210 [rdma_rxe] ? search_bpf_extables+0x5f/0x80 ? exc_page_fault+0x7e/0x180 ? asm_exc_page_fault+0x26/0x30 ? rxe_mr_copy+0x57/0x210 [rdma_rxe] ? rxe_mr_copy+0x48/0x210 [rdma_rxe] ? rxe_pool_get_index+0x50/0x90 [rdma_rxe] rxe_receiver+0x1d98/0x2530 [rdma_rxe] ? psi_task_switch+0x1ff/0x250 ? finish_task_switch+0x92/0x2d0 ? __schedule+0xbdf/0x17c0 do_task+0x65/0x1e0 [rdma_rxe] process_scheduled_works+0xaa/0x3f0 worker_thread+0x117/0x240 Fixes: d03fb5c6599e ("RDMA/rxe: Allow registering MRs for On-Demand Paging") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/r/20250402032657.1762800-1-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Reviewed-by: Daisuke Matsuda Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 6 ++++++ drivers/infiniband/sw/rxe/rxe_mr.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_resp.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index feb386d98d1d..0bc3fbb6554f 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -140,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } +static inline bool is_odp_mr(struct rxe_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 868d2f0b74e9..432d864c3ce9 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -323,7 +323,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, return err; } - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return rxe_odp_mr_copy(mr, iova, addr, length, dir); else return rxe_mr_copy_xarray(mr, iova, addr, length, dir); @@ -536,7 +536,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) u64 *va; /* ODP is not supported right now. WIP. */ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* See IBA oA19-28 */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 54ba9ee1acc5..5d9174e408db 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -650,7 +650,7 @@ static enum resp_states process_flush(struct rxe_qp *qp, struct resp_res *res = qp->resp.res; /* ODP is not supported right now. WIP. */ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* oA19-14, oA19-15 */ @@ -706,7 +706,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp, if (!res->replay) { u64 iova = qp->resp.va + qp->resp.offset; - if (mr->umem->is_odp) + if (is_odp_mr(mr)) err = rxe_odp_atomic_op(mr, iova, pkt->opcode, atmeth_comp(pkt), atmeth_swap_add(pkt), -- cgit v1.2.3 From 9beb2c91fb86e0be70a5833c6730441fa3c9efa8 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 27 Mar 2025 19:47:24 +0800 Subject: RDMA/hns: Fix wrong maximum DMA segment size Set maximum DMA segment size to 2G instead of UINT_MAX due to HW limit. Fixes: e0477b34d9d1 ("RDMA: Explicitly pass in the dma_device to ib_register_device") Link: https://patch.msgid.link/r/20250327114724.3454268-3-huangjunxian6@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index cf89a8db4f64..8d0b63d4b50a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -763,7 +763,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - dma_set_max_seg_size(dev, UINT_MAX); + dma_set_max_seg_size(dev, SZ_2G); ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); -- cgit v1.2.3 From 45f5dcdd049719fb999393b30679605f16ebce14 Mon Sep 17 00:00:00 2001 From: Sharath Srinivasan Date: Wed, 26 Mar 2025 14:05:32 -0700 Subject: RDMA/cma: Fix workqueue crash in cma_netevent_work_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct rdma_cm_id has member "struct work_struct net_work" that is reused for enqueuing cma_netevent_work_handler()s onto cma_wq. Below crash[1] can occur if more than one call to cma_netevent_callback() occurs in quick succession, which further enqueues cma_netevent_work_handler()s for the same rdma_cm_id, overwriting any previously queued work-item(s) that was just scheduled to run i.e. there is no guarantee the queued work item may run between two successive calls to cma_netevent_callback() and the 2nd INIT_WORK would overwrite the 1st work item (for the same rdma_cm_id), despite grabbing id_table_lock during enqueue. Also drgn analysis [2] indicates the work item was likely overwritten. Fix this by moving the INIT_WORK() to __rdma_create_id(), so that it doesn't race with any existing queue_work() or its worker thread. [1] Trimmed crash stack: ============================================= BUG: kernel NULL pointer dereference, address: 0000000000000008 kworker/u256:6 ... 6.12.0-0... Workqueue: cma_netevent_work_handler [rdma_cm] (rdma_cm) RIP: 0010:process_one_work+0xba/0x31a Call Trace: worker_thread+0x266/0x3a0 kthread+0xcf/0x100 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1a/0x30 ============================================= [2] drgn crash analysis: >>> trace = prog.crashed_thread().stack_trace() >>> trace (0) crash_setup_regs (./arch/x86/include/asm/kexec.h:111:15) (1) __crash_kexec (kernel/crash_core.c:122:4) (2) panic (kernel/panic.c:399:3) (3) oops_end (arch/x86/kernel/dumpstack.c:382:3) ... (8) process_one_work (kernel/workqueue.c:3168:2) (9) process_scheduled_works (kernel/workqueue.c:3310:3) (10) worker_thread (kernel/workqueue.c:3391:4) (11) kthread (kernel/kthread.c:389:9) Line workqueue.c:3168 for this kernel version is in process_one_work(): 3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); >>> trace[8]["work"] *(struct work_struct *)0xffff92577d0a21d8 = { .data = (atomic_long_t){ .counter = (s64)536870912, <=== Note }, .entry = (struct list_head){ .next = (struct list_head *)0xffff924d075924c0, .prev = (struct list_head *)0xffff924d075924c0, }, .func = (work_func_t)cma_netevent_work_handler+0x0 = 0xffffffffc2cec280, } Suspicion is that pwq is NULL: >>> trace[8]["pwq"] (struct pool_workqueue *) In process_one_work(), pwq is assigned from: struct pool_workqueue *pwq = get_work_pwq(work); and get_work_pwq() is: static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) return work_struct_pwq(data); else return NULL; } WORK_STRUCT_PWQ is 0x4: >>> print(repr(prog['WORK_STRUCT_PWQ'])) Object(prog, 'enum work_flags', value=4) But work->data is 536870912 which is 0x20000000. So, get_work_pwq() returns NULL and we crash in process_one_work(): 3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); ============================================= Fixes: 925d046e7e52 ("RDMA/core: Add a netevent notifier to cma") Cc: stable@vger.kernel.org Co-developed-by: Håkon Bugge Signed-off-by: Håkon Bugge Signed-off-by: Sharath Srinivasan Reviewed-by: Patrisious Haddad Link: https://patch.msgid.link/bf0082f9-5b25-4593-92c6-d130aa8ba439@oracle.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fedcdb56fb6b..ab31eefa916b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -72,6 +72,8 @@ static const char * const cma_events[] = { static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, enum ib_gid_type gid_type); +static void cma_netevent_work_handler(struct work_struct *_work); + const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; @@ -1047,6 +1049,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler); rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); if (parent) @@ -5241,7 +5244,6 @@ static int cma_netevent_callback(struct notifier_block *self, if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr, neigh->ha, ETH_ALEN)) continue; - INIT_WORK(¤t_id->id.net_work, cma_netevent_work_handler); cma_id_get(current_id); queue_work(cma_wq, ¤t_id->id.net_work); } -- cgit v1.2.3 From 9a0e6f15029e1a8a21e40f06fd05aa52b7f063de Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 19 Mar 2025 14:42:21 +0200 Subject: RDMA/core: Silence oversized kvmalloc() warning syzkaller triggered an oversized kvmalloc() warning. Silence it by adding __GFP_NOWARN. syzkaller log: WARNING: CPU: 7 PID: 518 at mm/util.c:665 __kvmalloc_node_noprof+0x175/0x180 CPU: 7 UID: 0 PID: 518 Comm: c_repro Not tainted 6.11.0-rc6+ #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:__kvmalloc_node_noprof+0x175/0x180 RSP: 0018:ffffc90001e67c10 EFLAGS: 00010246 RAX: 0000000000000100 RBX: 0000000000000400 RCX: ffffffff8149d46b RDX: 0000000000000000 RSI: ffff8881030fae80 RDI: 0000000000000002 RBP: 000000712c800000 R08: 0000000000000100 R09: 0000000000000000 R10: ffffc90001e67c10 R11: 0030ae0601000000 R12: 0000000000000000 R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000000 FS: 00007fde79159740(0000) GS:ffff88813bdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000180 CR3: 0000000105eb4005 CR4: 00000000003706b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ib_umem_odp_get+0x1f6/0x390 mlx5_ib_reg_user_mr+0x1e8/0x450 ib_uverbs_reg_mr+0x28b/0x440 ib_uverbs_write+0x7d3/0xa30 vfs_write+0x1ac/0x6c0 ksys_write+0x134/0x170 ? __sanitizer_cov_trace_pc+0x1c/0x50 do_syscall_64+0x50/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 37824952dc8f ("RDMA/odp: Use kvcalloc for the dma_list and page_list") Signed-off-by: Shay Drory Link: https://patch.msgid.link/c6cb92379de668be94894f49c2cfa40e73f94d56.1742388096.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_odp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e9fa22d31c23..c48ef6083020 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,12 +76,14 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, npfns = (end - start) >> PAGE_SHIFT; umem_odp->pfn_list = kvcalloc( - npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL); + npfns, sizeof(*umem_odp->pfn_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->pfn_list) return -ENOMEM; umem_odp->dma_list = kvcalloc( - ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL); + ndmas, sizeof(*umem_odp->dma_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->dma_list) { ret = -ENOMEM; goto out_pfn_list; -- cgit v1.2.3 From ffc59e32c67e599cc473d6427a4aa584399d5b3c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 10 Apr 2025 15:32:20 +0300 Subject: RDMA/bnxt_re: Remove unusable nq variable Remove nq variable from bnxt_re_create_srq() and bnxt_re_destroy_srq() as it generates the following compilation warnings: >> drivers/infiniband/hw/bnxt_re/ib_verbs.c:1777:24: warning: variable 'nq' set but not used [-Wunused-but-set-variable] 1777 | struct bnxt_qplib_nq *nq = NULL; | ^ drivers/infiniband/hw/bnxt_re/ib_verbs.c:1828:24: warning: variable 'nq' set but not used [-Wunused-but-set-variable] 1828 | struct bnxt_qplib_nq *nq = NULL; | ^ 2 warnings generated. Fixes: 6b395d31146a ("RDMA/bnxt_re: Fix budget handling of notification queue") Link: https://patch.msgid.link/r/8a4343e217d7d1c0a5a786b785c4ac57cb72a2a0.1744288299.git.leonro@nvidia.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504091055.CzgXnk4C-lkp@intel.com/ Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e14b05cd089a..063801384b2b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1774,10 +1774,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) ib_srq); struct bnxt_re_dev *rdev = srq->rdev; struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; - struct bnxt_qplib_nq *nq = NULL; - if (qplib_srq->cq) - nq = qplib_srq->cq->nq; if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) { free_page((unsigned long)srq->uctx_srq_page); hash_del(&srq->hash_entry); @@ -1825,7 +1822,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; - struct bnxt_qplib_nq *nq = NULL; struct bnxt_re_ucontext *uctx; struct bnxt_re_dev *rdev; struct bnxt_re_srq *srq; @@ -1871,7 +1867,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; - nq = &rdev->nqr->nq[0]; if (udata) { rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); -- cgit v1.2.3 From f81b33582f9339d2dc17c69b92040d3650bb4bae Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 12 Apr 2025 09:57:14 +0200 Subject: RDMA/rxe: Fix slab-use-after-free Read in rxe_queue_cleanup bug Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x7d/0xa0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xcf/0x610 mm/kasan/report.c:489 kasan_report+0xb5/0xe0 mm/kasan/report.c:602 rxe_queue_cleanup+0xd0/0xe0 drivers/infiniband/sw/rxe/rxe_queue.c:195 rxe_cq_cleanup+0x3f/0x50 drivers/infiniband/sw/rxe/rxe_cq.c:132 __rxe_cleanup+0x168/0x300 drivers/infiniband/sw/rxe/rxe_pool.c:232 rxe_create_cq+0x22e/0x3a0 drivers/infiniband/sw/rxe/rxe_verbs.c:1109 create_cq+0x658/0xb90 drivers/infiniband/core/uverbs_cmd.c:1052 ib_uverbs_create_cq+0xc7/0x120 drivers/infiniband/core/uverbs_cmd.c:1095 ib_uverbs_write+0x969/0xc90 drivers/infiniband/core/uverbs_main.c:679 vfs_write fs/read_write.c:677 [inline] vfs_write+0x26a/0xcc0 fs/read_write.c:659 ksys_write+0x1b8/0x200 fs/read_write.c:731 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xaa/0x1b0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f In the function rxe_create_cq, when rxe_cq_from_init fails, the function rxe_cleanup will be called to handle the allocated resources. In fact, some memory resources have already been freed in the function rxe_cq_from_init. Thus, this problem will occur. The solution is to let rxe_cleanup do all the work. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://paste.ubuntu.com/p/tJgC42wDf6/ Tested-by: liuyi Signed-off-by: Zhu Yanjun Link: https://patch.msgid.link/20250412075714.3257358-1-yanjun.zhu@linux.dev Reviewed-by: Daisuke Matsuda Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_cq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index fec87c9030ab..fffd144d509e 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); - if (err) { - vfree(cq->queue->buf); - kfree(cq->queue); + if (err) return err; - } cq->is_user = uresp; -- cgit v1.2.3 From 80f2ab46c2ee16f046b55306dc4db4be53125016 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 14 Apr 2025 18:42:30 -0500 Subject: irdma: free iwdev->rf after removing MSI-X Currently iwdev->rf is allocated in irdma_probe(), but free in irdma_ib_dealloc_device(). It can be misleading. Move the free to irdma_remove() to be more obvious. Freeing in irdma_ib_dealloc_device() leads to KASAN use-after-free issue. Which can also lead to NULL pointer dereference. Fix this. irdma_deinit_interrupts() can't be moved before freeing iwdef->rf, because in this case deinit interrupts will be done before freeing irqs. The simplest solution is to move kfree(iwdev->rf) to irdma_remove(). Reproducer: sudo rmmod irdma Minified splat(s): BUG: KASAN: use-after-free in irdma_remove+0x257/0x2d0 [irdma] Call Trace: ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? kfree+0x253/0x450 ? irdma_remove+0x257/0x2d0 [irdma] kasan_report+0xed/0x120 ? irdma_remove+0x257/0x2d0 [irdma] irdma_remove+0x257/0x2d0 [irdma] auxiliary_bus_remove+0x56/0x80 device_release_driver_internal+0x371/0x530 ? kernfs_put.part.0+0x147/0x310 driver_detach+0xbf/0x180 bus_remove_driver+0x11b/0x2a0 auxiliary_driver_unregister+0x1a/0x50 irdma_exit_module+0x40/0x4c [irdma] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:ice_free_rdma_qvector+0x2a/0xa0 [ice] Call Trace: ? ice_free_rdma_qvector+0x2a/0xa0 [ice] irdma_remove+0x179/0x2d0 [irdma] auxiliary_bus_remove+0x56/0x80 device_release_driver_internal+0x371/0x530 ? kobject_put+0x61/0x4b0 driver_detach+0xbf/0x180 bus_remove_driver+0x11b/0x2a0 auxiliary_driver_unregister+0x1a/0x50 irdma_exit_module+0x40/0x4c [irdma] Reported-by: Marcin Szycik Closes: https://lore.kernel.org/netdev/8e533834-4564-472f-b29b-4f1cb7730053@linux.intel.com/ Fixes: 3e0d3cb3fbe0 ("ice, irdma: move interrupts code to irdma") Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250414234231.523-1-tatyana.e.nikolova@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/main.c | 2 ++ drivers/infiniband/hw/irdma/verbs.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 1ee8969595d3..d10fd16dcec3 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -255,6 +255,8 @@ static void irdma_remove(struct auxiliary_device *aux_dev) ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); irdma_deinit_interrupts(iwdev->rf, pf); + kfree(iwdev->rf); + pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index eeb932e58730..1e8c92826de2 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4871,5 +4871,4 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev) irdma_rt_deinit_hw(iwdev); irdma_ctrl_deinit_hw(iwdev->rf); - kfree(iwdev->rf); } -- cgit v1.2.3 From 4bcc063939a560f05b05b34be68d20045a646e6e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Apr 2025 18:42:31 -0500 Subject: ice, irdma: fix an off by one in error handling code If we don't allocate the MIN number of IRQs then we need to free what we have and return -ENOMEM. The problem is this loop is off by one so it frees an entry that wasn't allocated and it doesn't free the first entry where i == 0. Fixes: 3e0d3cb3fbe0 ("ice, irdma: move interrupts code to irdma") Signed-off-by: Dan Carpenter Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250414234231.523-2-tatyana.e.nikolova@intel.com Reviewed-by: Michal Swiatkowski Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index d10fd16dcec3..7599e31b5743 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -221,7 +221,7 @@ static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) break; if (i < IRDMA_MIN_MSIX) { - for (; i > 0; i--) + while (--i >= 0) ice_free_rdma_qvector(pf, &rf->msix_entries[i]); kfree(rf->msix_entries); -- cgit v1.2.3 From d0706bfd3ee40923c001c6827b786a309e2a8713 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 6 May 2025 17:10:08 +0200 Subject: RDMA/core: Fix "KASAN: slab-use-after-free Read in ib_register_device" problem Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 strlen+0x93/0xa0 lib/string.c:420 __fortify_strlen include/linux/fortify-string.h:268 [inline] get_kobj_path_length lib/kobject.c:118 [inline] kobject_get_path+0x3f/0x2a0 lib/kobject.c:158 kobject_uevent_env+0x289/0x1870 lib/kobject_uevent.c:545 ib_register_device drivers/infiniband/core/device.c:1472 [inline] ib_register_device+0x8cf/0xe00 drivers/infiniband/core/device.c:1393 rxe_register_device+0x275/0x320 drivers/infiniband/sw/rxe/rxe_verbs.c:1552 rxe_net_add+0x8e/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:550 rxe_newlink+0x70/0x190 drivers/infiniband/sw/rxe/rxe.c:225 nldev_newlink+0x3a3/0x680 drivers/infiniband/core/nldev.c:1796 rdma_nl_rcv_msg+0x387/0x6e0 drivers/infiniband/core/netlink.c:195 rdma_nl_rcv_skb.constprop.0.isra.0+0x2e5/0x450 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x53a/0x7f0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x8d1/0xdd0 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg net/socket.c:727 [inline] ____sys_sendmsg+0xa95/0xc70 net/socket.c:2566 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2620 __sys_sendmsg+0x16d/0x220 net/socket.c:2652 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x260 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f This problem is similar to the problem that the commit 1d6a9e7449e2 ("RDMA/core: Fix use-after-free when rename device name") fixes. The root cause is: the function ib_device_rename() renames the name with lock. But in the function kobject_uevent(), this name is accessed without lock protection at the same time. The solution is to add the lock protection when this name is accessed in the function kobject_uevent(). Fixes: 779e0bf47632 ("RDMA/core: Do not indicate device ready when device enablement fails") Link: https://patch.msgid.link/r/20250506151008.75701-1-yanjun.zhu@linux.dev Reported-by: syzbot+e2ce9e275ecc70a30b72@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e2ce9e275ecc70a30b72 Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b4e3e4beb7f4..d4263385850a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1352,6 +1352,9 @@ static void ib_device_notify_register(struct ib_device *device) down_read(&devices_rwsem); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT); if (ret) goto out; @@ -1468,10 +1471,9 @@ int ib_register_device(struct ib_device *device, const char *name, return ret; } dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_notify_register(device); + ib_device_put(device); return 0; -- cgit v1.2.3