From 2b670bbfd8c0244362fafddceeb2e096eb32caa2 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Wed, 13 May 2020 12:55:50 +0300 Subject: RDMA/mlx5: Add init2init as a modify command [ Upstream commit 819f7427bafd494ef7ca4942ec6322db20722d7b ] Missing INIT2INIT entry in the list of modify commands caused DEVX applications to be unable to modify_qp for this transition state. Add the MLX5_CMD_OP_INIT2INIT_QP opcode to the list of allowed DEVX opcodes. Fixes: e662e14d801b ("IB/mlx5: Add DEVX support for modify and query commands") Link: https://lore.kernel.org/r/20200513095550.211345-1-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/devx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index d609f4659afb..bba7ab078430 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -814,6 +814,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: case MLX5_CMD_OP_RST2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: -- cgit v1.2.3 From ecb9c4d344c970401fa6cd7f692127df437a496a Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 27 May 2020 16:57:03 +0300 Subject: IB/mlx5: Fix DEVX support for MLX5_CMD_OP_INIT2INIT_QP command [ Upstream commit d246a3061528be6d852156d25c02ea69d6db7e65 ] The commit citied in the Fixes line wasn't complete and solved only part of the problems. Update the mlx5_ib to properly support MLX5_CMD_OP_INIT2INIT_QP command in the DEVX, that is required when modify the QP tx_port_affinity. Fixes: 819f7427bafd ("RDMA/mlx5: Add init2init as a modify command") Link: https://lore.kernel.org/r/20200527135703.482501-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/devx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index bba7ab078430..fd75a9043bf1 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -489,6 +489,10 @@ static u64 devx_get_obj_id(const void *in) obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, MLX5_GET(rst2init_qp_in, in, qpn)); break; + case MLX5_CMD_OP_INIT2INIT_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(init2init_qp_in, in, qpn)); + break; case MLX5_CMD_OP_INIT2RTR_QP: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, MLX5_GET(init2rtr_qp_in, in, qpn)); -- cgit v1.2.3 From c0c8c8b10567f79f296200847f554b12a38f313d Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Wed, 27 May 2020 22:02:30 -0500 Subject: RDMA/core: Fix several reference count leaks. [ Upstream commit 0b8e125e213204508e1b3c4bdfe69713280b7abd ] kobject_init_and_add() takes reference even when it fails. If this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Previous commit b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") fixed a similar problem. Link: https://lore.kernel.org/r/20200528030231.9082-1-wu000273@umn.edu Signed-off-by: Qiushi Wu Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/sysfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7a50cedcef1f..091cca9d88ed 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1060,8 +1060,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) coredev->ports_kobj, "%d", port_num); if (ret) { - kfree(p); - return ret; + goto err_put; } p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL); @@ -1074,8 +1073,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type, &p->kobj, "gid_attrs"); if (ret) { - kfree(p->gid_attr_group); - goto err_put; + goto err_put_gid_attrs; } if (device->ops.process_mad && is_full_dev) { @@ -1406,8 +1404,10 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num, ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s", name); - if (ret) + if (ret) { + kobject_put(kobj); return ret; + } } return 0; -- cgit v1.2.3 From 02416142fdf4b37fed04aa74ac42b9527045fecf Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 6 Apr 2020 20:35:40 +0300 Subject: RDMA/mlx5: Fix udata response upon SRQ creation [ Upstream commit cf26deff9036cd3270af562dbec545239e5c7f07 ] Fix udata response upon SRQ creation to use the UAPI structure (i.e. mlx5_ib_create_srq_resp). It did not zero the reserved field in userspace. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20200406173540.1466477-1-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/srq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e7fde86c96b..c29c1f7da4a1 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -310,12 +310,18 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (udata) - if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { + if (udata) { + struct mlx5_ib_create_srq_resp resp = { + .srqn = srq->msrq.srqn, + }; + + if (ib_copy_to_udata(udata, &resp, min(udata->outlen, + sizeof(resp)))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; goto err_core; } + } init_attr->attr.max_wr = srq->msrq.max - 1; -- cgit v1.2.3 From d09de58d2b651d9bf54ef16d7f38b31b9f04b72e Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 8 May 2020 17:45:51 +0800 Subject: RDMA/hns: Bugfix for querying qkey [ Upstream commit 349be276509455ac2f19fa4051ed773082c6a27e ] The qkey queried through the query ud qp verb is a fixed value and it should be read from qp context. Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Link: https://lore.kernel.org/r/1588931159-56875-2-git-send-email-liweihang@huawei.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4540b00ccee9..9a8053bd01e2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4564,7 +4564,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->path_mig_state = IB_MIG_ARMED; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; if (hr_qp->ibqp.qp_type == IB_QPT_UD) - qp_attr->qkey = V2_QKEY_VAL; + qp_attr->qkey = le32_to_cpu(context.qkey_xrcd); qp_attr->rq_psn = roce_get_field(context.byte_108_rx_reqepsn, V2_QPC_BYTE_108_RX_REQ_EPSN_M, -- cgit v1.2.3 From 1503314a335422d45078466240898b8b16f3ec66 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 8 May 2020 17:45:52 +0800 Subject: RDMA/hns: Fix cmdq parameter of querying pf timer resource [ Upstream commit 441c88d5b3ff80108ff536c6cf80591187015403 ] The firmware has reduced the number of descriptions of command HNS_ROCE_OPC_QUERY_PF_TIMER_RES to 1. The driver needs to adapt, otherwise the hardware will report error 4(CMD_NEXT_ERR). Fixes: 0e40dc2f70cd ("RDMA/hns: Add timer allocation support for hip08") Link: https://lore.kernel.org/r/1588931159-56875-3-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 32 +++++++++++------------------- 1 file changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 9a8053bd01e2..0502c90c83ed 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1349,34 +1349,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_pf_timer_res_a *req_a; - struct hns_roce_cmq_desc desc[2]; - int ret, i; + struct hns_roce_cmq_desc desc; + int ret; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_QUERY_PF_TIMER_RES, - true); + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_PF_TIMER_RES, + true); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } - - ret = hns_roce_cmq_send(hr_dev, desc, 2); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) return ret; - req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data; + req_a = (struct hns_roce_pf_timer_res_a *)desc.data; hr_dev->caps.qpc_timer_bt_num = - roce_get_field(req_a->qpc_timer_bt_idx_num, - PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, - PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); + roce_get_field(req_a->qpc_timer_bt_idx_num, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); hr_dev->caps.cqc_timer_bt_num = - roce_get_field(req_a->cqc_timer_bt_idx_num, - PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, - PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); + roce_get_field(req_a->cqc_timer_bt_idx_num, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); return 0; } -- cgit v1.2.3 From 4820050e8438353e710ac1f629945be09f73ce38 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 21 May 2020 10:26:50 +0300 Subject: IB/cma: Fix ports memory leak in cma_configfs [ Upstream commit 63a3345c2d42a9b29e1ce2d3a4043689b3995cea ] The allocated ports structure in never freed. The free function should be called by release_cma_ports_group, but the group is never released since we don't remove its default group. Remove default groups when device group is deleted. Fixes: 045959db65c6 ("IB/cma: Add configfs for rdma_cm") Link: https://lore.kernel.org/r/20200521072650.567908-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma_configfs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 8b0b5ae22e4c..726e70b68249 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -322,8 +322,21 @@ fail: return ERR_PTR(err); } +static void drop_cma_dev(struct config_group *cgroup, struct config_item *item) +{ + struct config_group *group = + container_of(item, struct config_group, cg_item); + struct cma_dev_group *cma_dev_group = + container_of(group, struct cma_dev_group, device_group); + + configfs_remove_default_groups(&cma_dev_group->ports_group); + configfs_remove_default_groups(&cma_dev_group->device_group); + config_item_put(item); +} + static struct configfs_group_operations cma_subsys_group_ops = { .make_group = make_cma_dev, + .drop_item = drop_cma_dev, }; static const struct config_item_type cma_subsys_type = { -- cgit v1.2.3 From af92e4a595e006b498c44b617ea38f697eca3469 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Mon, 25 May 2020 00:38:14 +0530 Subject: RDMA/iw_cxgb4: cleanup device debugfs entries on ULD remove [ Upstream commit 49ea0c036ede81f126f1a9389d377999fdf5c5a1 ] Remove device specific debugfs entries immediately if LLD detaches a particular ULD device in case of fatal PCI errors. Link: https://lore.kernel.org/r/20200524190814.17599-1-bharat@chelsio.com Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 599340c1f0b8..541dbcf22d0e 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -953,6 +953,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) static void c4iw_remove(struct uld_ctx *ctx) { pr_debug("c4iw_dev %p\n", ctx->dev); + debugfs_remove_recursive(ctx->dev->debugfs_root); c4iw_unregister_device(ctx->dev); c4iw_dealloc(ctx); } -- cgit v1.2.3 From 2a4c0bf5c70eabd2b1f6f730a32bcad53e860085 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 21 Jun 2020 13:47:35 +0300 Subject: IB/mad: Fix use after free when destroying MAD agent commit 116a1b9f1cb769b83e5adff323f977a62b1dcb2e upstream. Currently, when RMPP MADs are processed while the MAD agent is destroyed, it could result in use after free of rmpp_recv, as decribed below: cpu-0 cpu-1 ----- ----- ib_mad_recv_done() ib_mad_complete_recv() ib_process_rmpp_recv_wc() unregister_mad_agent() ib_cancel_rmpp_recvs() cancel_delayed_work() process_rmpp_data() start_rmpp() queue_delayed_work(rmpp_recv->cleanup_work) destroy_rmpp_recv() free_rmpp_recv() cleanup_work()[1] spin_lock_irqsave(&rmpp_recv->agent->lock) <-- use after free [1] cleanup_work() == recv_cleanup_handler Fix it by waiting for the MAD agent reference count becoming zero before calling to ib_cancel_rmpp_recvs(). Fixes: 9a41e38a467c ("IB/mad: Use IDR for agent IDs") Link: https://lore.kernel.org/r/20200621104738.54850-2-leon@kernel.org Signed-off-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9947d16edef2..a9e00cdf717b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -639,10 +639,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); - ib_cancel_rmpp_recvs(mad_agent_priv); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); + ib_cancel_rmpp_recvs(mad_agent_priv); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); -- cgit v1.2.3 From 21d511c6c9c25bd45acfafd9768027394a0fddb3 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 23 Jun 2020 16:32:30 -0400 Subject: IB/hfi1: Fix module use count flaw due to leftover module put calls commit 822fbd37410639acdae368ea55477ddd3498651d upstream. When the try_module_get calls were removed from opening and closing of the i2c debugfs file, the corresponding module_put calls were missed. This results in an inaccurate module use count that requires a power cycle to fix. Fixes: 09fbca8e6240 ("IB/hfi1: No need to use try_module_get for debugfs") Link: https://lore.kernel.org/r/20200623203230.106975.76240.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Kaike Wan Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/debugfs.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index d268bf9c42ee..c29da2f4e339 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -985,15 +985,10 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf, static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target) { struct hfi1_pportdata *ppd; - int ret; ppd = private2ppd(fp); - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) /* failed - release the module */ - module_put(THIS_MODULE); - - return ret; + return acquire_chip_resource(ppd->dd, i2c_target(target), 0); } static int i2c1_debugfs_open(struct inode *in, struct file *fp) @@ -1013,7 +1008,6 @@ static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target) ppd = private2ppd(fp); release_chip_resource(ppd->dd, i2c_target(target)); - module_put(THIS_MODULE); return 0; } @@ -1031,18 +1025,10 @@ static int i2c2_debugfs_release(struct inode *in, struct file *fp) static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target) { struct hfi1_pportdata *ppd; - int ret; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; ppd = private2ppd(fp); - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) /* failed - release the module */ - module_put(THIS_MODULE); - - return ret; + return acquire_chip_resource(ppd->dd, i2c_target(target), 0); } static int qsfp1_debugfs_open(struct inode *in, struct file *fp) @@ -1062,7 +1048,6 @@ static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target) ppd = private2ppd(fp); release_chip_resource(ppd->dd, i2c_target(target)); - module_put(THIS_MODULE); return 0; } -- cgit v1.2.3 From 3947dd237ef56dc303f379f4127f321c043124bf Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Wed, 10 Jun 2020 12:47:17 -0500 Subject: RDMA/siw: Fix pointer-to-int-cast warning in siw_rx_pbl() [ Upstream commit 6769b275a313c76ddcd7d94c632032326db5f759 ] The variable buf_addr is type dma_addr_t, which may not be the same size as a pointer. To ensure it is the correct size, cast to a uintptr_t. Fixes: c536277e0db1 ("RDMA/siw: Fix 64/32bit pointer inconsistency") Link: https://lore.kernel.org/r/20200610174717.15932-1-tseewald@gmail.com Signed-off-by: Tom Seewald Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/siw/siw_qp_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index c0a887240325..0520e70084f9 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -139,7 +139,8 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, break; bytes = min(bytes, len); - if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) { + if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) == + bytes) { copied += bytes; offset += bytes; len -= bytes; -- cgit v1.2.3 From 66143ecb9e3cfb1428b1b82feec35178e6ece844 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sat, 13 Jun 2020 23:11:48 -0500 Subject: RDMA/rvt: Fix potential memory leak caused by rvt_alloc_rq [ Upstream commit 90a239ee25fa3a483facec3de7c144361a3d3a51 ] In case of failure of alloc_ud_wq_attr(), the memory allocated by rvt_alloc_rq() is not freed. Fix it by calling rvt_free_rq() using the existing clean-up code. Fixes: d310c4bf8aea ("IB/{rdmavt, hfi1, qib}: Remove AH refcount for UD QPs") Link: https://lore.kernel.org/r/20200614041148.131983-1-pakki001@umn.edu Signed-off-by: Aditya Pakki Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rdmavt/qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d35465389357..19556c62c7ea 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1196,7 +1196,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_ud_wq_attr(qp, rdi->dparms.node); if (err) { ret = (ERR_PTR(err)); - goto bail_driver_priv; + goto bail_rq_rvt; } err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, @@ -1300,9 +1300,11 @@ bail_qpn: rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - rvt_free_rq(&qp->r_rq); free_ud_wq_attr(qp); +bail_rq_rvt: + rvt_free_rq(&qp->r_rq); + bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); -- cgit v1.2.3 From f0078dc6750fe46c7955de081b05cbf7d00d9093 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 16 Jun 2020 12:34:08 +0300 Subject: RDMA/qedr: Fix KASAN: use-after-free in ucma_event_handler+0x532 [ Upstream commit 0dfbd5ecf28cbcb81674c49d34ee97366db1be44 ] Private data passed to iwarp_cm_handler is copied for connection request / response, but ignored otherwise. If junk is passed, it is stored in the event and used later in the event processing. The driver passes an old junk pointer during connection close which leads to a use-after-free on event processing. Set private data to NULL for events that don 't have private data. BUG: KASAN: use-after-free in ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: Read of size 4 at addr ffff8886caa71200 by task kworker/u128:1/5250 kernel: kernel: Workqueue: iw_cm_wq cm_work_handler [iw_cm] kernel: Call Trace: kernel: dump_stack+0x8c/0xc0 kernel: print_address_description.constprop.0+0x1b/0x210 kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: __kasan_report.cold+0x1a/0x33 kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: kasan_report+0xe/0x20 kernel: check_memory_region+0x130/0x1a0 kernel: memcpy+0x20/0x50 kernel: ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: ? __rpc_execute+0x608/0x620 [sunrpc] kernel: cma_iw_handler+0x212/0x330 [rdma_cm] kernel: ? iw_conn_req_handler+0x6e0/0x6e0 [rdma_cm] kernel: ? enqueue_timer+0x86/0x140 kernel: ? _raw_write_lock_irq+0xd0/0xd0 kernel: cm_work_handler+0xd3d/0x1070 [iw_cm] Fixes: e411e0587e0d ("RDMA/qedr: Add iWARP connection management functions") Link: https://lore.kernel.org/r/20200616093408.17827-1-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 5e9732990be5..a7a926b7b562 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -150,8 +150,17 @@ qedr_iw_issue_event(void *context, if (params->cm_info) { event.ird = params->cm_info->ird; event.ord = params->cm_info->ord; - event.private_data_len = params->cm_info->private_data_len; - event.private_data = (void *)params->cm_info->private_data; + /* Only connect_request and reply have valid private data + * the rest of the events this may be left overs from + * connection establishment. CONNECT_REQUEST is issued via + * qedr_iw_mpa_request + */ + if (event_type == IW_CM_EVENT_CONNECT_REPLY) { + event.private_data_len = + params->cm_info->private_data_len; + event.private_data = + (void *)params->cm_info->private_data; + } } if (ep->cm_id) -- cgit v1.2.3 From 4aeb21584e550f20bc829c812f24764b9a0dbb09 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 16 Jun 2020 13:43:04 +0300 Subject: RDMA/cma: Protect bind_list and listen_list while finding matching cm id [ Upstream commit 730c8912484186d4623d0c76509066d285c3a755 ] The bind_list and listen_list must be accessed under a lock, add the missing locking around the access in cm_ib_id_from_event() In addition add lockdep asserts to make it clearer what the locking semantic is here. general protection fault: 0000 [#1] SMP NOPTI CPU: 226 PID: 126135 Comm: kworker/226:1 Tainted: G OE 4.12.14-150.47-default #1 SLE15 Hardware name: Cray Inc. Windom/Windom, BIOS 0.8.7 01-10-2020 Workqueue: ib_cm cm_work_handler [ib_cm] task: ffff9c5a60a1d2c0 task.stack: ffffc1d91f554000 RIP: 0010:cma_ib_req_handler+0x3f1/0x11b0 [rdma_cm] RSP: 0018:ffffc1d91f557b40 EFLAGS: 00010286 RAX: deacffffffffff30 RBX: 0000000000000001 RCX: ffff9c2af5bb6000 RDX: 00000000000000a9 RSI: ffff9c5aa4ed2f10 RDI: ffffc1d91f557b08 RBP: ffffc1d91f557d90 R08: ffff9c340cc80000 R09: ffff9c2c0f901900 R10: 0000000000000000 R11: 0000000000000001 R12: deacffffffffff30 R13: ffff9c5a48aeec00 R14: ffffc1d91f557c30 R15: ffff9c5c2eea3688 FS: 0000000000000000(0000) GS:ffff9c5c2fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00002b5cc03fa320 CR3: 0000003f8500a000 CR4: 00000000003406e0 Call Trace: ? rdma_addr_cancel+0xa0/0xa0 [ib_core] ? cm_process_work+0x28/0x140 [ib_cm] cm_process_work+0x28/0x140 [ib_cm] ? cm_get_bth_pkey.isra.44+0x34/0xa0 [ib_cm] cm_work_handler+0xa06/0x1a6f [ib_cm] ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to+0x7c/0x4b0 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 process_one_work+0x1da/0x400 worker_thread+0x2b/0x3f0 ? process_one_work+0x400/0x400 kthread+0x118/0x140 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x22/0x40 Code: 00 66 83 f8 02 0f 84 ca 05 00 00 49 8b 84 24 d0 01 00 00 48 85 c0 0f 84 68 07 00 00 48 2d d0 01 00 00 49 89 c4 0f 84 59 07 00 00 <41> 0f b7 44 24 20 49 8b 77 50 66 83 f8 0a 75 9e 49 8b 7c 24 28 Fixes: 4c21b5bcef73 ("IB/cma: Add net_dev and private data checks to RDMA CM") Link: https://lore.kernel.org/r/20200616104304.2426081-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8f776b7de45e..e3cd9d2b0dd2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1631,6 +1631,8 @@ static struct rdma_id_private *cma_find_listener( { struct rdma_id_private *id_priv, *id_priv_dev; + lockdep_assert_held(&lock); + if (!bind_list) return ERR_PTR(-EINVAL); @@ -1677,6 +1679,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, } } + mutex_lock(&lock); /* * Net namespace might be getting deleted while route lookup, * cm_id lookup is in progress. Therefore, perform netdevice @@ -1718,6 +1721,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev); err: rcu_read_unlock(); + mutex_unlock(&lock); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; @@ -2473,6 +2477,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; + lockdep_assert_held(&lock); + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; @@ -3245,6 +3251,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list, u64 sid, mask; __be16 port; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); port = htons(bind_list->port); @@ -3273,6 +3281,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps, struct rdma_bind_list *bind_list; int ret; + lockdep_assert_held(&lock); + bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; @@ -3299,6 +3309,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list, struct sockaddr *saddr = cma_src_addr(id_priv); __be16 dport = cma_port(daddr); + lockdep_assert_held(&lock); + hlist_for_each_entry(cur_id, &bind_list->owners, node) { struct sockaddr *cur_daddr = cma_dst_addr(cur_id); struct sockaddr *cur_saddr = cma_src_addr(cur_id); @@ -3338,6 +3350,8 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps, unsigned int rover; struct net *net = id_priv->id.route.addr.dev_addr.net; + lockdep_assert_held(&lock); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; @@ -3385,6 +3399,8 @@ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) @@ -3415,6 +3431,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps, unsigned short snum; int ret; + lockdep_assert_held(&lock); + snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; -- cgit v1.2.3 From 34f45567462d07f697e9a4ec6a0c8c374f432b83 Mon Sep 17 00:00:00 2001 From: Fan Guo Date: Fri, 12 Jun 2020 14:38:24 +0800 Subject: RDMA/mad: Fix possible memory leak in ib_mad_post_receive_mads() [ Upstream commit a17f4bed811c60712d8131883cdba11a105d0161 ] If ib_dma_mapping_error() returns non-zero value, ib_mad_post_receive_mads() will jump out of loops and return -ENOMEM without freeing mad_priv. Fix this memory-leak problem by freeing mad_priv in this case. Fixes: 2c34e68f4261 ("IB/mad: Check and handle potential DMA mapping errors") Link: https://lore.kernel.org/r/20200612063824.180611-1-guofan5@huawei.com Signed-off-by: Fan Guo Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/mad.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a9e00cdf717b..2284930b5f91 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2960,6 +2960,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { + kfree(mad_priv); ret = -ENOMEM; break; } -- cgit v1.2.3 From 8a1b8e64204e8ec9e80402f0f05e30f3ba4cd402 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 21 Jun 2020 14:00:00 +0300 Subject: RDMA/counter: Query a counter before release [ Upstream commit c1d869d64a1955817c4d6fff08ecbbe8e59d36f8 ] Query a dynamically-allocated counter before release it, to update it's hwcounters and log all of them into history data. Otherwise all values of these hwcounters will be lost. Fixes: f34a55e497e8 ("RDMA/core: Get sum value of all counters when perform a sysfs stat read") Link: https://lore.kernel.org/r/20200621110000.56059-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/counters.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 46dd50ff7c85..11210bf7fd61 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -195,7 +195,7 @@ static int __rdma_counter_unbind_qp(struct ib_qp *qp) return ret; } -static void counter_history_stat_update(const struct rdma_counter *counter) +static void counter_history_stat_update(struct rdma_counter *counter) { struct ib_device *dev = counter->device; struct rdma_port_counter *port_counter; @@ -205,6 +205,8 @@ static void counter_history_stat_update(const struct rdma_counter *counter) if (!port_counter->hstats) return; + rdma_counter_query_stats(counter); + for (i = 0; i < counter->stats->num_counters; i++) port_counter->hstats->value[i] += counter->stats->value[i]; } -- cgit v1.2.3 From fd3a612d9828bbe705bcba52eb6adaffba10dc1f Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Tue, 23 Jun 2020 19:13:09 -0700 Subject: IB/sa: Resolv use-after-free in ib_nl_make_request() [ Upstream commit f427f4d6214c183c474eeb46212d38e6c7223d6a ] There is a race condition where ib_nl_make_request() inserts the request data into the linked list but the timer in ib_nl_request_timeout() can see it and destroy it before ib_nl_send_msg() is done touching it. This could happen, for instance, if there is a long delay allocating memory during nlmsg_new() This causes a use-after-free in the send_mad() thread: [] ? ib_pack+0x17b/0x240 [ib_core] [ ] ib_sa_path_rec_get+0x181/0x200 [ib_sa] [] rdma_resolve_route+0x3c0/0x8d0 [rdma_cm] [] ? cma_bind_port+0xa0/0xa0 [rdma_cm] [] ? rds_rdma_cm_event_handler_cmn+0x850/0x850 [rds_rdma] [] rds_rdma_cm_event_handler_cmn+0x22c/0x850 [rds_rdma] [] rds_rdma_cm_event_handler+0x10/0x20 [rds_rdma] [] addr_handler+0x9e/0x140 [rdma_cm] [] process_req+0x134/0x190 [ib_addr] [] process_one_work+0x169/0x4a0 [] worker_thread+0x5b/0x560 [] ? flush_delayed_work+0x50/0x50 [] kthread+0xcb/0xf0 [] ? __schedule+0x24a/0x810 [] ? __schedule+0x24a/0x810 [] ? kthread_create_on_node+0x180/0x180 [] ret_from_fork+0x47/0x90 [] ? kthread_create_on_node+0x180/0x180 The ownership rule is once the request is on the list, ownership transfers to the list and the local thread can't touch it any more, just like for the normal MAD case in send_mad(). Thus, instead of adding before send and then trying to delete after on errors, move the entire thing under the spinlock so that the send and update of the lists are atomic to the conurrent threads. Lightly reoganize things so spinlock safe memory allocations are done in the final NL send path and the rest of the setup work is done before and outside the lock. Fixes: 3ebd2fd0d011 ("IB/sa: Put netlink request into the request list before sending") Link: https://lore.kernel.org/r/1592964789-14533-1-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/sa_query.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index bddb5434fbed..d2d70c89193f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -829,13 +829,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) return len; } -static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) +static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; void *data; struct ib_sa_mad *mad; int len; + unsigned long flags; + unsigned long delay; + gfp_t gfp_flag; + int ret; + + INIT_LIST_HEAD(&query->list); + query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); mad = query->mad_buf->mad; len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask); @@ -860,36 +867,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask); -} + gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC : + GFP_NOWAIT; -static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) -{ - unsigned long flags; - unsigned long delay; - int ret; + spin_lock_irqsave(&ib_nl_request_lock, flags); + ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag); - INIT_LIST_HEAD(&query->list); - query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); + if (ret) + goto out; - /* Put the request on the list first.*/ - spin_lock_irqsave(&ib_nl_request_lock, flags); + /* Put the request on the list.*/ delay = msecs_to_jiffies(sa_local_svc_timeout_ms); query->timeout = delay + jiffies; list_add_tail(&query->list, &ib_nl_request_list); /* Start the timeout if this is the only request */ if (ib_nl_request_list.next == &query->list) queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); - spin_unlock_irqrestore(&ib_nl_request_lock, flags); - ret = ib_nl_send_msg(query, gfp_mask); - if (ret) { - ret = -EIO; - /* Remove the request */ - spin_lock_irqsave(&ib_nl_request_lock, flags); - list_del(&query->list); - spin_unlock_irqrestore(&ib_nl_request_lock, flags); - } +out: + spin_unlock_irqrestore(&ib_nl_request_lock, flags); return ret; } -- cgit v1.2.3 From 9e8f4623e29286649273197b89ca5524f0d98ce8 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 7 Jul 2020 16:09:31 +0300 Subject: RDMA/siw: Fix reporting vendor_part_id [ Upstream commit 04340645f69ab7abb6f9052688a60f0213b3f79c ] Move the initialization of the vendor_part_id to be before calling ib_register_device(), this is needed because the query_device() callback is called from the context of ib_register_device() before initializing the vendor_part_id, so the reported value is wrong. Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") Link: https://lore.kernel.org/r/20200707130931.444724-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/siw/siw_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 130b1e31b978..fb66d6757278 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -66,12 +66,13 @@ static int siw_device_register(struct siw_device *sdev, const char *name) static int dev_id = 1; int rv; + sdev->vendor_part_id = dev_id++; + rv = ib_register_device(base_dev, name); if (rv) { pr_warn("siw: device registration error %d\n", rv); return rv; } - sdev->vendor_part_id = dev_id++; siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); -- cgit v1.2.3 From e89b828ae357eafa3a78591ed0410fd9078034f5 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 7 Jul 2020 14:06:11 +0300 Subject: IB/mlx5: Fix 50G per lane indication [ Upstream commit 530c8632b547ff72f11ff83654b22462a73f1f7b ] Some released FW versions mistakenly don't set the capability that 50G per lane link-modes are supported for VFs (ptys_extended_ethernet capability bit). Use PTYS.ext_eth_proto_capability instead, as this indication is always accurate. If PTYS.ext_eth_proto_capability is valid (has a non-zero value) conclude that the HCA supports 50G per lane. Otherwise, conclude that the HCA doesn't support 50G per lane. Fixes: 08e8676f1607 ("IB/mlx5: Add support for 50Gbps per lane link modes") Link: https://lore.kernel.org/r/20200707110612.882962-3-leon@kernel.org Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4f44a731a48e..b781ad74e6de 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -517,7 +517,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); + ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); props->active_width = IB_WIDTH_4X; -- cgit v1.2.3 From 607fbc27d75fbe31d8c65f4a2bcdabbabc71f691 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Tue, 23 Jun 2020 16:40:47 -0400 Subject: IB/hfi1: Do not destroy hfi1_wq when the device is shut down commit 28b70cd9236563e1a88a6094673fef3c08db0d51 upstream. The workqueue hfi1_wq is destroyed in function shutdown_device(), which is called by either shutdown_one() or remove_one(). The function shutdown_one() is called when the kernel is rebooted while remove_one() is called when the hfi1 driver is unloaded. When the kernel is rebooted, hfi1_wq is destroyed while all qps are still active, leading to a kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000102 IP: [] __queue_work+0x32/0x3e0 PGD 0 Oops: 0000 [#1] SMP Modules linked in: dm_round_robin nvme_rdma(OE) nvme_fabrics(OE) nvme_core(OE) ib_isert iscsi_target_mod target_core_mod ib_ucm mlx4_ib iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm rpcrdma sunrpc irqbypass crc32_pclmul ghash_clmulni_intel rdma_ucm aesni_intel ib_uverbs lrw gf128mul opa_vnic glue_helper ablk_helper ib_iser cryptd ib_umad rdma_cm iw_cm ses enclosure libiscsi scsi_transport_sas pcspkr joydev ib_ipoib(OE) scsi_transport_iscsi ib_cm sg ipmi_ssif mei_me lpc_ich i2c_i801 mei ioatdma ipmi_si dm_multipath ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter hangcheck_timer ip_tables ext4 mbcache jbd2 mlx4_en sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm hfi1(OE) crct10dif_pclmul crct10dif_common crc32c_intel drm ahci mlx4_core libahci rdmavt(OE) igb megaraid_sas ib_core libata drm_panel_orientation_quirks ptp pps_core devlink dca i2c_algo_bit dm_mirror dm_region_hash dm_log dm_mod CPU: 19 PID: 0 Comm: swapper/19 Kdump: loaded Tainted: G OE ------------ 3.10.0-957.el7.x86_64 #1 Hardware name: Phegda X2226A/S2600CW, BIOS SE5C610.86B.01.01.0024.021320181901 02/13/2018 task: ffff8a799ba0d140 ti: ffff8a799bad8000 task.ti: ffff8a799bad8000 RIP: 0010:[] [] __queue_work+0x32/0x3e0 RSP: 0018:ffff8a90dde43d80 EFLAGS: 00010046 RAX: 0000000000000082 RBX: 0000000000000086 RCX: 0000000000000000 RDX: ffff8a90b924fcb8 RSI: 0000000000000000 RDI: 000000000000001b RBP: ffff8a90dde43db8 R08: ffff8a799ba0d6d8 R09: ffff8a90dde53900 R10: 0000000000000002 R11: ffff8a90dde43de8 R12: ffff8a90b924fcb8 R13: 000000000000001b R14: 0000000000000000 R15: ffff8a90d2890000 FS: 0000000000000000(0000) GS:ffff8a90dde40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000102 CR3: 0000001a70410000 CR4: 00000000001607e0 Call Trace: [] queue_work_on+0x45/0x50 [] _hfi1_schedule_send+0x6e/0xc0 [hfi1] [] hfi1_schedule_send+0x32/0x70 [hfi1] [] rvt_rc_timeout+0xe9/0x130 [rdmavt] [] ? trigger_load_balance+0x6a/0x280 [] ? rvt_free_qpn+0x40/0x40 [rdmavt] [] call_timer_fn+0x38/0x110 [] ? rvt_free_qpn+0x40/0x40 [rdmavt] [] run_timer_softirq+0x24d/0x300 [] __do_softirq+0xf5/0x280 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x105/0x110 [] smp_apic_timer_interrupt+0x48/0x60 [] apic_timer_interrupt+0x162/0x170 [] ? cpuidle_enter_state+0x57/0xd0 [] cpuidle_idle_call+0xde/0x230 [] arch_cpu_idle+0xe/0xc0 [] cpu_startup_entry+0x14a/0x1e0 [] start_secondary+0x1f7/0x270 [] start_cpu+0x5/0x14 The solution is to destroy the workqueue only when the hfi1 driver is unloaded, not when the device is shut down. In addition, when the device is shut down, no more work should be scheduled on the workqueues and the workqueues are flushed. Fixes: 8d3e71136a08 ("IB/{hfi1, qib}: Add handling of kernel restart") Link: https://lore.kernel.org/r/20200623204047.107638.77646.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/init.c | 27 +++++++++++++++++++++++---- drivers/infiniband/hw/hfi1/qp.c | 5 ++++- drivers/infiniband/hw/hfi1/tid_rdma.c | 5 ++++- 3 files changed, 31 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 26b792bb1027..d5165a6179d4 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -844,6 +844,25 @@ wq_error: return -ENOMEM; } +/** + * destroy_workqueues - destroy per port workqueues + * @dd: the hfi1_ib device + */ +static void destroy_workqueues(struct hfi1_devdata *dd) +{ + int pidx; + struct hfi1_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + if (ppd->hfi1_wq) { + destroy_workqueue(ppd->hfi1_wq); + ppd->hfi1_wq = NULL; + } + } +} + /** * enable_general_intr() - Enable the IRQs that will be handled by the * general interrupt handler. @@ -1118,11 +1137,10 @@ static void shutdown_device(struct hfi1_devdata *dd) */ hfi1_quiet_serdes(ppd); - if (ppd->hfi1_wq) { - destroy_workqueue(ppd->hfi1_wq); - ppd->hfi1_wq = NULL; - } + if (ppd->hfi1_wq) + flush_workqueue(ppd->hfi1_wq); if (ppd->link_wq) { + flush_workqueue(ppd->link_wq); destroy_workqueue(ppd->link_wq); ppd->link_wq = NULL; } @@ -1814,6 +1832,7 @@ static void remove_one(struct pci_dev *pdev) * clear dma engines, etc. */ shutdown_device(dd); + destroy_workqueues(dd); stop_timers(dd); diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index f8e733aa3bb8..acd4400b0092 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -381,7 +381,10 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_devdata *dd = ppd->dd; + + if (dd->flags & HFI1_SHUTDOWN) + return true; return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, priv->s_sde ? diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 8a2e0d9351e9..7c6fd720fb2e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -5406,7 +5406,10 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp) struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_devdata *dd = ppd->dd; + + if ((dd->flags & HFI1_SHUTDOWN)) + return true; return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq, priv->s_sde ? -- cgit v1.2.3 From 4a215725dec7da679ab448294935125fb364f2ba Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Tue, 23 Jun 2020 16:40:53 -0400 Subject: IB/hfi1: Do not destroy link_wq when the device is shut down commit 2315ec12ee8e8257bb335654c62e0cae71dc278d upstream. The workqueue link_wq should only be destroyed when the hfi1 driver is unloaded, not when the device is shut down. Fixes: 71d47008ca1b ("IB/hfi1: Create workqueue for link events") Link: https://lore.kernel.org/r/20200623204053.107638.70315.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index d5165a6179d4..fbff6b2f00e7 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -860,6 +860,10 @@ static void destroy_workqueues(struct hfi1_devdata *dd) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } } @@ -1136,14 +1140,10 @@ static void shutdown_device(struct hfi1_devdata *dd) * We can't count on interrupts since we are stopping. */ hfi1_quiet_serdes(ppd); - if (ppd->hfi1_wq) flush_workqueue(ppd->hfi1_wq); - if (ppd->link_wq) { + if (ppd->link_wq) flush_workqueue(ppd->link_wq); - destroy_workqueue(ppd->link_wq); - ppd->link_wq = NULL; - } } sdma_exit(dd); } -- cgit v1.2.3 From eec70178983fcd08536ae748c803af0107799a33 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 27 Apr 2020 18:46:36 +0300 Subject: RDMA/mlx5: Verify that QP is created with RQ or SQ commit 0eacc574aae7300bf46c10c7116c3ba5825505b7 upstream. RAW packet QP and underlay QP must be created with either RQ or SQ, check that. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20200427154636.381474-37-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/qp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 96edc5c30204..09e29c6cb66d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1463,6 +1463,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u16 uid = to_mpd(pd)->uid; u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt) + return -EINVAL; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); if (err) -- cgit v1.2.3 From 613e7c52aaaa34885639bade7ec28f8fc28fe877 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 12 Jul 2020 13:26:41 +0300 Subject: RDMA/mlx5: Use xa_lock_irq when access to SRQ table [ Upstream commit c3d6057e07a5d15be7c69ea545b3f91877808c96 ] SRQ table is accessed both from interrupt and process context, therefore we must use xa_lock_irq. inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. kworker/u17:9/8573 takes: ffff8883e3503d30 (&xa->xa_lock#13){?...}-{2:2}, at: mlx5_cmd_get_srq+0x18/0x70 [mlx5_ib] {IN-HARDIRQ-W} state was registered at: lock_acquire+0xb9/0x3a0 _raw_spin_lock+0x25/0x30 srq_event_notifier+0x2b/0xc0 [mlx5_ib] notifier_call_chain+0x45/0x70 __atomic_notifier_call_chain+0x69/0x100 forward_event+0x36/0xc0 [mlx5_core] notifier_call_chain+0x45/0x70 __atomic_notifier_call_chain+0x69/0x100 mlx5_eq_async_int+0xc5/0x160 [mlx5_core] notifier_call_chain+0x45/0x70 __atomic_notifier_call_chain+0x69/0x100 mlx5_irq_int_handler+0x19/0x30 [mlx5_core] __handle_irq_event_percpu+0x43/0x2a0 handle_irq_event_percpu+0x30/0x70 handle_irq_event+0x34/0x60 handle_edge_irq+0x7c/0x1b0 do_IRQ+0x60/0x110 ret_from_intr+0x0/0x2a default_idle+0x34/0x160 do_idle+0x1ec/0x220 cpu_startup_entry+0x19/0x20 start_secondary+0x153/0x1a0 secondary_startup_64+0xa4/0xb0 irq event stamp: 20907 hardirqs last enabled at (20907): _raw_spin_unlock_irq+0x24/0x30 hardirqs last disabled at (20906): _raw_spin_lock_irq+0xf/0x40 softirqs last enabled at (20746): __do_softirq+0x2c9/0x436 softirqs last disabled at (20681): irq_exit+0xb3/0xc0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&xa->xa_lock#13); lock(&xa->xa_lock#13); *** DEADLOCK *** 2 locks held by kworker/u17:9/8573: #0: ffff888295218d38 ((wq_completion)mlx5_ib_page_fault){+.+.}-{0:0}, at: process_one_work+0x1f1/0x5f0 #1: ffff888401647e78 ((work_completion)(&pfault->work)){+.+.}-{0:0}, at: process_one_work+0x1f1/0x5f0 stack backtrace: CPU: 0 PID: 8573 Comm: kworker/u17:9 Tainted: GO 5.7.0_for_upstream_min_debug_2020_06_14_11_31_46_41 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_ib_page_fault mlx5_ib_eqe_pf_action [mlx5_ib] Call Trace: dump_stack+0x71/0x9b mark_lock+0x4f2/0x590 ? print_shortest_lock_dependencies+0x200/0x200 __lock_acquire+0xa00/0x1eb0 lock_acquire+0xb9/0x3a0 ? mlx5_cmd_get_srq+0x18/0x70 [mlx5_ib] _raw_spin_lock+0x25/0x30 ? mlx5_cmd_get_srq+0x18/0x70 [mlx5_ib] mlx5_cmd_get_srq+0x18/0x70 [mlx5_ib] mlx5_ib_eqe_pf_action+0x257/0xa30 [mlx5_ib] ? process_one_work+0x209/0x5f0 process_one_work+0x27b/0x5f0 ? __schedule+0x280/0x7e0 worker_thread+0x2d/0x3c0 ? process_one_work+0x5f0/0x5f0 kthread+0x111/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x24/0x30 Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20200712102641.15210-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/srq_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 8fc3630a9d4c..0224231a2e6f 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -83,11 +83,11 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *srq; - xa_lock(&table->array); + xa_lock_irq(&table->array); srq = xa_load(&table->array, srqn); if (srq) refcount_inc(&srq->common.refcount); - xa_unlock(&table->array); + xa_unlock_irq(&table->array); return srq; } -- cgit v1.2.3 From 951117a2079bf0f725e0f297d4bfb5a9a1371864 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 28 Jul 2020 14:38:48 -0400 Subject: IB/rdmavt: Fix RQ counting issues causing use of an invalid RWQE commit 54a485e9ec084da1a4b32dcf7749c7d760ed8aa5 upstream. The lookaside count is improperly initialized to the size of the Receive Queue with the additional +1. In the traces below, the RQ size is 384, so the count was set to 385. The lookaside count is then rarely refreshed. Note the high and incorrect count in the trace below: rvt_get_rwqe: [hfi1_0] wqe ffffc900078e9008 wr_id 55c7206d75a0 qpn c qpt 2 pid 3018 num_sge 1 head 1 tail 0, count 385 rvt_get_rwqe: (hfi1_rc_rcv+0x4eb/0x1480 [hfi1] <- rvt_get_rwqe) ret=0x1 The head,tail indicate there is only one RWQE posted although the count says 385 and we correctly return the element 0. The next call to rvt_get_rwqe with the decremented count: rvt_get_rwqe: [hfi1_0] wqe ffffc900078e9058 wr_id 0 qpn c qpt 2 pid 3018 num_sge 0 head 1 tail 1, count 384 rvt_get_rwqe: (hfi1_rc_rcv+0x4eb/0x1480 [hfi1] <- rvt_get_rwqe) ret=0x1 Note that the RQ is empty (head == tail) yet we return the RWQE at tail 1, which is not valid because of the bogus high count. Best case, the RWQE has never been posted and the rc logic sees an RWQE that is too small (all zeros) and puts the QP into an error state. In the worst case, a server slow at posting receive buffers might fool rvt_get_rwqe() into fetching an old RWQE and corrupt memory. Fix by deleting the faulty initialization code and creating an inline to fetch the posted count and convert all callers to use new inline. Fixes: f592ae3c999f ("IB/rdmavt: Fracture single lock used for posting and processing RWQEs") Link: https://lore.kernel.org/r/20200728183848.22226.29132.stgit@awfm-01.aw.intel.com Reported-by: Zhaojuan Guo Cc: # 5.4.x Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Tested-by: Honggang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rdmavt/qp.c | 33 ++++----------------------------- drivers/infiniband/sw/rdmavt/rc.c | 4 +--- 2 files changed, 5 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 19556c62c7ea..d14ad523f96c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -898,8 +898,6 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_tail_ack_queue = 0; qp->s_acked_ack_queue = 0; qp->s_num_rd_atomic = 0; - if (qp->r_rq.kwq) - qp->r_rq.kwq->count = qp->r_rq.size; qp->r_sge.num_sge = 0; atomic_set(&qp->s_reserved_used, 0); } @@ -2352,31 +2350,6 @@ bad_lkey: return 0; } -/** - * get_count - count numbers of request work queue entries - * in circular buffer - * @rq: data structure for request queue entry - * @tail: tail indices of the circular buffer - * @head: head indices of the circular buffer - * - * Return - total number of entries in the circular buffer - */ -static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head) -{ - u32 count; - - count = head; - - if (count >= rq->size) - count = 0; - if (count < tail) - count += rq->size - tail; - else - count -= tail; - - return count; -} - /** * get_rvt_head - get head indices of the circular buffer * @rq: data structure for request queue entry @@ -2451,7 +2424,7 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) { head = get_rvt_head(rq, ip); - kwq->count = get_count(rq, tail, head); + kwq->count = rvt_get_rq_count(rq, head, tail); } if (unlikely(kwq->count == 0)) { ret = 0; @@ -2486,7 +2459,9 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) * the number of remaining WQEs. */ if (kwq->count < srq->limit) { - kwq->count = get_count(rq, tail, get_rvt_head(rq, ip)); + kwq->count = + rvt_get_rq_count(rq, + get_rvt_head(rq, ip), tail); if (kwq->count < srq->limit) { struct ib_event ev; diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c index 890d7b760d2e..27415185d862 100644 --- a/drivers/infiniband/sw/rdmavt/rc.c +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -127,9 +127,7 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp) * not atomic, which is OK, since the fuzziness is * resolved as further ACKs go out. */ - credits = head - tail; - if ((int)credits < 0) - credits += qp->r_rq.size; + credits = rvt_get_rq_count(&qp->r_rq, head, tail); } /* * Binary search the credit table to find the code to -- cgit v1.2.3 From 691081c0558a2eb9165765a329ee75d98cf4c56e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 26 Jun 2020 14:49:10 -0300 Subject: RDMA/core: Fix bogus WARN_ON during ib_unregister_device_queued() [ Upstream commit 0cb42c0265837fafa2b4f302c8a7fed2631d7869 ] ib_unregister_device_queued() can only be used by drivers using the new dealloc_device callback flow, and it has a safety WARN_ON to ensure drivers are using it properly. However, if unregister and register are raced there is a special destruction path that maintains the uniform error handling semantic of 'caller does ib_dealloc_device() on failure'. This requires disabling the dealloc_device callback which triggers the WARN_ON. Instead of using NULL to disable the callback use a special function pointer so the WARN_ON does not trigger. Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration") Link: https://lore.kernel.org/r/0-v1-a36d512e0a99+762-syz_dealloc_driver_jgg@nvidia.com Reported-by: syzbot+4088ed905e4ae2b0e13b@syzkaller.appspotmail.com Suggested-by: Hillf Danton Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/device.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 10ae6c6eab0a..59dc9f3cfb37 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1330,6 +1330,10 @@ out: return ret; } +static void prevent_dealloc_device(struct ib_device *ib_dev) +{ +} + /** * ib_register_device - Register an IB device with IB core * @device:Device to register @@ -1397,11 +1401,11 @@ int ib_register_device(struct ib_device *device, const char *name) * possibility for a parallel unregistration along with this * error flow. Since we have a refcount here we know any * parallel flow is stopped in disable_device and will see the - * NULL pointers, causing the responsibility to + * special dealloc_driver pointer, causing the responsibility to * ib_dealloc_device() to revert back to this thread. */ dealloc_fn = device->ops.dealloc_driver; - device->ops.dealloc_driver = NULL; + device->ops.dealloc_driver = prevent_dealloc_device; ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; @@ -1449,7 +1453,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev) * Drivers using the new flow may not call ib_dealloc_device except * in error unwind prior to registration success. */ - if (ib_dev->ops.dealloc_driver) { + if (ib_dev->ops.dealloc_driver && + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); ib_dealloc_device(ib_dev); } -- cgit v1.2.3 From 8fbefed6c3a05148a9713f1636f5adaf3dcdafda Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 30 Jun 2020 15:36:05 +0300 Subject: RDMA/rxe: Skip dgid check in loopback mode [ Upstream commit 5c99274be8864519328aa74bc550ba410095bc1c ] In the loopback tests, the following call trace occurs. Call Trace: __rxe_do_task+0x1a/0x30 [rdma_rxe] rxe_qp_destroy+0x61/0xa0 [rdma_rxe] rxe_destroy_qp+0x20/0x60 [rdma_rxe] ib_destroy_qp_user+0xcc/0x220 [ib_core] uverbs_free_qp+0x3c/0xc0 [ib_uverbs] destroy_hw_idr_uobject+0x24/0x70 [ib_uverbs] uverbs_destroy_uobject+0x43/0x1b0 [ib_uverbs] uobj_destroy+0x41/0x70 [ib_uverbs] __uobj_get_destroy+0x39/0x70 [ib_uverbs] ib_uverbs_destroy_qp+0x88/0xc0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xb9/0xf0 [ib_uverbs] ib_uverbs_cmd_verbs+0xb16/0xc30 [ib_uverbs] The root cause is that the actual RDMA connection is not created in the loopback tests and the rxe_match_dgid will fail randomly. To fix this call trace which appear in the loopback tests, skip check of the dgid. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200630123605.446959-1-leon@kernel.org Signed-off-by: Zhu Yanjun Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_recv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 831ad578a7b2..46e111c218fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -330,10 +330,14 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; + if (pkt->mask & RXE_LOOPBACK_MASK) + return 0; + if (skb->protocol == htons(ETH_P_IP)) { ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, (struct in6_addr *)&dgid); @@ -366,7 +370,7 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) goto drop; - if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { + if (rxe_match_dgid(rxe, skb) < 0) { pr_warn_ratelimited("failed matching dgid\n"); goto drop; } -- cgit v1.2.3 From 4cf66d70b5efcd360d0214f93009758b55c2025d Mon Sep 17 00:00:00 2001 From: Yuval Basson Date: Wed, 8 Jul 2020 22:55:26 +0300 Subject: RDMA/qedr: SRQ's bug fixes [ Upstream commit acca72e2b031b9fbb4184511072bd246a0abcebc ] QP's with the same SRQ, working on different CQs and running in parallel on different CPUs could lead to a race when maintaining the SRQ consumer count, and leads to FW running out of SRQs. Update the consumer atomically. Make sure the wqe_prod is updated after the sge_prod due to FW requirements. Fixes: 3491c9e799fb ("qedr: Add support for kernel mode SRQ's") Link: https://lore.kernel.org/r/20200708195526.31040-1-ybason@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Yuval Basson Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/qedr.h | 4 ++-- drivers/infiniband/hw/qedr/verbs.c | 22 ++++++++++------------ 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8e927f6c1520..ed56df319d2d 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -349,10 +349,10 @@ struct qedr_srq_hwq_info { u32 wqe_prod; u32 sge_prod; u32 wr_prod_cnt; - u32 wr_cons_cnt; + atomic_t wr_cons_cnt; u32 num_elems; - u32 *virt_prod_pair_addr; + struct rdma_srq_producers *virt_prod_pair_addr; dma_addr_t phy_prod_pair_addr; }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8b4240c1cc76..16a994fd7d0a 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -3460,7 +3460,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) * count and consumer count and subtract it from max * work request supported so that we get elements left. */ - used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt); return hw_srq->max_wr - used; } @@ -3475,7 +3475,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, unsigned long flags; int status = 0; u32 num_sge; - u32 offset; spin_lock_irqsave(&srq->lock, flags); @@ -3488,7 +3487,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (!qedr_srq_elem_left(hw_srq) || wr->num_sge > srq->hw_srq.max_sges) { DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", - hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + hw_srq->wr_prod_cnt, + atomic_read(&hw_srq->wr_cons_cnt), wr->num_sge, srq->hw_srq.max_sges); status = -ENOMEM; *bad_wr = wr; @@ -3522,22 +3522,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, hw_srq->sge_prod++; } - /* Flush WQE and SGE information before + /* Update WQE and SGE information before * updating producer. */ - wmb(); + dma_wmb(); /* SRQ producer is 8 bytes. Need to update SGE producer index * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; - offset = offsetof(struct rdma_srq_producers, wqe_prod); - *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = - hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + /* Make sure sge producer is updated first */ + dma_wmb(); + srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; - /* Flush producer after updating it. */ - wmb(); wr = wr->next; } @@ -3956,7 +3954,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, } else { __process_resp_one(dev, qp, cq, wc, resp, wr_id); } - srq->hw_srq.wr_cons_cnt++; + atomic_inc(&srq->hw_srq.wr_cons_cnt); return 1; } -- cgit v1.2.3 From 7ecfbee3b9c38f263e64c06aac5ca861ac57362a Mon Sep 17 00:00:00 2001 From: Mikhail Malygin Date: Thu, 16 Jul 2020 22:03:41 +0300 Subject: RDMA/rxe: Prevent access to wr->next ptr afrer wr is posted to send queue [ Upstream commit 5f0b2a6093a4d9aab093964c65083fe801ef1e58 ] rxe_post_send_kernel() iterates over linked list of wr's, until the wr->next ptr is NULL. However if we've got an interrupt after last wr is posted, control may be returned to the code after send completion callback is executed and wr memory is freed. As a result, wr->next pointer may contain incorrect value leading to panic. Store the wr->next on the stack before posting it. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200716190340.23453-1-m.malygin@yadro.com Signed-off-by: Mikhail Malygin Signed-off-by: Sergey Kojushev Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_verbs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 623129f27f5a..71358b0b8910 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -679,6 +679,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; + struct ib_send_wr *next; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -695,6 +696,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, break; } + next = wr->next; + length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; @@ -705,7 +708,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, *bad_wr = wr; break; } - wr = wr->next; + wr = next; } rxe_run_task(&qp->req.task, 1); -- cgit v1.2.3 From 3a2cd06a3d93ea4018cf1938fcfdbad868923232 Mon Sep 17 00:00:00 2001 From: Li Heng Date: Sat, 25 Jul 2020 10:56:27 +0800 Subject: RDMA/core: Fix return error value in _ib_modify_qp() to negative [ Upstream commit 47fda651d5af2506deac57d54887cf55ce26e244 ] The error codes in _ib_modify_qp() are supposed to be negative errno. Fixes: 7a5c938b9ed0 ("IB/core: Check for rdma_protocol_ib only after validating port_num") Link: https://lore.kernel.org/r/1595645787-20375-1-git-send-email-liheng40@huawei.com Reported-by: Hulk Robot Signed-off-by: Li Heng Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6c4093d0a91d..d4815f29cfd2 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1648,7 +1648,7 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (!(rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num) && rdma_protocol_ib(qp->device, port))) { - ret = EINVAL; + ret = -EINVAL; goto out; } } -- cgit v1.2.3 From 07783db29f89878dca1b859373e7bf3a0896a7e9 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 27 Jul 2020 12:58:28 +0300 Subject: RDMA/netlink: Remove CAP_NET_RAW check when dump a raw QP [ Upstream commit 1d70ad0f85435a7262de802b104e49e6598c50ff ] When dumping QPs bound to a counter, raw QPs should be allowed to dump without the CAP_NET_RAW privilege. This is consistent with what "rdma res show qp" does. Fixes: c4ffee7c9bdb ("RDMA/netlink: Implement counter dumpit calback") Link: https://lore.kernel.org/r/20200727095828.496195-1-leon@kernel.org Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/nldev.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 244ebf285fc3..e4905d9fecb0 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -702,9 +702,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, continue; qp = container_of(res, struct ib_qp, res); - if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) - continue; - if (!qp->counter || (qp->counter->id != counter->id)) continue; -- cgit v1.2.3 From 5412efa6285af7c2cd20b1ecf3412468bc76afbe Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 23 Jun 2020 13:52:36 +0300 Subject: RDMA/ipoib: Return void from ipoib_ib_dev_stop() [ Upstream commit 95a5631f6c9f3045f26245e6045244652204dfdb ] The return value from ipoib_ib_dev_stop() is always 0 - change it to be void. Link: https://lore.kernel.org/r/20200623105236.18683-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0e5f27caf2b2..50a355738609 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -515,7 +515,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open_default(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); -int ipoib_ib_dev_stop(struct net_device *dev); +void ipoib_ib_dev_stop(struct net_device *dev); void ipoib_ib_dev_up(struct net_device *dev); void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index da3c5315bbb5..6ee64c25aaff 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -846,7 +846,7 @@ timeout: return 0; } -int ipoib_ib_dev_stop(struct net_device *dev) +void ipoib_ib_dev_stop(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -854,8 +854,6 @@ int ipoib_ib_dev_stop(struct net_device *dev) clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); ipoib_flush_ah(dev); - - return 0; } int ipoib_ib_dev_open_default(struct net_device *dev) -- cgit v1.2.3 From 0f334b6684558658d1a92ba98695432223015c81 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 25 Jun 2020 20:42:19 +0300 Subject: RDMA/ipoib: Fix ABBA deadlock with ipoib_reap_ah() [ Upstream commit 65936bf25f90fe440bb2d11624c7d10fab266639 ] ipoib_mcast_carrier_on_task() insanely open codes a rtnl_lock() such that the only time flush_workqueue() can be called is if it also clears IPOIB_FLAG_OPER_UP. Thus the flush inside ipoib_flush_ah() will deadlock if it gets unlucky enough, and lockdep doesn't help us to find it early: CPU0 CPU1 CPU2 __ipoib_ib_dev_flush() down_read(vlan_rwsem) ipoib_vlan_add() rtnl_trylock() down_write(vlan_rwsem) ipoib_mcast_carrier_on_task() while (!rtnl_trylock()) msleep(20); ipoib_flush_ah() flush_workqueue(priv->wq) Clean up the ah_reaper related functions and lifecycle to make sense: - Start/Stop of the reaper should only be done in open/stop NDOs, not in any other places - cancel and flush of the reaper should only happen in the stop NDO. cancel is only functional when combined with IPOIB_STOP_REAPER. - Non-stop places were flushing the AH's just need to flush out dead AH's synchronously and ignore the background task completely. It is fully locked and harmless to leave running. Which ultimately fixes the ABBA deadlock by removing the unnecessary flush_workqueue() from the problematic place under the vlan_rwsem. Fixes: efc82eeeae4e ("IB/ipoib: No longer use flush as a parameter") Link: https://lore.kernel.org/r/20200625174219.290842-1-kamalheib1@gmail.com Reported-by: Kamal Heib Tested-by: Kamal Heib Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 65 ++++++++++++++----------------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 + 2 files changed, 31 insertions(+), 36 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 6ee64c25aaff..494f413dc3c6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -670,13 +670,12 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, return rc; } -static void __ipoib_reap_ah(struct net_device *dev) +static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah, *tah; unsigned long flags; - netif_tx_lock_bh(dev); + netif_tx_lock_bh(priv->dev); spin_lock_irqsave(&priv->lock, flags); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) @@ -687,37 +686,37 @@ static void __ipoib_reap_ah(struct net_device *dev) } spin_unlock_irqrestore(&priv->lock, flags); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(priv->dev); } void ipoib_reap_ah(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, ah_reap_task.work); - struct net_device *dev = priv->dev; - __ipoib_reap_ah(dev); + ipoib_reap_dead_ahs(priv); if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) queue_delayed_work(priv->wq, &priv->ah_reap_task, round_jiffies_relative(HZ)); } -static void ipoib_flush_ah(struct net_device *dev) +static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - cancel_delayed_work(&priv->ah_reap_task); - flush_workqueue(priv->wq); - ipoib_reap_ah(&priv->ah_reap_task.work); + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(priv->wq, &priv->ah_reap_task, + round_jiffies_relative(HZ)); } -static void ipoib_stop_ah(struct net_device *dev) +static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - set_bit(IPOIB_STOP_REAPER, &priv->flags); - ipoib_flush_ah(dev); + cancel_delayed_work(&priv->ah_reap_task); + /* + * After ipoib_stop_ah_reaper() we always go through + * ipoib_reap_dead_ahs() which ensures the work is really stopped and + * does a final flush out of the dead_ah's list + */ } static int recvs_pending(struct net_device *dev) @@ -846,16 +845,6 @@ timeout: return 0; } -void ipoib_ib_dev_stop(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - priv->rn_ops->ndo_stop(dev); - - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_flush_ah(dev); -} - int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -899,10 +888,7 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } - clear_bit(IPOIB_STOP_REAPER, &priv->flags); - queue_delayed_work(priv->wq, &priv->ah_reap_task, - round_jiffies_relative(HZ)); - + ipoib_start_ah_reaper(priv); if (priv->rn_ops->ndo_open(dev)) { pr_warn("%s: Failed to open dev\n", dev->name); goto dev_stop; @@ -913,13 +899,20 @@ int ipoib_ib_dev_open(struct net_device *dev) return 0; dev_stop: - set_bit(IPOIB_STOP_REAPER, &priv->flags); - cancel_delayed_work(&priv->ah_reap_task); - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_ib_dev_stop(dev); + ipoib_stop_ah_reaper(priv); return -1; } +void ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + priv->rn_ops->ndo_stop(dev); + + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_stop_ah_reaper(priv); +} + void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1230,7 +1223,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_mcast_dev_flush(dev); if (oper_up) set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); - ipoib_flush_ah(dev); + ipoib_reap_dead_ahs(priv); } if (level >= IPOIB_FLUSH_NORMAL) @@ -1305,7 +1298,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) * the neighbor garbage collection is stopped and reaped. * That should all be done now, so make a final ah flush. */ - ipoib_stop_ah(dev); + ipoib_reap_dead_ahs(priv); clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4fd095fd63b6..044bcacad6e4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1979,6 +1979,8 @@ static void ipoib_ndo_uninit(struct net_device *dev) /* no more works over the priv->wq */ if (priv->wq) { + /* See ipoib_mcast_carrier_on_task() */ + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); flush_workqueue(priv->wq); destroy_workqueue(priv->wq); priv->wq = NULL; -- cgit v1.2.3 From 95c736a2910572f7e2d36d1ffbc7b795b3832bb0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 30 Jun 2020 12:39:11 +0300 Subject: IB/uverbs: Set IOVA on IB MR in uverbs layer [ Upstream commit 04c0a5fcfcf65aade2fb238b6336445f1a99b646 ] Set IOVA on IB MR in uverbs layer to let all drivers have it, this includes both reg/rereg MR flows. As part of this change cleaned-up this setting from the drivers that already did it by themselves in their user flows. Fixes: e6f0330106f4 ("mlx4_ib: set user mr attributes in struct ib_mr") Link: https://lore.kernel.org/r/20200630093916.332097-3-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/uverbs_cmd.c | 4 ++++ drivers/infiniband/hw/cxgb4/mem.c | 1 - drivers/infiniband/hw/mlx4/mr.c | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e2ddcb0dc4ee..c398d1a64614 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -757,6 +757,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; + mr->iova = cmd.hca_va; rdma_restrack_uadd(&mr->res); uobj->object = mr; @@ -847,6 +848,9 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) atomic_dec(&old_pd->usecnt); } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; resp.rkey = mr->rkey; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 35c284af574d..dcb58cef336d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -399,7 +399,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; mhp->ibmr.length = mhp->attr.len; - mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 6ae503cfc526..9114cb730769 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->ibmr.length = length; - mr->ibmr.iova = virt_addr; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; -- cgit v1.2.3 From e5a9bb4f12432168abffd646bbc41342fc8a4236 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 2 Jul 2020 11:29:32 +0300 Subject: RDMA/counter: Only bind user QPs in auto mode [ Upstream commit c9f557421e505f75da4234a6af8eff46bc08614b ] In auto mode only bind user QPs to a dynamic counter, since this feature is mainly used for system statistic and diagnostic purpose, while there's no need to counter kernel QPs so far. Fixes: 99fa331dc862 ("RDMA/counter: Add "auto" configuration mode support") Link: https://lore.kernel.org/r/20200702082933.424537-3-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 11210bf7fd61..42809f612c2c 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -284,7 +284,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid) + if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) -- cgit v1.2.3 From b638533ec6fa306389b2a1972792456a234acf92 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 2 Jul 2020 11:29:33 +0300 Subject: RDMA/counter: Allow manually bind QPs with different pids to same counter [ Upstream commit cbeb7d896c0f296451ffa7b67e7706786b8364c8 ] In manual mode allow bind user QPs with different pids to same counter, since this is allowed in auto mode. Bind kernel QPs and user QPs to the same counter are not allowed. Fixes: 1bd8e0a9d0fd ("RDMA/counter: Allow manual mode configuration support") Link: https://lore.kernel.org/r/20200702082933.424537-4-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 42809f612c2c..f454d63008d6 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -487,7 +487,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err; } - if (counter->res.task != qp->res.task) { + if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { ret = -EINVAL; goto err_task; } -- cgit v1.2.3 From 59af0759bd46e2da3266bfbb6d74ba306109bdf9 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Tue, 11 Aug 2020 13:49:31 -0400 Subject: RDMA/hfi1: Correct an interlock issue for TID RDMA WRITE request commit b25e8e85e75a61af1ddc88c4798387dd3132dd43 upstream. The following message occurs when running an AI application with TID RDMA enabled: hfi1 0000:7f:00.0: hfi1_0: [QP74] hfi1_tid_timeout 4084 hfi1 0000:7f:00.0: hfi1_0: [QP70] hfi1_tid_timeout 4084 The issue happens when TID RDMA WRITE request is followed by an IB_WR_RDMA_WRITE_WITH_IMM request, the latter could be completed first on the responder side. As a result, no ACK packet for the latter could be sent because the TID RDMA WRITE request is still being processed on the responder side. When the TID RDMA WRITE request is eventually completed, the requester will wait for the IB_WR_RDMA_WRITE_WITH_IMM request to be acknowledged. If the next request is another TID RDMA WRITE request, no TID RDMA WRITE DATA packet could be sent because the preceding IB_WR_RDMA_WRITE_WITH_IMM request is not completed yet. Consequently the IB_WR_RDMA_WRITE_WITH_IMM will be retried but it will be ignored on the responder side because the responder thinks it has already been completed. Eventually the retry will be exhausted and the qp will be put into error state on the requester side. On the responder side, the TID resource timer will eventually expire because no TID RDMA WRITE DATA packets will be received for the second TID RDMA WRITE request. There is also risk of a write-after-write memory corruption due to the issue. Fix by adding a requester side interlock to prevent any potential data corruption and TID RDMA protocol error. Fixes: a0b34f75ec20 ("IB/hfi1: Add interlock between a TID RDMA request and other requests") Link: https://lore.kernel.org/r/20200811174931.191210.84093.stgit@awfm-01.aw.intel.com Cc: # 5.4.x+ Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/tid_rdma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 7c6fd720fb2e..c018fc633cca 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3215,6 +3215,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: switch (prev->wr.opcode) { case IB_WR_TID_RDMA_WRITE: req = wqe_to_tid_req(prev); -- cgit v1.2.3 From 140ac9370b16ff82df62e40049b90014c0add059 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 5 Aug 2020 21:45:48 -0700 Subject: RDMA/bnxt_re: Do not add user qps to flushlist [ Upstream commit a812f2d60a9fb7818f9c81f967180317b52545c0 ] Driver shall add only the kernel qps to the flush list for clean up. During async error events from the HW, driver is adding qps to this list without checking if the qp is kernel qp or not. Add a check to avoid user qp addition to the flush list. Fixes: 942c9b6ca8de ("RDMA/bnxt_re: Avoid Hard lockup during error CQE processing") Fixes: c50866e2853a ("bnxt_re: fix the regression due to changes in alloc_pbl") Link: https://lore.kernel.org/r/1596689148-4023-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 27e2df44d043..cfe5f47d9890 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -789,7 +789,8 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, struct ib_event event; unsigned int flags; - if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && + rdma_is_kernel_res(&qp->ib_qp.res)) { flags = bnxt_re_lock_cqs(qp); bnxt_qplib_add_flush_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); -- cgit v1.2.3 From ca337b53fff3919e99be7256f0a46c73aa8043bc Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 19 Aug 2020 15:56:32 +0800 Subject: RDMA/rxe: Fix memleak in rxe_mem_init_user [ Upstream commit e3ddd6067ee62f6e76ebcf61ff08b2c729ae412b ] When page_address() fails, umem should be freed just like when rxe_mem_alloc() fails. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200819075632.22285-1-dinghao.liu@zju.edu.cn Signed-off-by: Dinghao Liu Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_mr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ea6a819b7167..ffbc50341a55 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -207,6 +207,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { pr_warn("null vaddr\n"); + ib_umem_release(umem); err = -ENOMEM; goto err1; } -- cgit v1.2.3 From d1878b298fc6b7ca679cfa252db5f955234e03ea Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 5 Jul 2020 13:43:10 +0300 Subject: RDMA/rxe: Drop pointless checks in rxe_init_ports [ Upstream commit 6112ef62826e91afbae5446d5d47b38e25f47e3f ] Both pkey_tbl_len and gid_tbl_len are set in rxe_init_port_param() - so no need to check if they aren't set. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200705104313.283034-2-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index a8c11b5e1e94..dee0c2b7897a 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -157,9 +157,6 @@ static int rxe_init_ports(struct rxe_dev *rxe) rxe_init_port_param(port); - if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) - return -EINVAL; - port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, sizeof(*port->pkey_tbl), GFP_KERNEL); -- cgit v1.2.3 From aaca6867137690c1ed6ccef74d7897784234114a Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 25 Aug 2020 18:17:25 +0300 Subject: RDMA/rxe: Fix panic when calling kmem_cache_create() [ Upstream commit d862060a4b43479887ae8e2c0b74a58c4e27e5f3 ] To avoid the following kernel panic when calling kmem_cache_create() with a NULL pointer from pool_cache(), Block the rxe_param_set_add() from running if the rdma_rxe module is not initialized. BUG: unable to handle kernel NULL pointer dereference at 000000000000000b PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 4 PID: 8512 Comm: modprobe Kdump: loaded Not tainted 4.18.0-231.el8.x86_64 #1 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 10/02/2018 RIP: 0010:kmem_cache_alloc+0xd1/0x1b0 Code: 8b 57 18 45 8b 77 1c 48 8b 5c 24 30 0f 1f 44 00 00 5b 48 89 e8 5d 41 5c 41 5d 41 5e 41 5f c3 81 e3 00 00 10 00 75 0e 4d 89 fe <41> f6 47 0b 04 0f 84 6c ff ff ff 4c 89 ff e8 cc da 01 00 49 89 c6 RSP: 0018:ffffa2b8c773f9d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000005 RDX: 0000000000000004 RSI: 00000000006080c0 RDI: 0000000000000000 RBP: ffff8ea0a8634fd0 R08: ffffa2b8c773f988 R09: 00000000006000c0 R10: 0000000000000000 R11: 0000000000000230 R12: 00000000006080c0 R13: ffffffffc0a97fc8 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f9138ed9740(0000) GS:ffff8ea4ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000000b CR3: 000000046d59a000 CR4: 00000000003406e0 Call Trace: rxe_alloc+0xc8/0x160 [rdma_rxe] rxe_get_dma_mr+0x25/0xb0 [rdma_rxe] __ib_alloc_pd+0xcb/0x160 [ib_core] ib_mad_init_device+0x296/0x8b0 [ib_core] add_client_context+0x11a/0x160 [ib_core] enable_device_and_get+0xdc/0x1d0 [ib_core] ib_register_device+0x572/0x6b0 [ib_core] ? crypto_create_tfm+0x32/0xe0 ? crypto_create_tfm+0x7a/0xe0 ? crypto_alloc_tfm+0x58/0xf0 rxe_register_device+0x19d/0x1c0 [rdma_rxe] rxe_net_add+0x3d/0x70 [rdma_rxe] ? dev_get_by_name_rcu+0x73/0x90 rxe_param_set_add+0xaf/0xc0 [rdma_rxe] parse_args+0x179/0x370 ? ref_module+0x1b0/0x1b0 load_module+0x135e/0x17e0 ? ref_module+0x1b0/0x1b0 ? __do_sys_init_module+0x13b/0x180 __do_sys_init_module+0x13b/0x180 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f9137ed296e This can be triggered if a user tries to use the 'module option' which is not actually a real module option but some idiotic (and thankfully no obsolete) sysfs interface. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200825151725.254046-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe.c | 4 ++++ drivers/infiniband/sw/rxe/rxe.h | 2 ++ drivers/infiniband/sw/rxe/rxe_sysfs.c | 5 +++++ 3 files changed, 11 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index dee0c2b7897a..70c4ea438664 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -48,6 +48,8 @@ static void rxe_cleanup_ports(struct rxe_dev *rxe) } +bool rxe_initialized; + /* free resources for a rxe device all objects created for this device must * have been destroyed */ @@ -355,6 +357,7 @@ static int __init rxe_module_init(void) return err; rdma_link_register(&rxe_link_ops); + rxe_initialized = true; pr_info("loaded\n"); return 0; } @@ -366,6 +369,7 @@ static void __exit rxe_module_exit(void) rxe_net_exit(); rxe_cache_exit(); + rxe_initialized = false; pr_info("unloaded\n"); } diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index fb07eed9e402..cae1b0a24c85 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -67,6 +67,8 @@ #define RXE_ROCE_V2_SPORT (0xc000) +extern bool rxe_initialized; + static inline u32 rxe_crc32(struct rxe_dev *rxe, u32 crc, void *next, size_t len) { diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index ccda5f5a3bc0..2af31d421bfc 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -61,6 +61,11 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) struct net_device *ndev; struct rxe_dev *exists; + if (!rxe_initialized) { + pr_err("Module parameters are not supported, use rdma link add or rxe_cfg\n"); + return -EAGAIN; + } + len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { pr_err("add: invalid interface name\n"); -- cgit v1.2.3 From 1a2d6e722b49cc67b27b4cbd41ccf783710c2217 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 24 Aug 2020 11:14:32 -0700 Subject: RDMA/bnxt_re: Do not report transparent vlan from QP1 [ Upstream commit 2d0e60ee322d512fa6bc62d23a6760b39a380847 ] QP1 Rx CQE reports transparent VLAN ID in the completion and this is used while reporting the completion for received MAD packet. Check if the vlan id is configured before reporting it in the work completion. Fixes: 84511455ac5b ("RDMA/bnxt_re: report vlan_id and sl in qp1 recv completion") Link: https://lore.kernel.org/r/1598292876-26529-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index ebc3e3d4a6e2..3b05c0640338 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2973,6 +2973,19 @@ static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc, wc->wc_flags |= IB_WC_GRH; } +static bool bnxt_re_check_if_vlan_valid(struct bnxt_re_dev *rdev, + u16 vlan_id) +{ + /* + * Check if the vlan is configured in the host. If not configured, it + * can be a transparent VLAN. So dont report the vlan id. + */ + if (!__vlan_find_dev_deep_rcu(rdev->netdev, + htons(ETH_P_8021Q), vlan_id)) + return false; + return true; +} + static bool bnxt_re_is_vlan_pkt(struct bnxt_qplib_cqe *orig_cqe, u16 *vid, u8 *sl) { @@ -3041,9 +3054,11 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp, wc->src_qp = orig_cqe->src_qp; memcpy(wc->smac, orig_cqe->smac, ETH_ALEN); if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) { - wc->vlan_id = vlan_id; - wc->sl = sl; - wc->wc_flags |= IB_WC_WITH_VLAN; + if (bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { + wc->vlan_id = vlan_id; + wc->sl = sl; + wc->wc_flags |= IB_WC_WITH_VLAN; + } } wc->port_num = 1; wc->vendor_err = orig_cqe->status; -- cgit v1.2.3 From 0b4662709cedf1b1d044bb63e03f64cd3e33f4ce Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 2 Sep 2020 15:43:04 +0300 Subject: RDMA/core: Fix reported speed and width [ Upstream commit 28b0865714b315e318ac45c4fc9156f3d4649646 ] When the returned speed from __ethtool_get_link_ksettings() is SPEED_UNKNOWN this will lead to reporting a wrong speed and width for providers that uses ib_get_eth_speed(), fix that by defaulting the netdev_speed to SPEED_1000 in case the returned value from __ethtool_get_link_ksettings() is SPEED_UNKNOWN. Fixes: d41861942fc5 ("IB/core: Add generic function to extract IB speed from netdev") Link: https://lore.kernel.org/r/20200902124304.170912-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d4815f29cfd2..5d896f6b2b61 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1749,7 +1749,7 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) dev_put(netdev); - if (!rc) { + if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) { netdev_speed = lksettings.base.speed; } else { netdev_speed = SPEED_1000; -- cgit v1.2.3 From 0f632bc483553fd2203face1384119d49edeff61 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 4 Sep 2020 12:50:39 -0700 Subject: IB/isert: Fix unaligned immediate-data handling [ Upstream commit 0b089c1ef7047652b13b4cdfdb1e0e7dbdb8c9ab ] Currently we allocate rx buffers in a single contiguous buffers for headers (iser and iscsi) and data trailer. This means that most likely the data starting offset is aligned to 76 bytes (size of both headers). This worked fine for years, but at some point this broke, resulting in data corruptions in isert when a command comes with immediate data and the underlying backend device assumes 512 bytes buffer alignment. We assume a hard-requirement for all direct I/O buffers to be 512 bytes aligned. To fix this, we should avoid passing unaligned buffers for I/O. Instead, we allocate our recv buffers with some extra space such that we can have the data portion align to 512 byte boundary. This also means that we cannot reference headers or data using structure but rather accessors (as they may move based on alignment). Also, get rid of the wrong __packed annotation from iser_rx_desc as this has only harmful effects (not aligned to anything). This affects the rx descriptors for iscsi login and data plane. Fixes: 3d75ca0adef4 ("block: introduce multi-page bvec helpers") Link: https://lore.kernel.org/r/20200904195039.31687-1-sagi@grimberg.me Reported-by: Stephen Rust Tested-by: Doug Dumitru Signed-off-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/isert/ib_isert.c | 93 +++++++++++++++++---------------- drivers/infiniband/ulp/isert/ib_isert.h | 41 +++++++++++---- 2 files changed, 78 insertions(+), 56 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a1a035270cab..71268d61d2b8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -182,15 +182,15 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) rx_desc = isert_conn->rx_descs; for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) { - dma_addr = ib_dma_map_single(ib_dev, (void *)rx_desc, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + dma_addr = ib_dma_map_single(ib_dev, rx_desc->buf, + ISER_RX_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(ib_dev, dma_addr)) goto dma_map_fail; rx_desc->dma_addr = dma_addr; rx_sg = &rx_desc->rx_sg; - rx_sg->addr = rx_desc->dma_addr; + rx_sg->addr = rx_desc->dma_addr + isert_get_hdr_offset(rx_desc); rx_sg->length = ISER_RX_PAYLOAD_SIZE; rx_sg->lkey = device->pd->local_dma_lkey; rx_desc->rx_cqe.done = isert_recv_done; @@ -202,7 +202,7 @@ dma_map_fail: rx_desc = isert_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) { ib_dma_unmap_single(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); } kfree(isert_conn->rx_descs); isert_conn->rx_descs = NULL; @@ -223,7 +223,7 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn) rx_desc = isert_conn->rx_descs; for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) { ib_dma_unmap_single(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); } kfree(isert_conn->rx_descs); @@ -408,10 +408,9 @@ isert_free_login_buf(struct isert_conn *isert_conn) ISER_RX_PAYLOAD_SIZE, DMA_TO_DEVICE); kfree(isert_conn->login_rsp_buf); - ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, - DMA_FROM_DEVICE); - kfree(isert_conn->login_req_buf); + ib_dma_unmap_single(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); + kfree(isert_conn->login_desc); } static int @@ -420,25 +419,25 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, { int ret; - isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf), + isert_conn->login_desc = kzalloc(sizeof(*isert_conn->login_desc), GFP_KERNEL); - if (!isert_conn->login_req_buf) + if (!isert_conn->login_desc) return -ENOMEM; - isert_conn->login_req_dma = ib_dma_map_single(ib_dev, - isert_conn->login_req_buf, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma); + isert_conn->login_desc->dma_addr = ib_dma_map_single(ib_dev, + isert_conn->login_desc->buf, + ISER_RX_SIZE, DMA_FROM_DEVICE); + ret = ib_dma_mapping_error(ib_dev, isert_conn->login_desc->dma_addr); if (ret) { - isert_err("login_req_dma mapping error: %d\n", ret); - isert_conn->login_req_dma = 0; - goto out_free_login_req_buf; + isert_err("login_desc dma mapping error: %d\n", ret); + isert_conn->login_desc->dma_addr = 0; + goto out_free_login_desc; } isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { ret = -ENOMEM; - goto out_unmap_login_req_buf; + goto out_unmap_login_desc; } isert_conn->login_rsp_dma = ib_dma_map_single(ib_dev, @@ -455,11 +454,11 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, out_free_login_rsp_buf: kfree(isert_conn->login_rsp_buf); -out_unmap_login_req_buf: - ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); -out_free_login_req_buf: - kfree(isert_conn->login_req_buf); +out_unmap_login_desc: + ib_dma_unmap_single(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); +out_free_login_desc: + kfree(isert_conn->login_desc); return ret; } @@ -578,7 +577,7 @@ isert_connect_release(struct isert_conn *isert_conn) ib_destroy_qp(isert_conn->qp); } - if (isert_conn->login_req_buf) + if (isert_conn->login_desc) isert_free_login_buf(isert_conn); isert_device_put(device); @@ -964,17 +963,18 @@ isert_login_post_recv(struct isert_conn *isert_conn) int ret; memset(&sge, 0, sizeof(struct ib_sge)); - sge.addr = isert_conn->login_req_dma; + sge.addr = isert_conn->login_desc->dma_addr + + isert_get_hdr_offset(isert_conn->login_desc); sge.length = ISER_RX_PAYLOAD_SIZE; sge.lkey = isert_conn->device->pd->local_dma_lkey; isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n", sge.addr, sge.length, sge.lkey); - isert_conn->login_req_buf->rx_cqe.done = isert_login_recv_done; + isert_conn->login_desc->rx_cqe.done = isert_login_recv_done; memset(&rx_wr, 0, sizeof(struct ib_recv_wr)); - rx_wr.wr_cqe = &isert_conn->login_req_buf->rx_cqe; + rx_wr.wr_cqe = &isert_conn->login_desc->rx_cqe; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; @@ -1051,7 +1051,7 @@ post_send: static void isert_rx_login_req(struct isert_conn *isert_conn) { - struct iser_rx_desc *rx_desc = isert_conn->login_req_buf; + struct iser_rx_desc *rx_desc = isert_conn->login_desc; int rx_buflen = isert_conn->login_req_len; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_login *login = conn->conn_login; @@ -1063,7 +1063,7 @@ isert_rx_login_req(struct isert_conn *isert_conn) if (login->first_request) { struct iscsi_login_req *login_req = - (struct iscsi_login_req *)&rx_desc->iscsi_header; + (struct iscsi_login_req *)isert_get_iscsi_hdr(rx_desc); /* * Setup the initial iscsi_login values from the leading * login request PDU. @@ -1082,13 +1082,13 @@ isert_rx_login_req(struct isert_conn *isert_conn) login->tsih = be16_to_cpu(login_req->tsih); } - memcpy(&login->req[0], (void *)&rx_desc->iscsi_header, ISCSI_HDR_LEN); + memcpy(&login->req[0], isert_get_iscsi_hdr(rx_desc), ISCSI_HDR_LEN); size = min(rx_buflen, MAX_KEY_VALUE_PAIRS); isert_dbg("Using login payload size: %d, rx_buflen: %d " "MAX_KEY_VALUE_PAIRS: %d\n", size, rx_buflen, MAX_KEY_VALUE_PAIRS); - memcpy(login->req_buf, &rx_desc->data[0], size); + memcpy(login->req_buf, isert_get_data(rx_desc), size); if (login->first_request) { complete(&isert_conn->login_comp); @@ -1153,14 +1153,15 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, if (imm_data_len != data_len) { sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE)); sg_copy_from_buffer(cmd->se_cmd.t_data_sg, sg_nents, - &rx_desc->data[0], imm_data_len); + isert_get_data(rx_desc), imm_data_len); isert_dbg("Copy Immediate sg_nents: %u imm_data_len: %d\n", sg_nents, imm_data_len); } else { sg_init_table(&isert_cmd->sg, 1); cmd->se_cmd.t_data_sg = &isert_cmd->sg; cmd->se_cmd.t_data_nents = 1; - sg_set_buf(&isert_cmd->sg, &rx_desc->data[0], imm_data_len); + sg_set_buf(&isert_cmd->sg, isert_get_data(rx_desc), + imm_data_len); isert_dbg("Transfer Immediate imm_data_len: %d\n", imm_data_len); } @@ -1229,9 +1230,9 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, } isert_dbg("Copying DataOut: sg_start: %p, sg_off: %u " "sg_nents: %u from %p %u\n", sg_start, sg_off, - sg_nents, &rx_desc->data[0], unsol_data_len); + sg_nents, isert_get_data(rx_desc), unsol_data_len); - sg_copy_from_buffer(sg_start, sg_nents, &rx_desc->data[0], + sg_copy_from_buffer(sg_start, sg_nents, isert_get_data(rx_desc), unsol_data_len); rc = iscsit_check_dataout_payload(cmd, hdr, false); @@ -1290,7 +1291,7 @@ isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd } cmd->text_in_ptr = text_in; - memcpy(cmd->text_in_ptr, &rx_desc->data[0], payload_length); + memcpy(cmd->text_in_ptr, isert_get_data(rx_desc), payload_length); return iscsit_process_text_cmd(conn, cmd, hdr); } @@ -1300,7 +1301,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, uint32_t read_stag, uint64_t read_va, uint32_t write_stag, uint64_t write_va) { - struct iscsi_hdr *hdr = &rx_desc->iscsi_header; + struct iscsi_hdr *hdr = isert_get_iscsi_hdr(rx_desc); struct iscsi_conn *conn = isert_conn->conn; struct iscsi_cmd *cmd; struct isert_cmd *isert_cmd; @@ -1398,8 +1399,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) struct isert_conn *isert_conn = wc->qp->qp_context; struct ib_device *ib_dev = isert_conn->cm_id->device; struct iser_rx_desc *rx_desc = cqe_to_rx_desc(wc->wr_cqe); - struct iscsi_hdr *hdr = &rx_desc->iscsi_header; - struct iser_ctrl *iser_ctrl = &rx_desc->iser_header; + struct iscsi_hdr *hdr = isert_get_iscsi_hdr(rx_desc); + struct iser_ctrl *iser_ctrl = isert_get_iser_hdr(rx_desc); uint64_t read_va = 0, write_va = 0; uint32_t read_stag = 0, write_stag = 0; @@ -1413,7 +1414,7 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) rx_desc->in_use = true; ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); isert_dbg("DMA: 0x%llx, iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n", rx_desc->dma_addr, hdr->opcode, hdr->itt, hdr->flags, @@ -1448,7 +1449,7 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) read_stag, read_va, write_stag, write_va); ib_dma_sync_single_for_device(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); } static void @@ -1462,8 +1463,8 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } - ib_dma_sync_single_for_cpu(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); isert_conn->login_req_len = wc->byte_len - ISER_HEADERS_LEN; @@ -1478,8 +1479,8 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) complete(&isert_conn->login_req_comp); mutex_unlock(&isert_conn->mutex); - ib_dma_sync_single_for_device(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); } static void diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 3b296bac4f60..d267a6d60d87 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -59,9 +59,11 @@ ISERT_MAX_TX_MISC_PDUS + \ ISERT_MAX_RX_MISC_PDUS) -#define ISER_RX_PAD_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \ - (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \ - sizeof(struct ib_cqe) + sizeof(bool))) +/* + * RX size is default of 8k plus headers, but data needs to align to + * 512 boundary, so use 1024 to have the extra space for alignment. + */ +#define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) #define ISCSI_ISER_SG_TABLESIZE 256 @@ -80,21 +82,41 @@ enum iser_conn_state { }; struct iser_rx_desc { - struct iser_ctrl iser_header; - struct iscsi_hdr iscsi_header; - char data[ISCSI_DEF_MAX_RECV_SEG_LEN]; + char buf[ISER_RX_SIZE]; u64 dma_addr; struct ib_sge rx_sg; struct ib_cqe rx_cqe; bool in_use; - char pad[ISER_RX_PAD_SIZE]; -} __packed; +}; static inline struct iser_rx_desc *cqe_to_rx_desc(struct ib_cqe *cqe) { return container_of(cqe, struct iser_rx_desc, rx_cqe); } +static void *isert_get_iser_hdr(struct iser_rx_desc *desc) +{ + return PTR_ALIGN(desc->buf + ISER_HEADERS_LEN, 512) - ISER_HEADERS_LEN; +} + +static size_t isert_get_hdr_offset(struct iser_rx_desc *desc) +{ + return isert_get_iser_hdr(desc) - (void *)desc->buf; +} + +static void *isert_get_iscsi_hdr(struct iser_rx_desc *desc) +{ + return isert_get_iser_hdr(desc) + sizeof(struct iser_ctrl); +} + +static void *isert_get_data(struct iser_rx_desc *desc) +{ + void *data = isert_get_iser_hdr(desc) + ISER_HEADERS_LEN; + + WARN_ON((uintptr_t)data & 511); + return data; +} + struct iser_tx_desc { struct iser_ctrl iser_header; struct iscsi_hdr iscsi_header; @@ -141,9 +163,8 @@ struct isert_conn { u32 responder_resources; u32 initiator_depth; bool pi_support; - struct iser_rx_desc *login_req_buf; + struct iser_rx_desc *login_desc; char *login_rsp_buf; - u64 login_req_dma; int login_req_len; u64 login_rsp_dma; struct iser_rx_desc *rx_descs; -- cgit v1.2.3 From 616a0c13e43219a9803c993e256637d3e8117296 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 20 Aug 2020 23:36:46 +0800 Subject: RDMA/rxe: Fix the parent sysfs read when the interface has 15 chars commit 60b1af64eb35074a4f2d41cc1e503a7671e68963 upstream. 'parent' sysfs reads will yield '\0' bytes when the interface name has 15 chars, and there will no "\n" output. To reproduce, create one interface with 15 chars: [root@test ~]# ip a s enp0s29u1u7u3c2 2: enp0s29u1u7u3c2: mtu 1500 qdisc fq_codel state UNKNOWN group default qlen 1000 link/ether 02:21:28:57:47:17 brd ff:ff:ff:ff:ff:ff inet6 fe80::ac41:338f:5bcd:c222/64 scope link noprefixroute valid_lft forever preferred_lft forever [root@test ~]# modprobe rdma_rxe [root@test ~]# echo enp0s29u1u7u3c2 > /sys/module/rdma_rxe/parameters/add [root@test ~]# cat /sys/class/infiniband/rxe0/parent enp0s29u1u7u3c2[root@test ~]# [root@test ~]# f="/sys/class/infiniband/rxe0/parent" [root@test ~]# echo "$(<"$f")" -bash: warning: command substitution: ignored null byte in input enp0s29u1u7u3c2 Use scnprintf and PAGE_SIZE to fill the sysfs output buffer. Cc: stable@vger.kernel.org Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200820153646.31316-1-yi.zhang@redhat.com Suggested-by: Jason Gunthorpe Signed-off-by: Yi Zhang Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 71358b0b8910..d1fe57ac87f5 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1078,7 +1078,7 @@ static ssize_t parent_show(struct device *device, struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); - return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); + return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); -- cgit v1.2.3 From a0f6bdafaa4c5bb057c8aea5ac2ddfc52a5355f5 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 24 Aug 2020 14:02:29 +0300 Subject: RDMA/mlx4: Read pkey table length instead of hardcoded value commit ec78b3bd66bc9a015505df0ef0eb153d9e64b03b upstream. If the pkey_table is not available (which is the case when RoCE is not supported), the cited commit caused a regression where mlx4_devices without RoCE are not created. Fix this by returning a pkey table length of zero in procedure eth_link_query_port() if the pkey-table length reported by the device is zero. Link: https://lore.kernel.org/r/20200824110229.1094376-1-leon@kernel.org Cc: Fixes: 1901b91f9982 ("IB/core: Fix potential NULL pointer dereference in pkey cache") Fixes: fa417f7b520e ("IB/mlx4: Add support for IBoE") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 61a1b0bdede0..b8274c6fc43e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -781,7 +781,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->ip_gids = true; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; - props->pkey_tbl_len = 1; + if (mdev->dev->caps.pkey_table_len[port]) + props->pkey_tbl_len = 1; props->max_mtu = IB_MTU_4096; props->max_vl_num = 2; props->state = IB_PORT_DOWN; -- cgit v1.2.3 From a73e9ea38d5d77bebda2b776673be6d84919ae70 Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Mon, 24 Aug 2020 11:14:35 -0700 Subject: RDMA/bnxt_re: Restrict the max_gids to 256 commit 847b97887ed4569968d5b9a740f2334abca9f99a upstream. Some adapters report more than 256 gid entries. Restrict it to 256 for now. Fixes: 1ac5a4047975("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1598292876-26529-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 40296b97d21e..079aaaaffec7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -152,7 +152,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_inline_data = le32_to_cpu(sb->max_inline_data); attr->l2_db_size = (sb->l2_db_space_size + 1) * (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); - attr->max_sgid = le32_to_cpu(sb->max_gid); + attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; bnxt_qplib_query_version(rcfw, attr->fw_ver); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 13d9432d5ce2..194f5ef45ca6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -47,6 +47,7 @@ struct bnxt_qplib_dev_attr { #define FW_VER_ARR_LEN 4 u8 fw_ver[FW_VER_ARR_LEN]; +#define BNXT_QPLIB_NUM_GIDS_SUPPORTED 256 u16 max_sgid; u16 max_mrw; u32 max_qp; -- cgit v1.2.3 From f3ebf81ee6346c8230d5dda80ff112611fd78982 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 14:23:54 +0800 Subject: RDMA/qedr: Fix potential use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 960657b732e1ce21b07be5ab48a7ad3913d72ba4 ] Move the release operation after error log to avoid possible use after free. Link: https://lore.kernel.org/r/1573021434-18768-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Acked-by: Michal Kalderon  Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index a7a926b7b562..6dea49e11f5f 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -490,10 +490,10 @@ qedr_addr6_resolve(struct qedr_dev *dev, if ((!dst) || dst->error) { if (dst) { - dst_release(dst); DP_ERR(dev, "ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return -EINVAL; } -- cgit v1.2.3 From 3161ea6733825629372a49eda0b6e48fcffd776f Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 14:44:11 +0800 Subject: RDMA/i40iw: Fix potential use after free [ Upstream commit da046d5f895fca18d63b15ac8faebd5bf784e23a ] Release variable dst after logging dst->error to avoid possible use after free. Link: https://lore.kernel.org/r/1573022651-37171-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index b1df93b69df4..fa7a5ff498c7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2074,9 +2074,9 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr); if (!dst || dst->error) { if (dst) { - dst_release(dst); i40iw_pr_err("ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return rc; } -- cgit v1.2.3 From 40afadc0577122239550efb5d22b7291efa7b088 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 23 Sep 2019 21:07:46 +0200 Subject: RDMA/iw_cgxb4: Fix an error handling path in 'c4iw_connect()' [ Upstream commit 9067f2f0b41d7e817fc8c5259bab1f17512b0147 ] We should jump to fail3 in order to undo the 'xa_insert_irq()' call. Link: https://lore.kernel.org/r/20190923190746.10964-1-christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6b4e7235d2f5..30e08bcc9afb 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3382,7 +3382,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ @@ -3404,7 +3404,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ -- cgit v1.2.3 From a4d17715a85299f880d986003589b054abefd95c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 17 Feb 2020 12:57:14 -0800 Subject: RDMA/rxe: Fix configuration of atomic queue pair attributes [ Upstream commit fb3063d31995cc4cf1d47a406bb61d6fb1b1d58d ] From the comment above the definition of the roundup_pow_of_two() macro: The result is undefined when n == 0. Hence only pass positive values to roundup_pow_of_two(). This patch fixes the following UBSAN complaint: UBSAN: Undefined behaviour in ./include/linux/log2.h:57:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' Call Trace: dump_stack+0xa5/0xe6 ubsan_epilogue+0x9/0x26 __ubsan_handle_shift_out_of_bounds.cold+0x4c/0xf9 rxe_qp_from_attr.cold+0x37/0x5d [rdma_rxe] rxe_modify_qp+0x59/0x70 [rdma_rxe] _ib_modify_qp+0x5aa/0x7c0 [ib_core] ib_modify_qp+0x3b/0x50 [ib_core] cma_modify_qp_rtr+0x234/0x260 [rdma_cm] __rdma_accept+0x1a7/0x650 [rdma_cm] nvmet_rdma_cm_handler+0x1286/0x14cd [nvmet_rdma] cma_cm_event_handler+0x6b/0x330 [rdma_cm] cma_ib_req_handler+0xe60/0x22d0 [rdma_cm] cm_process_work+0x30/0x140 [ib_cm] cm_req_handler+0x11f4/0x1cd0 [ib_cm] cm_work_handler+0xb8/0x344e [ib_cm] process_one_work+0x569/0xb60 worker_thread+0x7a/0x5d0 kthread+0x1e6/0x210 ret_from_fork+0x24/0x30 Link: https://lore.kernel.org/r/20200217205714.26937-1-bvanassche@acm.org Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_qp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index e2c6d1cedf41..f85273883794 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -592,15 +592,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, int err; if (mask & IB_QP_MAX_QP_RD_ATOMIC) { - int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); + int max_rd_atomic = attr->max_rd_atomic ? + roundup_pow_of_two(attr->max_rd_atomic) : 0; qp->attr.max_rd_atomic = max_rd_atomic; atomic_set(&qp->req.rd_atomic, max_rd_atomic); } if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { - int max_dest_rd_atomic = - __roundup_pow_of_two(attr->max_dest_rd_atomic); + int max_dest_rd_atomic = attr->max_dest_rd_atomic ? + roundup_pow_of_two(attr->max_dest_rd_atomic) : 0; qp->attr.max_dest_rd_atomic = max_dest_rd_atomic; -- cgit v1.2.3 From c3e39fa0aa108a201a3a05499705bc4356803bab Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 11:25:33 +0200 Subject: RDMA/cm: Remove a race freeing timewait_info [ Upstream commit bede86a39d9dc3387ac00dcb8e1ac221676b2f25 ] When creating a cm_id during REQ the id immediately becomes visible to the other MAD handlers, and shortly after the state is moved to IB_CM_REQ_RCVD This allows cm_rej_handler() to run concurrently and free the work: CPU 0 CPU1 cm_req_handler() ib_create_cm_id() cm_match_req() id_priv->state = IB_CM_REQ_RCVD cm_rej_handler() cm_acquire_id() spin_lock(&id_priv->lock) switch (id_priv->state) case IB_CM_REQ_RCVD: cm_reset_to_idle() kfree(id_priv->timewait_info); goto destroy destroy: kfree(id_priv->timewait_info); id_priv->timewait_info = NULL Causing a double free or worse. Do not free the timewait_info without also holding the id_priv->lock. Simplify this entire flow by making the free unconditional during cm_destroy_id() and removing the confusing special case error unwind during creation of the timewait_info. This also fixes a leak of the timewait if cm_destroy_id() is called in IB_CM_ESTABLISHED with an XRC TGT QP. The state machine will be left in ESTABLISHED while it needed to transition through IB_CM_TIMEWAIT to release the timewait pointer. Also fix a leak of the timewait_info if the caller mis-uses the API and does ib_send_cm_reqs(). Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Link: https://lore.kernel.org/r/20200310092545.251365-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 09af96ec41dd..c1d6a068f50f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1092,14 +1092,22 @@ retest: break; } - spin_lock_irq(&cm.lock); + spin_lock_irq(&cm_id_priv->lock); + spin_lock(&cm.lock); + /* Required for cleanup paths related cm_req_handler() */ + if (cm_id_priv->timewait_info) { + cm_cleanup_timewait(cm_id_priv->timewait_info); + kfree(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; + } if (!list_empty(&cm_id_priv->altr_list) && (!cm_id_priv->altr_send_port_not_ready)) list_del(&cm_id_priv->altr_list); if (!list_empty(&cm_id_priv->prim_list) && (!cm_id_priv->prim_send_port_not_ready)) list_del(&cm_id_priv->prim_list); - spin_unlock_irq(&cm.lock); + spin_unlock(&cm.lock); + spin_unlock_irq(&cm_id_priv->lock); cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); @@ -1416,7 +1424,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, /* Verify that we're not in timewait. */ cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_IDLE) { + if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = -EINVAL; goto out; @@ -1434,12 +1442,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, param->ppath_sgid_attr, &cm_id_priv->av, cm_id_priv); if (ret) - goto error1; + goto out; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) - goto error1; + goto out; } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); @@ -1457,7 +1465,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) - goto error1; + goto out; req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); @@ -1480,7 +1488,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, return 0; error2: cm_free_msg(cm_id_priv->msg); -error1: kfree(cm_id_priv->timewait_info); out: return ret; } EXPORT_SYMBOL(ib_send_cm_req); @@ -1965,7 +1972,7 @@ static int cm_req_handler(struct cm_work *work) pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__, be32_to_cpu(cm_id->local_id)); ret = -EINVAL; - goto free_timeinfo; + goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; @@ -2050,8 +2057,6 @@ static int cm_req_handler(struct cm_work *work) rejected: atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); -free_timeinfo: - kfree(cm_id_priv->timewait_info); destroy: ib_destroy_cm_id(cm_id); return ret; -- cgit v1.2.3 From d5e7198bcf7e7fe2726420e05df226838fa48dfc Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 23 Mar 2020 13:28:00 +0200 Subject: RDMA/rxe: Set sys_image_guid to be aligned with HW IB devices [ Upstream commit d0ca2c35dd15a3d989955caec02beea02f735ee6 ] The RXE driver doesn't set sys_image_guid and user space applications see zeros. This causes to pyverbs tests to fail with the following traceback, because the IBTA spec requires to have valid sys_image_guid. Traceback (most recent call last): File "./tests/test_device.py", line 51, in test_query_device self.verify_device_attr(attr) File "./tests/test_device.py", line 74, in verify_device_attr assert attr.sys_image_guid != 0 In order to fix it, set sys_image_guid to be equal to node_guid. Before: 5: rxe0: ... node_guid 5054:00ff:feaa:5363 sys_image_guid 0000:0000:0000:0000 After: 5: rxe0: ... node_guid 5054:00ff:feaa:5363 sys_image_guid 5054:00ff:feaa:5363 Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200323112800.1444784-1-leon@kernel.org Signed-off-by: Zhu Yanjun Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 70c4ea438664..de5f3efe9fcb 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -118,6 +118,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; rxe->attr.max_pkeys = RXE_MAX_PKEYS; rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, + rxe->ndev->dev_addr); rxe->max_ucontext = RXE_MAX_UCONTEXT; } -- cgit v1.2.3 From 6e3b662d86a5e981c74bf88b8f4cdc3305040dfc Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Wed, 25 Mar 2020 15:12:10 +0000 Subject: IB/iser: Always check sig MR before putting it to the free pool [ Upstream commit 26e28deb813eed908cf31a6052870b6493ec0e86 ] libiscsi calls the check_protection transport handler only if SCSI-Respose is received. So, the handler is never called if iSCSI task is completed for some other reason like a timeout or error handling. And this behavior looks correct. But the iSER does not handle this case properly because it puts a non-checked signature MR to the free pool. Then the error occurs at reusing the MR because it is not allowed to invalidate a signature MR without checking. This commit adds an extra check to iser_unreg_mem_fastreg(), which is a part of the task cleanup flow. Now the signature MR is checked there if it is needed. Link: https://lore.kernel.org/r/20200325151210.1548-1-sergeygo@mellanox.com Signed-off-by: Sergey Gorenko Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/iser/iser_memory.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2cc89a9b9e9b..ea8e611397a3 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -292,12 +292,27 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, { struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; + struct iser_fr_desc *desc; + struct ib_mr_status mr_status; - if (!reg->mem_h) + desc = reg->mem_h; + if (!desc) return; - device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn, - reg->mem_h); + /* + * The signature MR cannot be invalidated and reused without checking. + * libiscsi calls the check_protection transport handler only if + * SCSI-Response is received. And the signature MR is not checked if + * the task is completed for some other reason like a timeout or error + * handling. That's why we must check the signature MR here before + * putting it to the free pool. + */ + if (unlikely(desc->sig_protected)) { + desc->sig_protected = false; + ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, + &mr_status); + } + device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn, desc); reg->mem_h = NULL; } -- cgit v1.2.3 From 1fe669e9ad1935f7d8d917ad2ae130b4d11dc3a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Mon, 3 Aug 2020 08:19:39 +0200 Subject: IB/mlx4: Fix starvation in paravirt mux/demux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7fd1507df7cee9c533f38152fcd1dd769fcac6ce ] The mlx4 driver will proxy MAD packets through the PF driver. A VM or an instantiated VF will send its MAD packets to the PF driver using loop-back. The PF driver will be informed by an interrupt, but defer the handling and polling of CQEs to a worker thread running on an ordered work-queue. Consider the following scenario: the VMs will in short proximity in time, for example due to a network event, send many MAD packets to the PF driver. Lets say there are K VMs, each sending N packets. The interrupt from the first VM will start the worker thread, which will poll N CQEs. A common case here is where the PF driver will multiplex the packets received from the VMs out on the wire QP. But before the wire QP has returned a send CQE and associated interrupt, the other K - 1 VMs have sent their N packets as well. The PF driver has to multiplex K * N packets out on the wire QP. But the send-queue on the wire QP has a finite capacity. So, in this scenario, if K * N is larger than the send-queue capacity of the wire QP, we will get MAD packets dropped on the floor with this dynamic debug message: mlx4_ib_multiplex_mad: failed sending GSI to wire on behalf of slave 2 (-11) and this despite the fact that the wire send-queue could have capacity, but the PF driver isn't aware, because the wire send CQEs have not yet been polled. We can also have a similar scenario inbound, with a wire recv-queue larger than the tunnel QP's send-queue. If many remote peers send MAD packets to the very same VM, the tunnel send-queue destined to the VM could allegedly be construed to be full by the PF driver. This starvation is fixed by introducing separate work queues for the wire QPs vs. the tunnel QPs. With this fix, using a dual ported HCA, 8 VFs instantiated, we could run cmtime on each of the 18 interfaces towards a similar configured peer, each cmtime instance with 800 QPs (all in all 14400 QPs) without a single CM packet getting lost. Fixes: 3cf69cc8dbeb ("IB/mlx4: Add CM paravirtualization") Link: https://lore.kernel.org/r/20200803061941.1139994-5-haakon.bugge@oracle.com Signed-off-by: Håkon Bugge Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/mad.c | 34 +++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 ++ 2 files changed, 33 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 57079110af9b..08eccf2b6967 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1307,6 +1307,18 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } +static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg) +{ + unsigned long flags; + struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) + queue_work(ctx->wi_wq, &ctx->work); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_demux_pv_qp *tun_qp, int index) @@ -2009,7 +2021,8 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, cq_size *= 2; cq_attr.cqe = cq_size; - ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, + ctx->cq = ib_create_cq(ctx->ib_dev, + create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler, NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); @@ -2046,6 +2059,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; + ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq; ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); if (ret) { @@ -2189,7 +2203,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_mcg; } - snprintf(name, sizeof name, "mlx4_ibt%d", port); + snprintf(name, sizeof(name), "mlx4_ibt%d", port); ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); @@ -2197,7 +2211,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_wq; } - snprintf(name, sizeof name, "mlx4_ibud%d", port); + snprintf(name, sizeof(name), "mlx4_ibwi%d", port); + ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (!ctx->wi_wq) { + pr_err("Failed to create wire WQ for port %d\n", port); + ret = -ENOMEM; + goto err_wiwq; + } + + snprintf(name, sizeof(name), "mlx4_ibud%d", port); ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); @@ -2208,6 +2230,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, return 0; err_udwq: + destroy_workqueue(ctx->wi_wq); + ctx->wi_wq = NULL; + +err_wiwq: destroy_workqueue(ctx->wq); ctx->wq = NULL; @@ -2255,12 +2281,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; } flush_workqueue(ctx->wq); + flush_workqueue(ctx->wi_wq); for (i = 0; i < dev->dev->caps.sqp_demux; i++) { destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); free_pv_object(dev, i, ctx->port); } kfree(ctx->tun); destroy_workqueue(ctx->ud_wq); + destroy_workqueue(ctx->wi_wq); destroy_workqueue(ctx->wq); } } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index eb53bb4c0c91..0173e3931cc7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -459,6 +459,7 @@ struct mlx4_ib_demux_pv_ctx { struct ib_pd *pd; struct work_struct work; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct mlx4_ib_demux_pv_qp qp[2]; }; @@ -466,6 +467,7 @@ struct mlx4_ib_demux_ctx { struct ib_device *ib_dev; int port; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; atomic64_t subnet_prefix; -- cgit v1.2.3 From 4f74f179a335f55fde667fe27233eca6b1f975d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Mon, 3 Aug 2020 08:19:41 +0200 Subject: IB/mlx4: Adjust delayed work when a dup is observed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 785167a114855c5aa75efca97000e405c2cc85bf ] When scheduling delayed work to clean up the cache, if the entry already has been scheduled for deletion, we adjust the delay. Fixes: 3cf69cc8dbeb ("IB/mlx4: Add CM paravirtualization") Link: https://lore.kernel.org/r/20200803061941.1139994-7-haakon.bugge@oracle.com Signed-off-by: Håkon Bugge Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/cm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index b591861934b3..81d6a3460b55 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -280,6 +280,9 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + } else if (id->scheduled_delete) { + /* Adjust timeout if already scheduled */ + mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } spin_unlock_irqrestore(&sriov->going_down_lock, flags); spin_unlock(&sriov->id_map_lock); -- cgit v1.2.3 From cc8ebd76b10a8b7169a81c532ba686af163d6c15 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 18 Aug 2020 15:05:21 +0300 Subject: RDMA/ucma: Fix locking for ctx->events_reported [ Upstream commit 98837c6c3d7285f6eca86480b6f7fac6880e27a8 ] This value is locked under the file->mut, ensure it is held whenever touching it. The case in ucma_migrate_id() is a race, while in ucma_free_uctx() it is already not possible for the write side to run, the movement is just for clarity. Fixes: 88314e4dda1e ("RDMA/cma: add support for rdma_migrate_id()") Link: https://lore.kernel.org/r/20200818120526.702120-10-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/ucma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index f4f79f1292b9..d7c74f095805 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -581,6 +581,7 @@ static int ucma_free_ctx(struct ucma_context *ctx) list_move_tail(&uevent->list, &list); } list_del(&ctx->list); + events_reported = ctx->events_reported; mutex_unlock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &list, list) { @@ -590,7 +591,6 @@ static int ucma_free_ctx(struct ucma_context *ctx) kfree(uevent); } - events_reported = ctx->events_reported; mutex_destroy(&ctx->mutex); kfree(ctx); return events_reported; @@ -1639,7 +1639,9 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, cur_file = ctx->file; if (cur_file == new_file) { + mutex_lock(&cur_file->mut); resp.events_reported = ctx->events_reported; + mutex_unlock(&cur_file->mut); goto response; } -- cgit v1.2.3 From 1564884a41760e492dd702528ea644a7c8b6e407 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 18 Aug 2020 15:05:22 +0300 Subject: RDMA/ucma: Add missing locking around rdma_leave_multicast() [ Upstream commit 38e03d092699891c3237b5aee9e8029d4ede0956 ] All entry points to the rdma_cm from a ULP must be single threaded, even this error unwinds. Add the missing locking. Fixes: 7c11910783a1 ("RDMA/ucma: Put a lock around every call to the rdma_cm layer") Link: https://lore.kernel.org/r/20200818120526.702120-11-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/ucma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index d7c74f095805..ef4be14af3bb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1473,7 +1473,9 @@ static ssize_t ucma_process_join(struct ucma_file *file, return 0; err3: + mutex_lock(&ctx->mutex); rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); + mutex_unlock(&ctx->mutex); ucma_cleanup_mc_events(mc); err2: xa_erase(&multicast_table, mc->id); -- cgit v1.2.3 From a43f936da88fa696fa6dd813ef181c6d5c8d9902 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 25 Aug 2020 19:07:54 +0800 Subject: RDMA/hns: Add a check for current state before modifying QP [ Upstream commit e0ef0f68c4c0d85b1eb63f38d5d10324361280e8 ] It should be considered an illegal operation if the ULP attempts to modify a QP from another state to the current hardware state. Otherwise, the ULP can modify some fields of QPC at any time. For example, for a QP in state of RTS, modify it from RTR to RTS can change the PSN, which is always not as expected. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/1598353674-24270-1-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 8dd2d666f687..730e50c87a76 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1181,8 +1181,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, mutex_lock(&hr_qp->mutex); - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; + if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) + goto out; + + cur_state = hr_qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (ibqp->uobject && -- cgit v1.2.3 From b8d4f65c6ae2fa9fd5fced741d143b030a492cc7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 30 Aug 2020 11:40:04 +0300 Subject: RDMA/mlx5: Fix potential race between destroy and CQE poll [ Upstream commit 4b916ed9f9e85f705213ca8d69771d3c1cd6ee5a ] The SRQ can be destroyed right before mlx5_cmd_get_srq is called. In such case the latter will return NULL instead of expected SRQ. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20200830084010.102381-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/cq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index ff664355de55..73d5b8dc74d8 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -167,7 +167,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, { enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_srq *srq = NULL; struct mlx5_ib_wq *wq; u16 wqe_ctr; u8 roce_packet_type; @@ -179,7 +179,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, if (qp->ibqp.xrcd) { msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); - srq = to_mibsrq(msrq); + if (msrq) + srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); } -- cgit v1.2.3 From 85e40ba1c4a5246b35e91c2ed69d4680426904d7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 4 Sep 2020 19:41:42 -0300 Subject: RDMA/umem: Fix ib_umem_find_best_pgsz() for mappings that cross a page boundary [ Upstream commit a40c20dabdf9045270767c75918feb67f0727c89 ] It is possible for a single SGL to span an aligned boundary, eg if the SGL is 61440 -> 90112 Then the length is 28672, which currently limits the block size to 32k. With a 32k page size the two covering blocks will be: 32768->65536 and 65536->98304 However, the correct answer is a 128K block size which will span the whole 28672 bytes in a single block. Instead of limiting based on length figure out which high IOVA bits don't change between the start and end addresses. That is the highest useful page size. Fixes: 4a35339958f1 ("RDMA/umem: Add API to find best driver supported page size in an MR") Link: https://lore.kernel.org/r/1-v2-270386b7e60b+28f4-umem_1_jgg@nvidia.com Reviewed-by: Leon Romanovsky Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/umem.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 0d42ba8c0b69..9be8f6c622db 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -156,8 +156,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, return 0; va = virt; - /* max page size not to exceed MR length */ - mask = roundup_pow_of_two(umem->length); + /* The best result is the smallest page size that results in the minimum + * number of required pages. Compute the largest page size that could + * work based on VA address bits that don't change. + */ + mask = pgsz_bitmap & + GENMASK(BITS_PER_LONG - 1, + bits_per((umem->length - 1 + virt) ^ virt)); /* offset into first SGL */ pgoff = umem->address & ~PAGE_MASK; -- cgit v1.2.3 From 1738b03e34ad4bdeead724e252577dd472eb154a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 4 Sep 2020 19:41:43 -0300 Subject: RDMA/umem: Prevent small pages from being returned by ib_umem_find_best_pgsz() [ Upstream commit 10c75ccb54e4fe548cb16d7ed426d7d709e6ae76 ] rdma_for_each_block() makes assumptions about how the SGL is constructed that don't work if the block size is below the page size used to to build the SGL. The rules for umem SGL construction require that the SG's all be PAGE_SIZE aligned and we don't encode the actual byte offset of the VA range inside the SGL using offset and length. So rdma_for_each_block() has no idea where the actual starting/ending point is to compute the first/last block boundary if the starting address should be within a SGL. Fixing the SGL construction turns out to be really hard, and will be the subject of other patches. For now block smaller pages. Fixes: 4a35339958f1 ("RDMA/umem: Add API to find best driver supported page size in an MR") Link: https://lore.kernel.org/r/2-v2-270386b7e60b+28f4-umem_1_jgg@nvidia.com Reviewed-by: Leon Romanovsky Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/umem.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 9be8f6c622db..650f71dd4ab9 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -151,6 +151,12 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, dma_addr_t mask; int i; + /* rdma_for_each_block() has a bug if the page size is smaller than the + * page size used to build the umem. For now prevent smaller page sizes + * from being returned. + */ + pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); + /* At minimum, drivers must support PAGE_SIZE or smaller */ if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0)))) return 0; -- cgit v1.2.3 From e0a970d8f627d2606f1ec4214a5ca7eb611e70c8 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 2 Sep 2020 19:57:34 +0300 Subject: RDMA/qedr: Fix qp structure memory leak [ Upstream commit 098e345a1a8faaad6e4e54d138773466cecc45d4 ] The qedr_qp structure wasn't freed when the protocol was RoCE. kmemleak output when running basic RoCE scenario. unreferenced object 0xffff927ad7e22c00 (size 1024): comm "ib_send_bw", pid 7082, jiffies 4384133693 (age 274.698s) hex dump (first 32 bytes): 00 b0 cd a2 79 92 ff ff 00 3f a1 a2 79 92 ff ff ....y....?..y... 00 ee 5c dd 80 92 ff ff 00 f6 5c dd 80 92 ff ff ..\.......\..... backtrace: [<00000000b2ba0f35>] qedr_create_qp+0xb3/0x6c0 [qedr] [<00000000e85a43dd>] ib_uverbs_handler_UVERBS_METHOD_QP_CREATE+0x555/0xad0 [ib_uverbs] [<00000000fee4d029>] ib_uverbs_cmd_verbs+0xa5a/0xb80 [ib_uverbs] [<000000005d622660>] ib_uverbs_ioctl+0xa4/0x110 [ib_uverbs] [<00000000eb4cdc71>] ksys_ioctl+0x87/0xc0 [<00000000abe6b23a>] __x64_sys_ioctl+0x16/0x20 [<0000000046e7cef4>] do_syscall_64+0x4d/0x90 [<00000000c6948f76>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1212767e23bb ("qedr: Add wrapping generic structure for qpidr and adjust idr routines.") Link: https://lore.kernel.org/r/20200902165741.8355-2-michal.kalderon@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 16a994fd7d0a..682329789d00 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2518,6 +2518,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (rdma_protocol_iwarp(&dev->ibdev, 1)) qedr_iw_qp_rem_ref(&qp->ibqp); + else + kfree(qp); return 0; } -- cgit v1.2.3 From 4c5f385ab49e7fbdd3765e7fd906e1d8d9ba884b Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 2 Sep 2020 19:57:36 +0300 Subject: RDMA/qedr: Fix use of uninitialized field [ Upstream commit a379ad54e55a12618cae7f6333fd1b3071de9606 ] dev->attr.page_size_caps was used uninitialized when setting device attributes Fixes: ec72fce401c6 ("qedr: Add support for RoCE HW init") Link: https://lore.kernel.org/r/20200902165741.8355-4-michal.kalderon@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 4494dab8c3d8..93040c994e2e 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -601,7 +601,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev) qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx); /* Part 2 - check capabilities */ - page_size = ~dev->attr.page_size_caps + 1; + page_size = ~qed_attr->page_size_caps + 1; if (page_size > PAGE_SIZE) { DP_ERR(dev, "Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n", -- cgit v1.2.3 From 97336c8296b50d02794fdc6153f3a19cbac36fe8 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 2 Sep 2020 19:57:37 +0300 Subject: RDMA/qedr: Fix return code if accept is called on a destroyed qp [ Upstream commit 8a5a10a1a74465065c75d9de1aa6685e1f1aa117 ] In iWARP, accept could be called after a QP is already destroyed. In this case an error should be returned and not success. Fixes: 82af6d19d8d9 ("RDMA/qedr: Fix synchronization methods and memory leaks in qedr") Link: https://lore.kernel.org/r/20200902165741.8355-5-michal.kalderon@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 6dea49e11f5f..e521f3c3dbbf 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -736,7 +736,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc = 0; + int rc; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); @@ -759,8 +759,10 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ord = conn_param->ord; if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, - &qp->iwarp_cm_flags)) + &qp->iwarp_cm_flags)) { + rc = -EINVAL; goto err; /* QP already destroyed */ + } rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); if (rc) { -- cgit v1.2.3 From 8fd3154eb0ee3485827ff27bccf7b54a0e91c621 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 2 Sep 2020 19:57:40 +0300 Subject: RDMA/qedr: Fix inline size returned for iWARP [ Upstream commit fbf58026b2256e9cd5f241a4801d79d3b2b7b89d ] commit 59e8970b3798 ("RDMA/qedr: Return max inline data in QP query result") changed query_qp max_inline size to return the max roce inline size. When iwarp was introduced, this should have been modified to return the max inline size based on protocol. This size is cached in the device attributes Fixes: 69ad0e7fe845 ("RDMA/qedr: Add support for iWARP in user space") Link: https://lore.kernel.org/r/20200902165741.8355-8-michal.kalderon@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 682329789d00..4408d3364664 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2405,7 +2405,7 @@ int qedr_query_qp(struct ib_qp *ibqp, qp_attr->cap.max_recv_wr = qp->rq.max_wr; qp_attr->cap.max_send_sge = qp->sq.max_sges; qp_attr->cap.max_recv_sge = qp->rq.max_sges; - qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; + qp_attr->cap.max_inline_data = dev->attr.max_inline; qp_init_attr->cap = qp_attr->cap; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; -- cgit v1.2.3 From 7c4fec28980dc3b64fa735af175179612e79d36a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:20 +0300 Subject: RDMA/cma: Remove dead code for kernel rdmacm multicast [ Upstream commit 1bb5091def706732c749df9aae45fbca003696f2 ] There is no kernel user of RDMA CM multicast so this code managing the multicast subscription of the kernel-only internal QP is dead. Remove it. This makes the bug fixes in the next patches much simpler. Link: https://lore.kernel.org/r/20200902081122.745412-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e3cd9d2b0dd2..10525c91cac6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4182,16 +4182,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) else pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", status); - mutex_lock(&id_priv->qp_mutex); - if (!status && id_priv->id.qp) { - status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, - be16_to_cpu(multicast->rec.mlid)); - if (status) - pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n", - status); - } - mutex_unlock(&id_priv->qp_mutex); - event.status = status; event.param.ud.private_data = mc->context; if (!status) { @@ -4446,6 +4436,10 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, struct cma_multicast *mc; int ret; + /* Not supported for kernel QPs */ + if (WARN_ON(id->qp)) + return -EINVAL; + if (!id->device) return -EINVAL; @@ -4500,11 +4494,6 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) list_del(&mc->list); spin_unlock_irq(&id_priv->lock); - if (id->qp) - ib_detach_mcast(id->qp, - &mc->multicast.ib->rec.mgid, - be16_to_cpu(mc->multicast.ib->rec.mlid)); - BUG_ON(id_priv->cma_dev->device != id->device); if (rdma_cap_ib_mcast(id->device, id->port_num)) { -- cgit v1.2.3 From cee5080a0776649f5479bed736a046dd647cb005 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:21 +0300 Subject: RDMA/cma: Consolidate the destruction of a cma_multicast in one place [ Upstream commit 3788d2997bc0150ea911a964d5b5a2e11808a936 ] Two places were open coding this sequence, and also pull in cma_leave_roce_mc_group() which was called only once. Link: https://lore.kernel.org/r/20200902081122.745412-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 63 +++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 10525c91cac6..98d2d74b96f7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1803,19 +1803,30 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, - struct cma_multicast *mc) +static void destroy_mc(struct rdma_id_private *id_priv, + struct cma_multicast *mc) { - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; + if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) { + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + return; + } - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); - dev_put(ndev); + if (rdma_protocol_roce(id_priv->id.device, + id_priv->id.port_num)) { + struct rdma_dev_addr *dev_addr = + &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, + dev_addr->bound_dev_if); + if (ndev) { + cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); + dev_put(ndev); + } + kref_put(&mc->mcref, release_mc); } - kref_put(&mc->mcref, release_mc); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) @@ -1823,16 +1834,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) struct cma_multicast *mc; while (!list_empty(&id_priv->mc_list)) { - mc = container_of(id_priv->mc_list.next, - struct cma_multicast, list); + mc = list_first_entry(&id_priv->mc_list, struct cma_multicast, + list); list_del(&mc->list); - if (rdma_cap_ib_mcast(id_priv->cma_dev->device, - id_priv->id.port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else { - cma_leave_roce_mc_group(id_priv, mc); - } + destroy_mc(id_priv, mc); } } @@ -4490,20 +4495,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { - if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - - BUG_ON(id_priv->cma_dev->device != id->device); + if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0) + continue; + list_del(&mc->list); + spin_unlock_irq(&id_priv->lock); - if (rdma_cap_ib_mcast(id->device, id->port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else if (rdma_protocol_roce(id->device, id->port_num)) { - cma_leave_roce_mc_group(id_priv, mc); - } - return; - } + WARN_ON(id_priv->cma_dev->device != id->device); + destroy_mc(id_priv, mc); + return; } spin_unlock_irq(&id_priv->lock); } -- cgit v1.2.3 From 4b1d559cc5c6fcb551ad7c72e279119b340206b4 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Thu, 17 Sep 2020 21:50:15 +0800 Subject: RDMA/hns: Set the unsupported wr opcode [ Upstream commit 22d3e1ed2cc837af87f76c3c8a4ccf4455e225c5 ] hip06 does not support IB_WR_LOCAL_INV, so the ps_opcode should be set to an invalid value instead of being left uninitialized. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Fixes: a2f3d4479fe9 ("RDMA/hns: Avoid unncessary initialization") Link: https://lore.kernel.org/r/1600350615-115217-1-git-send-email-oulijun@huawei.com Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index a79fa67df871..a405c64d2a82 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -271,7 +271,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; break; case IB_WR_LOCAL_INV: - break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_LSO: -- cgit v1.2.3 From 61ad14e24ebac485be56426f03857a2a438e92ff Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Sep 2020 14:26:52 +0300 Subject: RDMA/mlx5: Disable IB_DEVICE_MEM_MGT_EXTENSIONS if IB_WR_REG_MR can't work [ Upstream commit 0ec52f0194638e2d284ad55eba5a7aff753de1b9 ] set_reg_wr() always fails if !umr_modify_entity_size_disabled because mlx5_ib_can_use_umr() always fails. Without set_reg_wr() IB_WR_REG_MR doesn't work and that means the device should not advertise IB_DEVICE_MEM_MGT_EXTENSIONS. Fixes: 841b07f99a47 ("IB/mlx5: Block MR WR if UMR is not possible") Link: https://lore.kernel.org/r/20200914112653.345244-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b781ad74e6de..40c1a05c2445 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -888,7 +888,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, /* We support 'Gappy' memory registration too */ props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; } - props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + /* IB_WR_REG_MR always requires changing the entity size with UMR */ + if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ -- cgit v1.2.3 From b79dd191680ff4a3f79a0cca5fd58009020a6b3d Mon Sep 17 00:00:00 2001 From: "Sindhu, Devale" Date: Wed, 16 Sep 2020 08:18:12 -0500 Subject: i40iw: Add support to make destroy QP synchronous [ Upstream commit f2334964e969762e266a616acf9377f6046470a2 ] Occasionally ib_write_bw crash is seen due to access of a pd object in i40iw_sc_qp_destroy after it is freed. Destroy qp is not synchronous in i40iw and thus the iwqp object could be referencing a pd object that is freed by ib core as a result of successful return from i40iw_destroy_qp. Wait in i40iw_destroy_qp till all QP references are released and destroy the QP and its associated resources before returning. Switch to use the refcount API vs atomic API for lifetime management of the qp. RIP: 0010:i40iw_sc_qp_destroy+0x4b/0x120 [i40iw] [...] RSP: 0018:ffffb4a7042e3ba8 EFLAGS: 00010002 RAX: 0000000000000000 RBX: 0000000000000001 RCX: dead000000000122 RDX: ffffb4a7042e3bac RSI: ffff8b7ef9b1e940 RDI: ffff8b7efbf09080 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 8080808080808080 R11: 0000000000000010 R12: ffff8b7efbf08050 R13: 0000000000000001 R14: ffff8b7f15042928 R15: ffff8b7ef9b1e940 FS: 0000000000000000(0000) GS:ffff8b7f2fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000400 CR3: 000000020d60a006 CR4: 00000000001606e0 Call Trace: i40iw_exec_cqp_cmd+0x4d3/0x5c0 [i40iw] ? try_to_wake_up+0x1ea/0x5d0 ? __switch_to_asm+0x40/0x70 i40iw_process_cqp_cmd+0x95/0xa0 [i40iw] i40iw_handle_cqp_op+0x42/0x1a0 [i40iw] ? cm_event_handler+0x13c/0x1f0 [iw_cm] i40iw_rem_ref+0xa0/0xf0 [i40iw] cm_work_handler+0x99c/0xd10 [iw_cm] process_one_work+0x1a1/0x360 worker_thread+0x30/0x380 ? process_one_work+0x360/0x360 kthread+0x10c/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x35/0x40 Fixes: d37498417947 ("i40iw: add files for iwarp interface") Link: https://lore.kernel.org/r/20200916131811.2077-1-shiraz.saleem@intel.com Reported-by: Kamal Heib Signed-off-by: Sindhu, Devale Signed-off-by: Shiraz, Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/i40iw/i40iw.h | 9 +++-- drivers/infiniband/hw/i40iw/i40iw_cm.c | 10 +++--- drivers/infiniband/hw/i40iw/i40iw_hw.c | 4 +-- drivers/infiniband/hw/i40iw/i40iw_utils.c | 59 ++++++------------------------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 31 +++++++++++----- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 3 +- 6 files changed, 45 insertions(+), 71 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 8feec35f95a7..6d6719fa7e46 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -398,8 +398,8 @@ static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp) } /* i40iw.c */ -void i40iw_add_ref(struct ib_qp *); -void i40iw_rem_ref(struct ib_qp *); +void i40iw_qp_add_ref(struct ib_qp *ibqp); +void i40iw_qp_rem_ref(struct ib_qp *ibqp); struct ib_qp *i40iw_get_qp(struct ib_device *, int); void i40iw_flush_wqes(struct i40iw_device *iwdev, @@ -543,9 +543,8 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, bool wait); void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf); void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp); -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num); +void i40iw_free_qp_resources(struct i40iw_qp *iwqp); + enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev, struct i40iw_dma_mem *memptr, u32 size, u32 mask); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index fa7a5ff498c7..56c1e9abc52d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2322,7 +2322,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) iwqp = cm_node->iwqp; if (iwqp) { iwqp->cm_node = NULL; - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); cm_node->iwqp = NULL; } else if (cm_node->qhash_set) { i40iw_get_addr_info(cm_node, &nfo); @@ -3452,7 +3452,7 @@ void i40iw_cm_disconn(struct i40iw_qp *iwqp) kfree(work); return; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); work->iwqp = iwqp; @@ -3623,7 +3623,7 @@ static void i40iw_disconnect_worker(struct work_struct *work) kfree(dwork); i40iw_cm_disconn_true(iwqp); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -3745,7 +3745,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->lsmm_size = accept.size + conn_param->private_data_len; i40iw_cm_init_tsa_conn(iwqp, cm_node); cm_id->add_ref(cm_id); - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; @@ -3908,7 +3908,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { cm_node->state = I40IW_CM_STATE_SYN_SENT; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index ae8b97c30665..a7512508f7e6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -313,7 +313,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) __func__, info->qp_cq_id); continue; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); qp = &iwqp->sc_qp; spin_lock_irqsave(&iwqp->lock, flags); @@ -427,7 +427,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; } if (info->qp) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } while (1); if (aeqcnt) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 016524683e17..72db7c1dc299 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -479,25 +479,6 @@ void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev) } } -/** - * i40iw_free_qp - callback after destroy cqp completes - * @cqp_request: cqp request for destroy qp - * @num: not used - */ -static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num) -{ - struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param; - struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; - struct i40iw_device *iwdev; - u32 qp_num = iwqp->ibqp.qp_num; - - iwdev = iwqp->iwdev; - - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); -} - /** * i40iw_wait_event - wait for completion * @iwdev: iwarp device @@ -618,26 +599,23 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev) } /** - * i40iw_add_ref - add refcount for qp + * i40iw_qp_add_ref - add refcount for qp * @ibqp: iqarp qp */ -void i40iw_add_ref(struct ib_qp *ibqp) +void i40iw_qp_add_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp; - atomic_inc(&iwqp->refcount); + refcount_inc(&iwqp->refcount); } /** - * i40iw_rem_ref - rem refcount for qp and free if 0 + * i40iw_qp_rem_ref - rem refcount for qp and free if 0 * @ibqp: iqarp qp */ -void i40iw_rem_ref(struct ib_qp *ibqp) +void i40iw_qp_rem_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp; - enum i40iw_status_code status; - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; struct i40iw_device *iwdev; u32 qp_num; unsigned long flags; @@ -645,7 +623,7 @@ void i40iw_rem_ref(struct ib_qp *ibqp) iwqp = to_iwqp(ibqp); iwdev = iwqp->iwdev; spin_lock_irqsave(&iwdev->qptable_lock, flags); - if (!atomic_dec_and_test(&iwqp->refcount)) { + if (!refcount_dec_and_test(&iwqp->refcount)) { spin_unlock_irqrestore(&iwdev->qptable_lock, flags); return; } @@ -653,25 +631,8 @@ void i40iw_rem_ref(struct ib_qp *ibqp) qp_num = iwqp->ibqp.qp_num; iwdev->qp_table[qp_num] = NULL; spin_unlock_irqrestore(&iwdev->qptable_lock, flags); - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); - if (!cqp_request) - return; - - cqp_request->callback_fcn = i40iw_free_qp; - cqp_request->param = (void *)&iwqp->sc_qp; - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_QP_DESTROY; - cqp_info->post_sq = 1; - cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp; - cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; - cqp_info->in.u.qp_destroy.remove_hash_idx = true; - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - return; + complete(&iwqp->free_qp); - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); } /** @@ -938,7 +899,7 @@ static void i40iw_terminate_timeout(struct timer_list *t) struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp; i40iw_terminate_done(qp, 1); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -950,7 +911,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0); iwqp->terminate_timer.expires = jiffies + HZ; add_timer(&iwqp->terminate_timer); @@ -966,7 +927,7 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp) iwqp = (struct i40iw_qp *)qp->back_qp; if (del_timer(&iwqp->terminate_timer)) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index cd9ee1664a69..22bf4f09c064 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -366,11 +366,11 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, * @iwqp: qp ptr (user or kernel) * @qp_num: qp number assigned */ -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num) +void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { struct i40iw_pbl *iwpbl = &iwqp->iwpbl; + struct i40iw_device *iwdev = iwqp->iwdev; + u32 qp_num = iwqp->ibqp.qp_num; i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); @@ -404,6 +404,10 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct i40iw_qp *iwqp = to_iwqp(ibqp); + struct ib_qp_attr attr; + struct i40iw_device *iwdev = iwqp->iwdev; + + memset(&attr, 0, sizeof(attr)); iwqp->destroyed = 1; @@ -418,7 +422,15 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } } - i40iw_rem_ref(&iwqp->ibqp); + attr.qp_state = IB_QPS_ERR; + i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); + i40iw_qp_rem_ref(&iwqp->ibqp); + wait_for_completion(&iwqp->free_qp); + i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp); + i40iw_rem_pdusecount(iwqp->iwpd, iwdev); + i40iw_free_qp_resources(iwqp); + i40iw_rem_devusecount(iwdev); + return 0; } @@ -579,6 +591,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; + iwqp->iwdev = iwdev; iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; if (i40iw_allocate_dma_mem(dev->hw, @@ -603,7 +616,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; qp = &iwqp->sc_qp; @@ -717,7 +729,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - i40iw_add_ref(&iwqp->ibqp); + refcount_set(&iwqp->refcount, 1); spin_lock_init(&iwqp->lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; iwdev->qp_table[qp_num] = iwqp; @@ -739,10 +751,11 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, } init_completion(&iwqp->sq_drained); init_completion(&iwqp->rq_drained); + init_completion(&iwqp->free_qp); return &iwqp->ibqp; error: - i40iw_free_qp_resources(iwdev, iwqp, qp_num); + i40iw_free_qp_resources(iwqp); return ERR_PTR(err_code); } @@ -2654,13 +2667,13 @@ static const struct ib_device_ops i40iw_dev_ops = { .get_hw_stats = i40iw_get_hw_stats, .get_port_immutable = i40iw_port_immutable, .iw_accept = i40iw_accept, - .iw_add_ref = i40iw_add_ref, + .iw_add_ref = i40iw_qp_add_ref, .iw_connect = i40iw_connect, .iw_create_listen = i40iw_create_listen, .iw_destroy_listen = i40iw_destroy_listen, .iw_get_qp = i40iw_get_qp, .iw_reject = i40iw_reject, - .iw_rem_ref = i40iw_rem_ref, + .iw_rem_ref = i40iw_qp_rem_ref, .map_mr_sg = i40iw_map_mr_sg, .mmap = i40iw_mmap, .modify_qp = i40iw_modify_qp, diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 3a413752ccc3..ad7d81041bc9 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -140,7 +140,7 @@ struct i40iw_qp { struct i40iw_qp_host_ctx_info ctx_info; struct i40iwarp_offload_info iwarp_info; void *allocated_buffer; - atomic_t refcount; + refcount_t refcount; struct iw_cm_id *cm_id; void *cm_node; struct ib_mr *lsmm_mr; @@ -175,5 +175,6 @@ struct i40iw_qp { struct i40iw_dma_mem ietf_mem; struct completion sq_drained; struct completion rq_drained; + struct completion free_qp; }; #endif -- cgit v1.2.3 From eff57fbc2377821fefd17db953978c604187c7a2 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Sat, 19 Sep 2020 18:03:20 +0800 Subject: RDMA/hns: Fix the wrong value of rnr_retry when querying qp [ Upstream commit 99fcf82521d91468ee6115a3c253aa032dc63cbc ] The rnr_retry returned to the user is not correct, it should be got from another fields in QPC. Fixes: bfe860351e31 ("RDMA/hns: Fix cast from or to restricted __le32 for driver") Link: https://lore.kernel.org/r/1600509802-44382-7-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0502c90c83ed..def266626223 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4616,7 +4616,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, V2_QPC_BYTE_212_RETRY_CNT_S); - qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer); + qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S); done: qp_attr->cur_qp_state = qp_attr->qp_state; -- cgit v1.2.3 From 5e3782b1fae1ea154a1599206b5b276780d3302f Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Sat, 19 Sep 2020 18:03:22 +0800 Subject: RDMA/hns: Fix missing sq_sig_type when querying QP [ Upstream commit 05df49279f8926178ecb3ce88e61b63104cd6293 ] The sq_sig_type field should be filled when querying QP, or the users may get a wrong value. Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Link: https://lore.kernel.org/r/1600509802-44382-9-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index def266626223..bb7532819395 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4634,6 +4634,7 @@ done: } qp_init_attr->cap = qp_attr->cap; + qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits; out: mutex_unlock(&hr_qp->mutex); -- cgit v1.2.3 From dc1d4c658b9c123e31054fffcbc0b23566694b1a Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 4 Oct 2020 16:29:48 +0300 Subject: RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces [ Upstream commit 5ce2dced8e95e76ff7439863a118a053a7fc6f91 ] Report the "ipoib pkey", "mode" and "umcast" netlink attributes for every IPoiB interface type, not just children created with 'ip link add'. After setting the rtnl_link_ops for the parent interface, implement the dellink() callback to block users from trying to remove it. Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks") Link: https://lore.kernel.org/r/20201004132948.26669-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 11 +++++++++++ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 ++ 3 files changed, 15 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 044bcacad6e4..69ecf37053a8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2463,6 +2463,8 @@ static struct net_device *ipoib_add_port(const char *format, /* call event handler to ensure pkey in sync */ queue_work(ipoib_workqueue, &priv->flush_heavy); + ndev->rtnl_link_ops = ipoib_get_link_ops(); + result = register_netdev(ndev); if (result) { pr_warn("%s: couldn't register ipoib port %d; error %d\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 38c984d16996..d5a90a66b45c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -144,6 +144,16 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, return 0; } +static void ipoib_del_child_link(struct net_device *dev, struct list_head *head) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (!priv->parent) + return; + + unregister_netdevice_queue(dev, head); +} + static size_t ipoib_get_size(const struct net_device *dev) { return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ @@ -158,6 +168,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .priv_size = sizeof(struct ipoib_dev_priv), .setup = ipoib_setup_common, .newlink = ipoib_new_child_link, + .dellink = ipoib_del_child_link, .changelink = ipoib_changelink, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 8ac8e18fbe0c..58ca5e9c6079 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -192,6 +192,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) } priv = ipoib_priv(ndev); + ndev->rtnl_link_ops = ipoib_get_link_ops(); + result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); if (result && ndev->reg_state == NETREG_UNINITIALIZED) -- cgit v1.2.3 From 7efb373881f7c1a699f856cce7de51b6e859c92b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Oct 2020 10:52:04 +0100 Subject: IB/rdmavt: Fix sizeof mismatch [ Upstream commit 8e71f694e0c819db39af2336f16eb9689f1ae53f ] An incorrect sizeof is being used, struct rvt_ibport ** is not correct, it should be struct rvt_ibport *. Note that since ** is the same size as * this is not causing any issues. Improve this fix by using sizeof(*rdi->ports) as this allows us to not even reference the type of the pointer. Also remove line breaks as the entire statement can fit on one line. Link: https://lore.kernel.org/r/20201008095204.82683-1-colin.king@canonical.com Addresses-Coverity: ("Sizeof not portable (SIZEOF_MISMATCH)") Fixes: ff6acd69518e ("IB/rdmavt: Add device structure allocation") Signed-off-by: Colin Ian King Reviewed-by: Ira Weiny Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rdmavt/vt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 18da1e1ea979..833f3f1b87f5 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -95,9 +95,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) if (!rdi) return rdi; - rdi->ports = kcalloc(nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); + rdi->ports = kcalloc(nports, sizeof(*rdi->ports), GFP_KERNEL); if (!rdi->ports) ib_dealloc_device(&rdi->ibdev); -- cgit v1.2.3 From e7f0b9ab8b7d49dd28aece9ee598f1faada34380 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 8 Oct 2020 15:36:52 -0500 Subject: RDMA/rxe: Fix skb lifetime in rxe_rcv_mcast_pkt() [ Upstream commit e7ec96fc7932f48a6d6cdd05bf82004a1a04285b ] The changes referenced below replaced sbk_clone)_ by taking additional references, passing the skb along and then freeing the skb. This deleted the packets before they could be processed and additionally passed bad data in each packet. Since pkt is stored in skb->cb changing pkt->qp changed it for all the packets. Replace skb_get() by sbk_clone() in rxe_rcv_mcast_pkt() for cases where multiple QPs are receiving multicast packets on the same address. Delete kfree_skb() because the packets need to live until they have been processed by each QP. They are freed later. Fixes: 86af61764151 ("IB/rxe: remove unnecessary skb_clone") Fixes: fe896ceb5772 ("IB/rxe: replace refcount_inc with skb_get") Link: https://lore.kernel.org/r/20201008203651.256958-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_recv.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 46e111c218fd..be6416a982c7 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -281,6 +281,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) struct rxe_mc_elem *mce; struct rxe_qp *qp; union ib_gid dgid; + struct sk_buff *per_qp_skb; + struct rxe_pkt_info *per_qp_pkt; int err; if (skb->protocol == htons(ETH_P_IP)) @@ -309,21 +311,26 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) if (err) continue; - /* if *not* the last qp in the list - * increase the users of the skb then post to the next qp + /* for all but the last qp create a new clone of the + * skb and pass to the qp. */ if (mce->qp_list.next != &mcg->qp_list) - skb_get(skb); + per_qp_skb = skb_clone(skb, GFP_ATOMIC); + else + per_qp_skb = skb; - pkt->qp = qp; + per_qp_pkt = SKB_TO_PKT(per_qp_skb); + per_qp_pkt->qp = qp; rxe_add_ref(qp); - rxe_rcv_pkt(pkt, skb); + rxe_rcv_pkt(per_qp_pkt, per_qp_skb); } spin_unlock_bh(&mcg->mcg_lock); rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ + return; + err1: kfree_skb(skb); } -- cgit v1.2.3 From 06727f797f457d6cb409326107203c415d361912 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 13 Oct 2020 13:42:37 -0500 Subject: RDMA/rxe: Handle skb_clone() failure in rxe_recv.c [ Upstream commit 71abf20b28ff87fee6951ec2218d5ce7969c4e87 ] If skb_clone() is unable to allocate memory for a new sk_buff this is not detected by the current code. Check for a NULL return and continue. This is similar to other errors in this loop over QPs attached to the multicast address and consistent with the unreliable UD transport. Fixes: e7ec96fc7932f ("RDMA/rxe: Fix skb lifetime in rxe_rcv_mcast_pkt()") Addresses-Coverity-ID: 1497804: Null pointer dereferences (NULL_RETURNS) Link: https://lore.kernel.org/r/20201013184236.5231-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_recv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index be6416a982c7..9bfb98056fc2 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -319,6 +319,9 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) else per_qp_skb = skb; + if (unlikely(!per_qp_skb)) + continue; + per_qp_pkt = SKB_TO_PKT(per_qp_skb); per_qp_pkt->qp = qp; rxe_add_ref(qp); -- cgit v1.2.3 From bc67eeb9781bce842a9fcf4bc9c72d17956d468a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Sep 2020 10:20:07 +0300 Subject: RDMA/addr: Fix race with netevent_callback()/rdma_addr_cancel() commit 2ee9bf346fbfd1dad0933b9eb3a4c2c0979b633e upstream. This three thread race can result in the work being run once the callback becomes NULL: CPU1 CPU2 CPU3 netevent_callback() process_one_req() rdma_addr_cancel() [..] spin_lock_bh() set_timeout() spin_unlock_bh() spin_lock_bh() list_del_init(&req->list); spin_unlock_bh() req->callback = NULL spin_lock_bh() if (!list_empty(&req->list)) // Skipped! // cancel_delayed_work(&req->work); spin_unlock_bh() process_one_req() // again req->callback() // BOOM cancel_delayed_work_sync() The solution is to always cancel the work once it is completed so any in between set_timeout() does not result in it running again. Cc: stable@vger.kernel.org Fixes: 44e75052bc2a ("RDMA/rdma_cm: Make rdma_addr_cancel into a fence") Link: https://lore.kernel.org/r/20200930072007.1009692-1-leon@kernel.org Reported-by: Dan Aloni Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1753a9801b70..8beed4197e73 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -645,13 +645,12 @@ static void process_one_req(struct work_struct *_work) req->callback = NULL; spin_lock_bh(&lock); + /* + * Although the work will normally have been canceled by the workqueue, + * it can still be requeued as long as it is on the req_list. + */ + cancel_delayed_work(&req->work); if (!list_empty(&req->list)) { - /* - * Although the work will normally have been canceled by the - * workqueue, it can still be requeued as long as it is on the - * req_list. - */ - cancel_delayed_work(&req->work); list_del_init(&req->list); kfree(req); } -- cgit v1.2.3