From cdbecc8d24b642b67ae79a0acc2ff18d3d0e677e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 09:12:25 -0700 Subject: nvme_rdma: keep a ref on the ctrl during delete/flush Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers/nvme/host/rdma.c') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ab545fb347a0..15b0c1d025f5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1351,9 +1351,15 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) ret = 1; } - /* Queue controller deletion */ + /* + * Queue controller deletion. Keep a reference until all + * work is flushed since delete_work will free the ctrl mem + */ + kref_get(&ctrl->ctrl.kref); queue_work(nvme_rdma_wq, &ctrl->delete_work); flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } @@ -1700,15 +1706,19 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - int ret; + int ret = 0; + /* + * Keep a reference until all work is flushed since + * __nvme_rdma_del_ctrl can free the ctrl mem + */ + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; ret = __nvme_rdma_del_ctrl(ctrl); - if (ret) - return ret; - - flush_work(&ctrl->delete_work); - - return 0; + if (!ret) + flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } static void nvme_rdma_remove_ctrl_work(struct work_struct *work) -- cgit v1.2.3 From f361e5a01ed35c0f9a00816d76a910d8a5cb4547 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 Sep 2016 09:01:27 -0700 Subject: nvme-rdma: destroy nvme queue rdma resources on connect failure After address resolution, the nvme_rdma_queue rdma resources are allocated. If rdma route resolution or the connect fails, or the controller reconnect times out and gives up, then the rdma resources need to be freed. Otherwise, rdma resources are leaked. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/nvme/host/rdma.c') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 15b0c1d025f5..a9d43f09963f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -82,6 +82,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), + NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), }; struct nvme_rdma_queue { @@ -480,9 +481,14 @@ out_err: static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags)) + return; + + dev = queue->device; + ibdev = dev->dev; rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -533,6 +539,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue, ret = -ENOMEM; goto out_destroy_qp; } + set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags); return 0; @@ -590,6 +597,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return 0; out_destroy_cm_id: + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); return ret; } @@ -652,7 +660,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_free_queues: - for (; i >= 1; i--) + for (i--; i >= 1; i--) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); return ret; -- cgit v1.2.3 From e89ca58f9c901c8c4cfb09f96d879b186bb01492 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Sep 2016 09:01:54 -0700 Subject: nvme-rdma: add DELETING queue flag When we get a surprise disconnect from the target we queue a periodic reconnect (which is the sane thing to do...). We only move the queues out of CONNECTED when we retry to reconnect (after 10 seconds in the default case) but we stop the blk queues immediately so we are not bothered with traffic from now on. If delete() is kicking off in this period the queues are still in CONNECTED state. Part of the delete sequence is trying to issue ctrl shutdown if the admin queue is CONNECTED (which it is!). This request is issued but stuck in blk-mq waiting for the queues to start again. This might be the one preventing us from forward progress... The patch separates the queue flags to CONNECTED and DELETING. Now we will move out of CONNECTED as soon as error recovery kicks in (before stopping the queues) and DELETING is on when we start the queue deletion. Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/rdma.c') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a9d43f09963f..eeb08b658640 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -83,6 +83,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), + NVME_RDMA_Q_DELETING = (1 << 2), }; struct nvme_rdma_queue { @@ -559,6 +560,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; + queue->flags = 0; init_completion(&queue->cm_done); if (idx > 0) @@ -616,7 +618,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; nvme_rdma_stop_queue(queue); nvme_rdma_free_queue(queue); @@ -769,8 +771,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); + int i; nvme_stop_keep_alive(&ctrl->ctrl); + + for (i = 0; i < ctrl->queue_count; i++) + clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); @@ -1350,7 +1357,7 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) cancel_delayed_work_sync(&ctrl->reconnect_work); /* Disable the queue so ctrl delete won't free it */ - if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { + if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { /* Free this queue ourselves */ nvme_rdma_stop_queue(queue); nvme_rdma_destroy_queue_ib(queue); -- cgit v1.2.3 From e87a911fed07e368c6f97e75152e6297a7dfba48 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 Sep 2016 09:01:54 -0700 Subject: nvme-rdma: use ib_client API to detect device removal Change nvme-rdma to use the IB Client API to detect device removal. This has the wonderful benefit of being able to blow away all the ib/rdma_cm resources for the device being removed. No craziness about not destroying the cm_id handling the event. No deadlocks due to broken iw_cm/rdma_cm/iwarp dependencies. And no need to have a bound cm_id around during controller recovery/reconnect to catch device removal events. We don't use the device_add aspect of the ib_client service since we only want to create resources for an IB device if we have a target utilizing that device. Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 108 ++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 68 deletions(-) (limited to 'drivers/nvme/host/rdma.c') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index eeb08b658640..d6bdf55a969e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1320,64 +1320,6 @@ out_destroy_queue_ib: return ret; } -/** - * nvme_rdma_device_unplug() - Handle RDMA device unplug - * @queue: Queue that owns the cm_id that caught the event - * - * DEVICE_REMOVAL event notifies us that the RDMA device is about - * to unplug so we should take care of destroying our RDMA resources. - * This event will be generated for each allocated cm_id. - * - * In our case, the RDMA resources are managed per controller and not - * only per queue. So the way we handle this is we trigger an implicit - * controller deletion upon the first DEVICE_REMOVAL event we see, and - * hold the event inflight until the controller deletion is completed. - * - * One exception that we need to handle is the destruction of the cm_id - * that caught the event. Since we hold the callout until the controller - * deletion is completed, we'll deadlock if the controller deletion will - * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources, - * then queue the controller deletion which won't destroy this queue and - * we destroy the cm_id implicitely by returning a non-zero rc to the callout. - */ -static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) -{ - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret = 0; - - /* Own the controller deletion */ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return 0; - - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - - /* Get rid of reconnect work if its running */ - cancel_delayed_work_sync(&ctrl->reconnect_work); - - /* Disable the queue so ctrl delete won't free it */ - if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { - /* Free this queue ourselves */ - nvme_rdma_stop_queue(queue); - nvme_rdma_destroy_queue_ib(queue); - - /* Return non-zero so the cm_id will destroy implicitly */ - ret = 1; - } - - /* - * Queue controller deletion. Keep a reference until all - * work is flushed since delete_work will free the ctrl mem - */ - kref_get(&ctrl->ctrl.kref); - queue_work(nvme_rdma_wq, &ctrl->delete_work); - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - - return ret; -} - static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { @@ -1419,8 +1361,8 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, nvme_rdma_error_recovery(queue->ctrl); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - /* return 1 means impliciy CM ID destroy */ - return nvme_rdma_device_unplug(queue); + /* device removal is handled via the ib_client API */ + break; default: dev_err(queue->ctrl->ctrl.device, "Unexpected RDMA CM event (%d)\n", ev->event); @@ -2030,27 +1972,57 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .create_ctrl = nvme_rdma_create_ctrl, }; +static void nvme_rdma_add_one(struct ib_device *ib_device) +{ +} + +static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) +{ + struct nvme_rdma_ctrl *ctrl; + + /* Delete all controllers using this device */ + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { + if (ctrl->device->dev != ib_device) + continue; + dev_info(ctrl->ctrl.device, + "Removing ctrl: NQN \"%s\", addr %pISp\n", + ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + __nvme_rdma_del_ctrl(ctrl); + } + mutex_unlock(&nvme_rdma_ctrl_mutex); + + flush_workqueue(nvme_rdma_wq); +} + +static struct ib_client nvme_rdma_ib_client = { + .name = "nvme_rdma", + .add = nvme_rdma_add_one, + .remove = nvme_rdma_remove_one +}; + static int __init nvme_rdma_init_module(void) { + int ret; + nvme_rdma_wq = create_workqueue("nvme_rdma_wq"); if (!nvme_rdma_wq) return -ENOMEM; + ret = ib_register_client(&nvme_rdma_ib_client); + if (ret) { + destroy_workqueue(nvme_rdma_wq); + return ret; + } + nvmf_register_transport(&nvme_rdma_transport); return 0; } static void __exit nvme_rdma_cleanup_module(void) { - struct nvme_rdma_ctrl *ctrl; - nvmf_unregister_transport(&nvme_rdma_transport); - - mutex_lock(&nvme_rdma_ctrl_mutex); - list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) - __nvme_rdma_del_ctrl(ctrl); - mutex_unlock(&nvme_rdma_ctrl_mutex); - + ib_unregister_client(&nvme_rdma_ib_client); destroy_workqueue(nvme_rdma_wq); } -- cgit v1.2.3 From 1bda18de8f15a611a61d1a935b679db2e153338a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Sep 2016 16:24:38 +0100 Subject: nvme-rdma: fix null pointer dereference on req->mr If there is an error on req->mr, req->mr is set to null, however the following statement sets req->mr->need_inval causing a null pointer dereference. Fix this by bailing out to label 'out' to immediately return and hence skip over the offending null pointer dereference. Fixes: f5b7b559e1488 ("nvme-rdma: Get rid of duplicate variable") Signed-off-by: Colin Ian King Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme/host/rdma.c') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d6bdf55a969e..c2c2c28e6eb5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -293,6 +293,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) if (IS_ERR(req->mr)) { ret = PTR_ERR(req->mr); req->mr = NULL; + goto out; } req->mr->need_inval = false; -- cgit v1.2.3