summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/tcp.c')
-rw-r--r--drivers/nvme/host/tcp.c108
1 files changed, 75 insertions, 33 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 11e84ed4de36..e159b78b5f3b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -110,6 +110,7 @@ struct nvme_tcp_ctrl {
struct sockaddr_storage src_addr;
struct nvme_ctrl ctrl;
+ struct mutex teardown_lock;
struct work_struct err_work;
struct delayed_work connect_work;
struct nvme_tcp_request async_req;
@@ -420,6 +421,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return;
+ dev_warn(ctrl->device, "starting error recovery\n");
queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}
@@ -784,11 +786,11 @@ static void nvme_tcp_data_ready(struct sock *sk)
{
struct nvme_tcp_queue *queue;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue && queue->rd_enabled))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static void nvme_tcp_write_space(struct sock *sk)
@@ -859,12 +861,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
else
flags |= MSG_MORE;
- /* can't zcopy slab pages */
- if (unlikely(PageSlab(page))) {
- ret = sock_no_sendpage(queue->sock, page, offset, len,
+ if (sendpage_ok(page)) {
+ ret = kernel_sendpage(queue->sock, page, offset, len,
flags);
} else {
- ret = kernel_sendpage(queue->sock, page, offset, len,
+ ret = sock_no_sendpage(queue->sock, page, offset, len,
flags);
}
if (ret <= 0)
@@ -1319,6 +1320,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
}
}
+ /* Set 10 seconds timeout for icresp recvmsg */
+ queue->sock->sk->sk_rcvtimeo = 10 * HZ;
+
queue->sock->sk->sk_allocation = GFP_ATOMIC;
if (!qid)
n = 0;
@@ -1435,7 +1439,6 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
return;
-
__nvme_tcp_stop_queue(queue);
}
@@ -1503,6 +1506,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
{
if (to_tcp_ctrl(ctrl)->async_req.pdu) {
+ cancel_work_sync(&ctrl->async_event_work);
nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
}
@@ -1681,17 +1685,33 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
}
- } else {
- blk_mq_update_nr_hw_queues(ctrl->tagset,
- ctrl->queue_count - 1);
}
ret = nvme_tcp_start_io_queues(ctrl);
if (ret)
goto out_cleanup_connect_q;
+ if (!new) {
+ nvme_start_queues(ctrl);
+ if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
+ /*
+ * If we timed out waiting for freeze we are likely to
+ * be stuck. Fail the controller initialization just
+ * to be safe.
+ */
+ ret = -ENODEV;
+ goto out_wait_freeze_timed_out;
+ }
+ blk_mq_update_nr_hw_queues(ctrl->tagset,
+ ctrl->queue_count - 1);
+ nvme_unfreeze(ctrl);
+ }
+
return 0;
+out_wait_freeze_timed_out:
+ nvme_stop_queues(ctrl);
+ nvme_tcp_stop_io_queues(ctrl);
out_cleanup_connect_q:
if (new)
blk_cleanup_queue(ctrl->connect_q);
@@ -1777,6 +1797,7 @@ out_free_queue:
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
bool remove)
{
+ mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
blk_mq_quiesce_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
if (ctrl->admin_tagset) {
@@ -1787,13 +1808,17 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
if (remove)
blk_mq_unquiesce_queue(ctrl->admin_q);
nvme_tcp_destroy_admin_queue(ctrl, remove);
+ mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
}
static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
bool remove)
{
+ mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
if (ctrl->queue_count <= 1)
- return;
+ goto out;
+ blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_start_freeze(ctrl);
nvme_stop_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
if (ctrl->tagset) {
@@ -1804,6 +1829,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
if (remove)
nvme_start_queues(ctrl);
nvme_tcp_destroy_io_queues(ctrl, remove);
+out:
+ mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
}
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
@@ -2042,40 +2069,55 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
nvme_tcp_queue_request(&ctrl->async_req);
}
+static void nvme_tcp_complete_timed_out(struct request *rq)
+{
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
+
+ /* fence other contexts that may complete the command */
+ mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
+ nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
+ if (!blk_mq_request_completed(rq)) {
+ nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
+ blk_mq_complete_request(rq);
+ }
+ mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
+}
+
static enum blk_eh_timer_return
nvme_tcp_timeout(struct request *rq, bool reserved)
{
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
- struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
- /*
- * Restart the timer if a controller reset is already scheduled. Any
- * timed out commands would be handled before entering the connecting
- * state.
- */
- if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
- return BLK_EH_RESET_TIMER;
-
- dev_warn(ctrl->ctrl.device,
+ dev_warn(ctrl->device,
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
- if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+ if (ctrl->state != NVME_CTRL_LIVE) {
/*
- * Teardown immediately if controller times out while starting
- * or we are already started error recovery. all outstanding
- * requests are completed on shutdown, so we return BLK_EH_DONE.
+ * If we are resetting, connecting or deleting we should
+ * complete immediately because we may block controller
+ * teardown or setup sequence
+ * - ctrl disable/shutdown fabrics requests
+ * - connect requests
+ * - initialization admin requests
+ * - I/O requests that entered after unquiescing and
+ * the controller stopped responding
+ *
+ * All other requests should be cancelled by the error
+ * recovery work, so it's fine that we fail it here.
*/
- flush_work(&ctrl->err_work);
- nvme_tcp_teardown_io_queues(&ctrl->ctrl, false);
- nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false);
+ nvme_tcp_complete_timed_out(rq);
return BLK_EH_DONE;
}
- dev_warn(ctrl->ctrl.device, "starting error recovery\n");
- nvme_tcp_error_recovery(&ctrl->ctrl);
-
+ /*
+ * LIVE state should trigger the normal error recovery which will
+ * handle completing this request.
+ */
+ nvme_tcp_error_recovery(ctrl);
return BLK_EH_RESET_TIMER;
}
@@ -2302,6 +2344,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
nvme_tcp_reconnect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
+ mutex_init(&ctrl->teardown_lock);
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
opts->trsvcid =
@@ -2360,8 +2403,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- nvme_get_ctrl(&ctrl->ctrl);
-
mutex_lock(&nvme_tcp_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
mutex_unlock(&nvme_tcp_ctrl_mutex);
@@ -2371,6 +2412,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
+ nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);