author     Linus Torvalds <torvalds@linux-foundation.org>  2015-01-15 10:27:56 +1300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-01-15 10:27:56 +1300
commit     31238e61b51f5a68bb8630ddb60ede475846d698
tree       64ed1e7160d79a345ee357d9a8445176e722e170
parent     188c901941efd43cbf21e8f4f9e9a276536b989c
parent     7a509a6b07dd5a08d91f8a7e0cccb9a6438ce439
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe:
"The major part is an update to the NVMe driver, fixing various issues
around surprise removal and hung controllers. Most of that is from
Keith, and parts are simple blk-mq fixes or exports/additions of minor
functions to aid this effort, and parts are changes directly to the
NVMe driver.
Apart from the above, this contains:
- Small blk-mq change from me, killing an unused member of the
hardware queue structure.
- Small fix from Ming Lei, fixing up a few drivers that didn't
properly check for ERR_PTR() returns from blk_mq_init_queue()"
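
The ERR_PTR() fix mentioned above comes down to a calling-convention detail: blk_mq_init_queue() reports failure through an error pointer rather than NULL, so a bare NULL check lets errors slip through and turns them into a crash later. A minimal sketch of the corrected calling pattern follows; the function name and tag-set argument are illustrative placeholders, not code from this pull. The actual driver hunks are in null_blk.c and virtio_blk.c further down.

#include <linux/blk-mq.h>
#include <linux/err.h>

/*
 * Illustrative only: my_driver_setup_queue() and its arguments are
 * placeholders.  blk_mq_init_queue() returns an ERR_PTR() value on
 * failure, never NULL, so the check must use IS_ERR()/PTR_ERR().
 */
static int my_driver_setup_queue(struct blk_mq_tag_set *set,
				 struct request_queue **qp)
{
	struct request_queue *q;

	q = blk_mq_init_queue(set);
	if (IS_ERR(q))			/* not "if (!q)" */
		return PTR_ERR(q);	/* propagate the real error code */

	*qp = q;
	return 0;
}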
* 'for-linus' of git://git.kernel.dk/linux-block:
NVMe: Fix locking on abort handling
NVMe: Start and stop h/w queues on reset
NVMe: Command abort handling fixes
NVMe: Admin queue removal handling
NVMe: Reference count admin queue usage
NVMe: Start all requests
blk-mq: End unstarted requests on a dying queue
blk-mq: Allow requests to never expire
blk-mq: Add helper to abort requeued requests
blk-mq: Let drivers cancel requeue_work
blk-mq: Export if requests were started
blk-mq: Wake tasks entering queue on dying
blk-mq: get rid of ->cmd_size in the hardware queue
block: fix checking return value of blk_mq_init_queue
block: wake up waiters when a queue is marked dying
NVMe: Fix double free irq
blk-mq: Export freeze/unfreeze functions
blk-mq: Exit queue on alloc failure
-rw-r--r--   block/blk-core.c             |  21
-rw-r--r--   block/blk-mq-tag.c           |  14
-rw-r--r--   block/blk-mq-tag.h           |   1
-rw-r--r--   block/blk-mq.c               |  75
-rw-r--r--   block/blk-mq.h               |   1
-rw-r--r--   block/blk-timeout.c          |   3
-rw-r--r--   drivers/block/null_blk.c     |   2
-rw-r--r--   drivers/block/nvme-core.c    | 175
-rw-r--r--   drivers/block/virtio_blk.c   |   2
-rw-r--r--   include/linux/blk-mq.h       |   8
-rw-r--r--   include/linux/blk_types.h    |   2
11 files changed, 240 insertions, 64 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 30f6153a40c2..3ad405571dcc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,6 +473,25 @@ void blk_queue_bypass_end(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
+void blk_set_queue_dying(struct request_queue *q)
+{
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+
+	if (q->mq_ops)
+		blk_mq_wake_waiters(q);
+	else {
+		struct request_list *rl;
+
+		blk_queue_for_each_rl(rl, q) {
+			if (rl->rq_pool) {
+				wake_up(&rl->wait[BLK_RW_SYNC]);
+				wake_up(&rl->wait[BLK_RW_ASYNC]);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -486,7 +505,7 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+	blk_set_queue_dying(q);
 	spin_lock_irq(lock);
 
 	/*
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 32e8dbb9ad1c..60c9d4a93fe4 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -68,9 +68,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * Wakeup all potentially sleeping on normal (non-reserved) tags
+ * Wakeup all potentially sleeping on tags
 */
-static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
+void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 {
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
@@ -85,6 +85,12 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 
 		wake_index = bt_index_inc(wake_index);
 	}
+
+	if (include_reserve) {
+		bt = &tags->breserved_tags;
+		if (waitqueue_active(&bt->bs[0].wait))
+			wake_up(&bt->bs[0].wait);
+	}
 }
 
 /*
@@ -100,7 +106,7 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 
 	atomic_dec(&tags->active_queues);
 
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 }
 
 /*
@@ -584,7 +590,7 @@ int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
 	 * static and should never need resizing.
 	 */
 	bt_update_count(&tags->bitmap_tags, tdepth);
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 	return 0;
 }
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 6206ed17ef76..a6fa0fc9d41a 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -54,6 +54,7 @@ extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
 extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
+extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da1ab5641227..2f95747c287e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -107,7 +107,7 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
 	wake_up_all(&q->mq_freeze_wq);
 }
 
-static void blk_mq_freeze_queue_start(struct request_queue *q)
+void blk_mq_freeze_queue_start(struct request_queue *q)
 {
 	bool freeze;
 
@@ -120,6 +120,7 @@ static void blk_mq_freeze_queue_start(struct request_queue *q)
 		blk_mq_run_queues(q, false);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
 
 static void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
@@ -136,7 +137,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	blk_mq_freeze_queue_wait(q);
 }
 
-static void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue(struct request_queue *q)
 {
 	bool wake;
 
@@ -149,6 +150,24 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
 		wake_up_all(&q->mq_freeze_wq);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+
+void blk_mq_wake_waiters(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (blk_mq_hw_queue_mapped(hctx))
+			blk_mq_tag_wakeup_all(hctx->tags, true);
+
+	/*
+	 * If we are called because the queue has now been marked as
+	 * dying, we need to ensure that processes currently waiting on
+	 * the queue are notified as well.
+	 */
+	wake_up_all(&q->mq_freeze_wq);
+}
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -258,8 +277,10 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 		ctx = alloc_data.ctx;
 	}
 	blk_mq_put_ctx(ctx);
-	if (!rq)
+	if (!rq) {
+		blk_mq_queue_exit(q);
 		return ERR_PTR(-EWOULDBLOCK);
+	}
 	return rq;
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
@@ -383,6 +404,12 @@ void blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+int blk_mq_request_started(struct request *rq)
+{
+	return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_request_started);
+
 void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -500,12 +527,38 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
 }
 EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
+void blk_mq_cancel_requeue_work(struct request_queue *q)
+{
+	cancel_work_sync(&q->requeue_work);
+}
+EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
+
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
 	kblockd_schedule_work(&q->requeue_work);
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
+void blk_mq_abort_requeue_list(struct request_queue *q)
+{
+	unsigned long flags;
+	LIST_HEAD(rq_list);
+
+	spin_lock_irqsave(&q->requeue_lock, flags);
+	list_splice_init(&q->requeue_list, &rq_list);
+	spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+	while (!list_empty(&rq_list)) {
+		struct request *rq;
+
+		rq = list_first_entry(&rq_list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		rq->errors = -EIO;
+		blk_mq_end_request(rq, rq->errors);
+	}
+}
+EXPORT_SYMBOL(blk_mq_abort_requeue_list);
+
 static inline bool is_flush_request(struct request *rq,
 		struct blk_flush_queue *fq, unsigned int tag)
 {
@@ -566,13 +619,24 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		break;
 	}
 }
-
+
 static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
 	struct blk_mq_timeout_data *data = priv;
 
-	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
+		/*
+		 * If a request wasn't started before the queue was
+		 * marked dying, kill it here or it'll go unnoticed.
+		 */
+		if (unlikely(blk_queue_dying(rq->q))) {
+			rq->errors = -EIO;
+			blk_mq_complete_request(rq);
+		}
+		return;
+	}
+	if (rq->cmd_flags & REQ_NO_TIMEOUT)
 		return;
 
 	if (time_after_eq(jiffies, rq->deadline)) {
@@ -1601,7 +1665,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	hctx->queue = q;
 	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags;
-	hctx->cmd_size = set->cmd_size;
 
 	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 					blk_mq_hctx_notify, hctx);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 206230e64f79..4f4f943c22c3 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
+void blk_mq_wake_waiters(struct request_queue *q);
 
 /*
  * CPU hotplug helpers
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 56c025894cdf..246dfb16c3d9 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -190,6 +190,9 @@ void blk_add_timer(struct request *req)
 	struct request_queue *q = req->q;
 	unsigned long expiry;
 
+	if (req->cmd_flags & REQ_NO_TIMEOUT)
+		return;
+
 	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
 	if (!q->mq_ops && !q->rq_timed_out_fn)
 		return;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ae9f615382f6..aa2224aa7caa 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -530,7 +530,7 @@ static int null_add_dev(void)
 		goto out_cleanup_queues;
 
 	nullb->q = blk_mq_init_queue(&nullb->tag_set);
-	if (!nullb->q) {
+	if (IS_ERR(nullb->q)) {
 		rv = -ENOMEM;
 		goto out_cleanup_tags;
 	}
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index b1d5d8797315..cb529e9a82dd 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -215,6 +215,7 @@ static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
 	cmd->fn = handler;
 	cmd->ctx = ctx;
 	cmd->aborted = 0;
+	blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
 }
 
 /* Special values must be less than 0x1000 */
@@ -431,8 +432,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	if (unlikely(status)) {
 		if (!(status & NVME_SC_DNR || blk_noretry_request(req))
 		    && (jiffies - req->start_time) < req->timeout) {
+			unsigned long flags;
+
 			blk_mq_requeue_request(req);
-			blk_mq_kick_requeue_list(req->q);
+			spin_lock_irqsave(req->q->queue_lock, flags);
+			if (!blk_queue_stopped(req->q))
+				blk_mq_kick_requeue_list(req->q);
+			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
 		req->errors = nvme_error_status(status);
@@ -664,8 +670,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	blk_mq_start_request(req);
-
 	nvme_set_info(cmd, iod, req_completion);
 	spin_lock_irq(&nvmeq->q_lock);
 	if (req->cmd_flags & REQ_DISCARD)
@@ -835,6 +839,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	req->cmd_flags |= REQ_NO_TIMEOUT;
 	cmd_info = blk_mq_rq_to_pdu(req);
 	nvme_set_info(cmd_info, req, async_req_completion);
 
@@ -1016,14 +1021,19 @@ static void nvme_abort_req(struct request *req)
 	struct nvme_command cmd;
 
 	if (!nvmeq->qid || cmd_rq->aborted) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev_list_lock, flags);
 		if (work_busy(&dev->reset_work))
-			return;
+			goto out;
 		list_del_init(&dev->node);
 		dev_warn(&dev->pci_dev->dev,
 			"I/O %d QID %d timeout, reset controller\n",
 							req->tag, nvmeq->qid);
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
+ out:
+		spin_unlock_irqrestore(&dev_list_lock, flags);
 		return;
 	}
 
@@ -1064,15 +1074,22 @@ static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
 	void *ctx;
 	nvme_completion_fn fn;
 	struct nvme_cmd_info *cmd;
-	static struct nvme_completion cqe = {
-		.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
-	};
+	struct nvme_completion cqe;
+
+	if (!blk_mq_request_started(req))
+		return;
 
 	cmd = blk_mq_rq_to_pdu(req);
 
 	if (cmd->ctx == CMD_CTX_CANCELLED)
 		return;
 
+	if (blk_queue_dying(req->q))
+		cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+	else
+		cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
+
+
 	dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
 						req->tag, nvmeq->qid);
 	ctx = cancel_cmd_info(cmd, &fn);
@@ -1084,17 +1101,29 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = cmd->nvmeq;
 
-	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
-							nvmeq->qid);
-	if (nvmeq->dev->initialized)
-		nvme_abort_req(req);
-
 	/*
 	 * The aborted req will be completed on receiving the abort req.
 	 * We enable the timer again. If hit twice, it'll cause a device reset,
 	 * as the device then is in a faulty state.
 	 */
-	return BLK_EH_RESET_TIMER;
+	int ret = BLK_EH_RESET_TIMER;
+
+	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
+							nvmeq->qid);
+
+	spin_lock_irq(&nvmeq->q_lock);
+	if (!nvmeq->dev->initialized) {
+		/*
+		 * Force cancelled command frees the request, which requires we
+		 * return BLK_EH_NOT_HANDLED.
+		 */
+		nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
+		ret = BLK_EH_NOT_HANDLED;
+	} else
+		nvme_abort_req(req);
+	spin_unlock_irq(&nvmeq->q_lock);
+
+	return ret;
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1131,10 +1160,16 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
  */
 static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
-	int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
+	int vector;
 
 	spin_lock_irq(&nvmeq->q_lock);
+	if (nvmeq->cq_vector == -1) {
+		spin_unlock_irq(&nvmeq->q_lock);
+		return 1;
+	}
+	vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
 	nvmeq->dev->online_queues--;
+	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	irq_set_affinity_hint(vector, NULL);
@@ -1169,11 +1204,13 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
+	if (!qid && dev->admin_q)
+		blk_mq_freeze_queue_start(dev->admin_q);
 	nvme_clear_queue(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-							int depth, int vector)
+							int depth)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
@@ -1199,7 +1236,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->cq_phase = 1;
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
-	nvmeq->cq_vector = vector;
 	nvmeq->qid = qid;
 	dev->queue_count++;
 	dev->queues[qid] = nvmeq;
@@ -1244,6 +1280,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
 
+	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
 		return result;
@@ -1355,6 +1392,14 @@ static struct blk_mq_ops nvme_mq_ops = {
 	.timeout	= nvme_timeout,
 };
 
+static void nvme_dev_remove_admin(struct nvme_dev *dev)
+{
+	if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
+		blk_cleanup_queue(dev->admin_q);
+		blk_mq_free_tag_set(&dev->admin_tagset);
+	}
+}
+
 static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 {
 	if (!dev->admin_q) {
@@ -1370,21 +1415,20 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 			return -ENOMEM;
 
 		dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
-		if (!dev->admin_q) {
+		if (IS_ERR(dev->admin_q)) {
 			blk_mq_free_tag_set(&dev->admin_tagset);
 			return -ENOMEM;
 		}
-	}
+		if (!blk_get_queue(dev->admin_q)) {
+			nvme_dev_remove_admin(dev);
+			return -ENODEV;
+		}
+	} else
+		blk_mq_unfreeze_queue(dev->admin_q);
 
 	return 0;
 }
 
-static void nvme_free_admin_tags(struct nvme_dev *dev)
-{
-	if (dev->admin_q)
-		blk_mq_free_tag_set(&dev->admin_tagset);
-}
-
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1416,7 +1460,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	nvmeq = dev->queues[0];
 	if (!nvmeq) {
-		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, 0);
+		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
 		if (!nvmeq)
 			return -ENOMEM;
 	}
@@ -1439,18 +1483,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	if (result)
 		goto free_nvmeq;
 
-	result = nvme_alloc_admin_tags(dev);
-	if (result)
-		goto free_nvmeq;
-
+	nvmeq->cq_vector = 0;
 	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
 	if (result)
-		goto free_tags;
+		goto free_nvmeq;
 
 	return result;
 
- free_tags:
-	nvme_free_admin_tags(dev);
  free_nvmeq:
 	nvme_free_queues(dev, 0);
 	return result;
@@ -1944,7 +1983,7 @@ static void nvme_create_io_queues(struct nvme_dev *dev)
 	unsigned i;
 
 	for (i = dev->queue_count; i <= dev->max_qid; i++)
-		if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1))
+		if (!nvme_alloc_queue(dev, i, dev->q_depth))
 			break;
 
 	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
@@ -2235,13 +2274,18 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
 			break;
 		if (!schedule_timeout(ADMIN_TIMEOUT) ||
 					fatal_signal_pending(current)) {
+			/*
+			 * Disable the controller first since we can't trust it
+			 * at this point, but leave the admin queue enabled
+			 * until all queue deletion requests are flushed.
+			 * FIXME: This may take a while if there are more h/w
+			 * queues than admin tags.
+			 */
 			set_current_state(TASK_RUNNING);
-
 			nvme_disable_ctrl(dev, readq(&dev->bar->cap));
-			nvme_disable_queue(dev, 0);
-
-			send_sig(SIGKILL, dq->worker->task, 1);
+			nvme_clear_queue(dev->queues[0]);
 			flush_kthread_worker(dq->worker);
+			nvme_disable_queue(dev, 0);
 			return;
 		}
 	}
@@ -2318,7 +2362,6 @@ static void nvme_del_queue_start(struct kthread_work *work)
 {
 	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
 							cmdinfo.work);
-	allow_signal(SIGKILL);
 	if (nvme_delete_sq(nvmeq))
 		nvme_del_queue_end(nvmeq);
 }
@@ -2376,6 +2419,34 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
 		kthread_stop(tmp);
 }
 
+static void nvme_freeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		blk_mq_freeze_queue_start(ns->queue);
+
+		spin_lock(ns->queue->queue_lock);
+		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+		spin_unlock(ns->queue->queue_lock);
+
+		blk_mq_cancel_requeue_work(ns->queue);
+		blk_mq_stop_hw_queues(ns->queue);
+	}
+}
+
+static void nvme_unfreeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+		blk_mq_unfreeze_queue(ns->queue);
+		blk_mq_start_stopped_hw_queues(ns->queue, true);
+		blk_mq_kick_requeue_list(ns->queue);
+	}
+}
+
 static void nvme_dev_shutdown(struct nvme_dev *dev)
 {
 	int i;
@@ -2384,8 +2455,10 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	dev->initialized = 0;
 	nvme_dev_list_remove(dev);
 
-	if (dev->bar)
+	if (dev->bar) {
+		nvme_freeze_queues(dev);
 		csts = readl(&dev->bar->csts);
+	}
 	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
 		for (i = dev->queue_count - 1; i >= 0; i--) {
 			struct nvme_queue *nvmeq = dev->queues[i];
@@ -2400,12 +2473,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	nvme_dev_unmap(dev);
 }
 
-static void nvme_dev_remove_admin(struct nvme_dev *dev)
-{
-	if (dev->admin_q && !blk_queue_dying(dev->admin_q))
-		blk_cleanup_queue(dev->admin_q);
-}
-
 static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
@@ -2413,8 +2480,10 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 	list_for_each_entry(ns, &dev->namespaces, list) {
 		if (ns->disk->flags & GENHD_FL_UP)
 			del_gendisk(ns->disk);
-		if (!blk_queue_dying(ns->queue))
+		if (!blk_queue_dying(ns->queue)) {
+			blk_mq_abort_requeue_list(ns->queue);
 			blk_cleanup_queue(ns->queue);
+		}
 	}
 }
 
@@ -2495,6 +2564,7 @@ static void nvme_free_dev(struct kref *kref)
 	nvme_free_namespaces(dev);
 	nvme_release_instance(dev);
 	blk_mq_free_tag_set(&dev->tagset);
+	blk_put_queue(dev->admin_q);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -2591,15 +2661,20 @@ static int nvme_dev_start(struct nvme_dev *dev)
 	}
 
 	nvme_init_queue(dev->queues[0], 0);
+	result = nvme_alloc_admin_tags(dev);
+	if (result)
+		goto disable;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto disable;
+		goto free_tags;
 
 	nvme_set_irq_hints(dev);
 
 	return result;
 
+ free_tags:
+	nvme_dev_remove_admin(dev);
  disable:
 	nvme_disable_queue(dev, 0);
 	nvme_dev_list_remove(dev);
@@ -2639,6 +2714,9 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		dev->reset_workfn = nvme_remove_disks;
 		queue_work(nvme_workq, &dev->reset_work);
 		spin_unlock(&dev_list_lock);
+	} else {
+		nvme_unfreeze_queues(dev);
+		nvme_set_irq_hints(dev);
 	}
 	dev->initialized = 1;
 	return 0;
@@ -2776,11 +2854,10 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
 	misc_deregister(&dev->miscdev);
-	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
+	nvme_dev_remove(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
-	nvme_free_admin_tags(dev);
 	nvme_release_prp_pools(dev);
 	kref_put(&dev->kref, nvme_free_dev);
 }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7ef7c098708f..cdfbd21e3597 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -638,7 +638,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 		goto out_put_disk;
 
 	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
-	if (!q) {
+	if (IS_ERR(q)) {
 		err = -ENOMEM;
 		goto out_free_tags;
 	}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8aded9ab2e4e..5735e7130d63 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -34,7 +34,6 @@ struct blk_mq_hw_ctx {
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
 	struct request_queue	*queue;
-	unsigned int		queue_num;
 	struct blk_flush_queue	*fq;
 
 	void			*driver_data;
@@ -54,7 +53,7 @@ struct blk_mq_hw_ctx {
 	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
 	unsigned int		numa_node;
-	unsigned int		cmd_size;	/* per-request extra data */
+	unsigned int		queue_num;
 
 	atomic_t		nr_active;
 
@@ -195,13 +194,16 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
 
+int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, int error);
 void __blk_mq_end_request(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
+void blk_mq_cancel_requeue_work(struct request_queue *q);
 void blk_mq_kick_requeue_list(struct request_queue *q);
+void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
@@ -212,6 +214,8 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
 		void *priv);
+void blk_mq_unfreeze_queue(struct request_queue *q);
+void blk_mq_freeze_queue_start(struct request_queue *q);
 
 /*
  * Driver command data is immediately after the request. So subtract request
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 445d59231bc4..c294e3e25e37 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -190,6 +190,7 @@ enum rq_flag_bits {
 	__REQ_PM,		/* runtime pm request */
 	__REQ_HASHED,		/* on IO scheduler merge hash */
 	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
+	__REQ_NO_TIMEOUT,	/* requests may never expire */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -243,5 +244,6 @@ enum rq_flag_bits {
 #define REQ_PM			(1ULL << __REQ_PM)
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
+#define REQ_NO_TIMEOUT		(1ULL << __REQ_NO_TIMEOUT)
 
 #endif	/* __LINUX_BLK_TYPES_H */