summaryrefslogtreecommitdiff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/core.c36
-rw-r--r--drivers/nvme/host/fabrics.c2
-rw-r--r--drivers/nvme/host/nvme.h3
-rw-r--r--drivers/nvme/host/pci.c27
-rw-r--r--drivers/nvme/host/rdma.c48
5 files changed, 64 insertions, 52 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index da10b484bd25..fbeca065f18c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1204,8 +1204,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
- if (ctrl->stripe_size)
- blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
+ if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+ blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
blk_queue_virt_boundary(q, ctrl->page_size - 1);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
@@ -1261,19 +1261,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->max_hw_sectors =
min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
- if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
- unsigned int max_hw_sectors;
-
- ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
- max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
- if (ctrl->max_hw_sectors) {
- ctrl->max_hw_sectors = min(max_hw_sectors,
- ctrl->max_hw_sectors);
- } else {
- ctrl->max_hw_sectors = max_hw_sectors;
- }
- }
-
nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
@@ -1738,7 +1725,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_attr_group);
del_gendisk(ns->disk);
- blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
}
@@ -2057,12 +2043,20 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
* Revalidating a dead namespace sets capacity to 0. This will
* end buffered writers dirtying pages that can't be synced.
*/
- if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- revalidate_disk(ns->disk);
-
+ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ continue;
+ revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
- blk_mq_abort_requeue_list(ns->queue);
- blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+ /*
+ * Forcibly start all queues to avoid having stuck requests.
+ * Note that we must ensure the queues are not stopped
+ * when the final removal happens.
+ */
+ blk_mq_start_hw_queues(ns->queue);
+
+ /* draining requests in requeue list */
+ blk_mq_kick_requeue_list(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5a3f008d3480..eef1a68e5d95 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -77,7 +77,7 @@ static struct nvmf_host *nvmf_host_default(void)
kref_init(&host->ref);
uuid_be_gen(&host->id);
snprintf(host->nqn, NVMF_NQN_SIZE,
- "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
+ "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
mutex_lock(&nvmf_hosts_mutex);
list_add_tail(&host->list, &nvmf_hosts);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d47f5a5d18c7..5c52a6182765 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -84,7 +84,7 @@ enum nvme_quirks {
* NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
* found empirically.
*/
-#define NVME_QUIRK_DELAY_AMOUNT 2000
+#define NVME_QUIRK_DELAY_AMOUNT 2300
enum nvme_ctrl_state {
NVME_CTRL_NEW,
@@ -121,7 +121,6 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
- u32 stripe_size;
u16 oncs;
u16 vid;
atomic_t abort_limit;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5e52034ab010..e48ecb9303ca 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -96,7 +96,7 @@ struct nvme_dev {
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
- dma_addr_t cmb_dma_addr;
+ pci_bus_addr_t cmb_bus_addr;
u64 cmb_size;
u32 cmbsz;
u32 cmbloc;
@@ -1037,7 +1037,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+ nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
nvmeq->sq_cmds_io = dev->cmb + offset;
} else {
nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
@@ -1343,7 +1343,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
resource_size_t bar_size;
struct pci_dev *pdev = to_pci_dev(dev->dev);
void __iomem *cmb;
- dma_addr_t dma_addr;
+ int bar;
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!(NVME_CMB_SZ(dev->cmbsz)))
@@ -1356,7 +1356,8 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
size = szu * NVME_CMB_SZ(dev->cmbsz);
offset = szu * NVME_CMB_OFST(dev->cmbloc);
- bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc));
+ bar = NVME_CMB_BIR(dev->cmbloc);
+ bar_size = pci_resource_len(pdev, bar);
if (offset > bar_size)
return NULL;
@@ -1369,12 +1370,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset;
- cmb = ioremap_wc(dma_addr, size);
+ cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
if (!cmb)
return NULL;
- dev->cmb_dma_addr = dma_addr;
+ dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
dev->cmb_size = size;
return cmb;
}
@@ -1384,6 +1384,11 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
if (dev->cmb) {
iounmap(dev->cmb);
dev->cmb = NULL;
+ if (dev->cmbsz) {
+ sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
+ &dev_attr_cmb.attr, NULL);
+ dev->cmbsz = 0;
+ }
}
}
@@ -1655,6 +1660,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
+ nvme_release_cmb(dev);
pci_free_irq_vectors(pdev);
if (pci_is_enabled(pdev)) {
@@ -1983,15 +1989,16 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
- if (!pci_device_is_present(pdev))
+ if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+ nvme_dev_disable(dev, false);
+ }
flush_work(&dev->reset_work);
nvme_uninit_ctrl(&dev->ctrl);
nvme_dev_disable(dev, true);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
- nvme_release_cmb(dev);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl);
@@ -2102,6 +2109,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 3d25add36d91..ab4f8db2a8ca 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -88,7 +88,7 @@ enum nvme_rdma_queue_flags {
struct nvme_rdma_queue {
struct nvme_rdma_qe *rsp_ring;
- u8 sig_count;
+ atomic_t sig_count;
int queue_size;
size_t cmnd_capsule_len;
struct nvme_rdma_ctrl *ctrl;
@@ -337,8 +337,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
struct ib_device *ibdev = dev->dev;
int ret;
- BUG_ON(queue_idx >= ctrl->queue_count);
-
ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
DMA_TO_DEVICE);
if (ret)
@@ -555,6 +553,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
queue->cmnd_capsule_len = sizeof(struct nvme_command);
queue->queue_size = queue_size;
+ atomic_set(&queue->sig_count, 0);
queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
RDMA_PS_TCP, IB_QPT_RC);
@@ -642,8 +641,22 @@ out_free_queues:
static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
{
+ struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ unsigned int nr_io_queues;
int i, ret;
+ nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+ ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
+ if (ret)
+ return ret;
+
+ ctrl->queue_count = nr_io_queues + 1;
+ if (ctrl->queue_count < 2)
+ return 0;
+
+ dev_info(ctrl->ctrl.device,
+ "creating %d I/O queues.\n", nr_io_queues);
+
for (i = 1; i < ctrl->queue_count; i++) {
ret = nvme_rdma_init_queue(ctrl, i,
ctrl->ctrl.opts->queue_size);
@@ -1011,6 +1024,18 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
nvme_rdma_wr_error(cq, wc, "SEND");
}
+/*
+ * We want to signal completion at least every queue depth/2. This returns the
+ * largest power of two that is not above half of (queue size + 1) to optimize
+ * (avoid divisions).
+ */
+static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+{
+ int limit = 1 << ilog2((queue->queue_size + 1) / 2);
+
+ return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
+}
+
static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
struct ib_send_wr *first, bool flush)
@@ -1038,9 +1063,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
* Would have been way to obvious to handle this in hardware or
* at least the RDMA stack..
*
- * This messy and racy code sniplet is copy and pasted from the iSER
- * initiator, and the magic '32' comes from there as well.
- *
* Always signal the flushes. The magic request used for the flush
* sequencer is not allocated in our driver's tagset and it's
* triggered to be freed by blk_cleanup_queue(). So we need to
@@ -1048,7 +1070,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
* embeded in request's payload, is not freed when __ib_process_cq()
* calls wr_cqe->done().
*/
- if ((++queue->sig_count % 32) == 0 || flush)
+ if (nvme_rdma_queue_sig_limit(queue) || flush)
wr.send_flags |= IB_SEND_SIGNALED;
if (first)
@@ -1785,20 +1807,8 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
{
- struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
int ret;
- ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
- if (ret)
- return ret;
-
- ctrl->queue_count = opts->nr_io_queues + 1;
- if (ctrl->queue_count < 2)
- return 0;
-
- dev_info(ctrl->ctrl.device,
- "creating %d I/O queues.\n", opts->nr_io_queues);
-
ret = nvme_rdma_init_io_queues(ctrl);
if (ret)
return ret;