author		Linus Torvalds <torvalds@linux-foundation.org>	2018-04-05 14:27:02 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-05 14:27:02 -0700
commit		3526dd0c7832f1011a0477cc6d903662bae05ea8
tree		22fbac64eb40a0b29bfa4c029695f39b2f591e62 /drivers/nvme
parent		dd972f924df6bdbc0ab185a38d5d2361dbc26311
parent		bc6d65e6dc89c3b7ff78e4ad797117c122ffde8e
Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"It's a pretty quiet round this time, which is nice. This contains:
- series from Bart, cleaning up the way we set/test/clear atomic
queue flags.
- series from Bart, fixing races between gendisk and queue
registration and removal.
- set of bcache fixes and improvements from various folks, by way of
Michael Lyle.
- set of lightnvm updates from Matias, most of it being the 1.2 to
2.0 transition.
- removal of unused DIO flags from Nikolay.
- blk-mq/sbitmap memory ordering fixes from Omar.
- divide-by-zero fix for BFQ from Paolo.
- minor documentation patches from Randy.
- timeout fix from Tejun.
- Alpha "can't write a char atomically" fix from Mikulas.
- set of NVMe fixes by way of Keith.
- bsg and bsg-lib improvements from Christoph.
- a few sed-opal fixes from Jonas.
- cdrom check-disk-change deadlock fix from Maurizio.
- various little fixes, comment fixes, etc from various folks"
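The queue-flag cleanup named in the first bullet shows up throughout the nvme diff below as mechanical substitutions of queue_flag_set_unlocked() with blk_queue_flag_set(), which updates q->queue_flags atomically so call sites no longer depend on the queue lock. A minimal sketch of the converted call-site pattern (the wrapper function here is illustrative, not part of the series):

    #include <linux/blkdev.h>

    /* Hypothetical helper showing the converted call-site pattern. */
    static void example_mark_queue_nonrot(struct request_queue *q)
    {
    	/* before: queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); */
    	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);	/* atomic bit set */
    }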
* tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits)
blk-mq: Directly schedule q->timeout_work when aborting a request
blktrace: fix comment in blktrace_api.h
lightnvm: remove function name in strings
lightnvm: pblk: remove some unnecessary NULL checks
lightnvm: pblk: don't recover unwritten lines
lightnvm: pblk: implement 2.0 support
lightnvm: pblk: implement get log report chunk
lightnvm: pblk: rename ppaf* to addrf*
lightnvm: pblk: check for supported version
lightnvm: implement get log report chunk helpers
lightnvm: make address conversions depend on generic device
lightnvm: add support for 2.0 address format
lightnvm: normalize geometry nomenclature
lightnvm: complete geo structure with maxoc*
lightnvm: add shorten OCSSD version in geo
lightnvm: add minor version to generic geometry
lightnvm: simplify geometry structure
lightnvm: pblk: refactor init/exit sequences
lightnvm: Avoid validation of default op value
lightnvm: centralize permission check for lightnvm ioctl
...
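Several of the lightnvm entries above implement the OCSSD 2.0 geometry, where the device reports only per-field bit lengths and the driver derives offsets and masks itself (see nvme_nvm_set_addr_20() in the lightnvm.c diff below): fields are packed LSB-first as sector, chunk, parallel unit, group, and an n-bit field at offset off gets the mask ((1ULL << n) - 1) << off. A standalone sketch of that arithmetic, using made-up field widths rather than values from a real device:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	/* Illustrative OCSSD 2.0 field widths (assumed, not real). */
    	unsigned lba_len = 12, chk_len = 10, pu_len = 4;

    	/* Offsets accumulate LSB-first: sector, chunk, parallel unit, group. */
    	unsigned chk_off = lba_len;
    	unsigned lun_off = chk_off + chk_len;
    	unsigned ch_off = lun_off + pu_len;

    	/* Mask out and extract the chunk field of a packed address. */
    	uint64_t chk_mask = ((1ULL << chk_len) - 1) << chk_off;
    	uint64_t ppa = 0x2f3456;

    	printf("group field starts at bit %u\n", ch_off);
    	printf("chunk = %llu\n",
    	       (unsigned long long)((ppa & chk_mask) >> chk_off));
    	return 0;
    }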
Diffstat (limited to 'drivers/nvme')
-rw-r--r--	drivers/nvme/host/Makefile		|   1
-rw-r--r--	drivers/nvme/host/core.c		| 123
-rw-r--r--	drivers/nvme/host/fault_inject.c	|  79
-rw-r--r--	drivers/nvme/host/fc.c			|  36
-rw-r--r--	drivers/nvme/host/lightnvm.c		| 757
-rw-r--r--	drivers/nvme/host/multipath.c		|   8
-rw-r--r--	drivers/nvme/host/nvme.h		|  35
-rw-r--r--	drivers/nvme/host/pci.c			|  26
-rw-r--r--	drivers/nvme/host/rdma.c		|  34
-rw-r--r--	drivers/nvme/target/configfs.c		|  65
-rw-r--r--	drivers/nvme/target/core.c		|  12
-rw-r--r--	drivers/nvme/target/discovery.c		|  30
-rw-r--r--	drivers/nvme/target/fc.c		|  23
-rw-r--r--	drivers/nvme/target/loop.c		|   4
-rw-r--r--	drivers/nvme/target/nvmet.h		|  12
-rw-r--r--	drivers/nvme/target/rdma.c		|  72
16 files changed, 975 insertions, 342 deletions
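A large share of the core.c hunks that follow convert ctrl->namespaces_mutex into the new namespaces_rwsem, so the read-only walkers (freeze/unfreeze, quiesce/unquiesce, ioctl lookup) can hold the lock shared while namespace add/remove takes it exclusive. A minimal kernel-style sketch of the locking pattern, using hypothetical demo_* types rather than the real nvme structures:

    #include <linux/list.h>
    #include <linux/rwsem.h>

    struct demo_ctrl {
    	struct list_head	namespaces;
    	struct rw_semaphore	namespaces_rwsem;	/* was: struct mutex */
    };

    struct demo_ns {
    	struct list_head	list;
    };

    /* Read-side walkers may now run concurrently with one another. */
    static void demo_for_each_ns(struct demo_ctrl *ctrl,
    			     void (*fn)(struct demo_ns *))
    {
    	struct demo_ns *ns;

    	down_read(&ctrl->namespaces_rwsem);
    	list_for_each_entry(ns, &ctrl->namespaces, list)
    		fn(ns);
    	up_read(&ctrl->namespaces_rwsem);
    }

    /* List mutation still excludes all readers and writers. */
    static void demo_add_ns(struct demo_ctrl *ctrl, struct demo_ns *ns)
    {
    	down_write(&ctrl->namespaces_rwsem);
    	list_add_tail(&ns->list, &ctrl->namespaces);
    	up_write(&ctrl->namespaces_rwsem);
    }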
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 441e67e3a9d7..aea459c65ae1 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -12,6 +12,7 @@ nvme-core-y				:= core.o
 nvme-core-$(CONFIG_TRACING)		+= trace.o
 nvme-core-$(CONFIG_NVME_MULTIPATH)	+= multipath.o
 nvme-core-$(CONFIG_NVM)			+= lightnvm.o
+nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS)	+= fault_inject.o
 
 nvme-y					+= pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7aeca5db7916..197a6ba9700f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -100,11 +100,6 @@ static struct class *nvme_subsys_class;
 static void nvme_ns_remove(struct nvme_ns *ns);
 static int nvme_revalidate_disk(struct gendisk *disk);
 
-static __le32 nvme_get_log_dw10(u8 lid, size_t size)
-{
-	return cpu_to_le32((((size / 4) - 1) << 16) | lid);
-}
-
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -135,6 +130,9 @@ static void nvme_delete_ctrl_work(struct work_struct *work)
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, delete_work);
 
+	dev_info(ctrl->device,
+		 "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
+
 	flush_work(&ctrl->reset_work);
 	nvme_stop_ctrl(ctrl);
 	nvme_remove_namespaces(ctrl);
@@ -948,7 +946,8 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
 	c.identify.opcode = nvme_admin_identify;
 	c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
 	c.identify.nsid = cpu_to_le32(nsid);
-	return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list,
+				    NVME_IDENTIFY_DATA_SIZE);
 }
 
 static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
@@ -1124,13 +1123,13 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(rm_list);
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_write(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		if (ns->disk && nvme_revalidate_disk(ns->disk)) {
 			list_move_tail(&ns->list, &rm_list);
 		}
 	}
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_write(&ctrl->namespaces_rwsem);
 
 	list_for_each_entry_safe(ns, next, &rm_list, list)
 		nvme_ns_remove(ns);
@@ -1358,7 +1357,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
 	blk_queue_max_discard_sectors(queue, UINT_MAX);
 	blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
-	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue);
+	blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
 
 	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
 		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -1449,6 +1448,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	if (ns->noiob)
 		nvme_set_chunk_size(ns);
 	nvme_update_disk_info(disk, ns, id);
+	if (ns->ndev)
+		nvme_nvm_update_nvm_info(ns);
 #ifdef CONFIG_NVME_MULTIPATH
 	if (ns->head->disk)
 		nvme_update_disk_info(ns->head->disk, ns, id);
@@ -2217,18 +2218,35 @@ out_unlock:
 	return ret;
 }
 
-static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
-			size_t size)
+int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+		     u8 log_page, void *log,
+		     size_t size, size_t offset)
 {
 	struct nvme_command c = { };
+	unsigned long dwlen = size / 4 - 1;
+
+	c.get_log_page.opcode = nvme_admin_get_log_page;
+
+	if (ns)
+		c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
+	else
+		c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
 
-	c.common.opcode = nvme_admin_get_log_page;
-	c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
-	c.common.cdw10[0] = nvme_get_log_dw10(log_page, size);
+	c.get_log_page.lid = log_page;
+	c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
+	c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
+	c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1));
+	c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL);
 
 	return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
 }
 
+static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
+			size_t size)
+{
+	return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
+}
+
 static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
 {
 	int ret;
@@ -2440,7 +2458,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 	struct nvme_ns *ns;
 	int ret;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	if (list_empty(&ctrl->namespaces)) {
 		ret = -ENOTTY;
 		goto out_unlock;
@@ -2457,14 +2475,14 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 	dev_warn(ctrl->device,
 		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
 	kref_get(&ns->kref);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 
 	ret = nvme_user_cmd(ctrl, ns, argp);
 	nvme_put_ns(ns);
 	return ret;
 
 out_unlock:
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 	return ret;
 }
 
@@ -2793,6 +2811,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
 
 	list_for_each_entry(h, &subsys->nsheads, entry) {
 		if (nvme_ns_ids_valid(&new->ids) &&
+		    !list_empty(&h->list) &&
 		    nvme_ns_ids_equal(&new->ids, &h->ids))
 			return -EINVAL;
 	}
@@ -2893,7 +2912,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns, *ret = NULL;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		if (ns->head->ns_id == nsid) {
 			if (!kref_get_unless_zero(&ns->kref))
@@ -2904,7 +2923,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		if (ns->head->ns_id > nsid)
 			break;
 	}
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 	return ret;
 }
 
@@ -2949,7 +2968,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	ns->queue = blk_mq_init_queue(ctrl->tagset);
 	if (IS_ERR(ns->queue))
 		goto out_free_ns;
-	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
 	ns->queue->queuedata = ns;
 	ns->ctrl = ctrl;
 
@@ -3015,9 +3034,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	__nvme_revalidate_disk(disk, id);
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_write(&ctrl->namespaces_rwsem);
 	list_add_tail(&ns->list, &ctrl->namespaces);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_write(&ctrl->namespaces_rwsem);
 
 	nvme_get_ctrl(ctrl);
 
@@ -3033,6 +3052,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 			ns->disk->disk_name);
 
 	nvme_mpath_add_disk(ns->head);
+	nvme_fault_inject_init(ns);
 	return;
 out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
@@ -3051,6 +3071,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
 		return;
 
+	nvme_fault_inject_fini(ns);
 	if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_id_attr_group);
@@ -3067,9 +3088,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		list_del_rcu(&ns->siblings);
 	mutex_unlock(&ns->ctrl->subsys->lock);
 
-	mutex_lock(&ns->ctrl->namespaces_mutex);
+	down_write(&ns->ctrl->namespaces_rwsem);
 	list_del_init(&ns->list);
-	mutex_unlock(&ns->ctrl->namespaces_mutex);
+	up_write(&ns->ctrl->namespaces_rwsem);
 
 	synchronize_srcu(&ns->head->srcu);
 	nvme_mpath_check_last_path(ns);
@@ -3093,11 +3114,18 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					unsigned nsid)
 {
 	struct nvme_ns *ns, *next;
+	LIST_HEAD(rm_list);
 
+	down_write(&ctrl->namespaces_rwsem);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
 		if (ns->head->ns_id > nsid)
-			nvme_ns_remove(ns);
+			list_move_tail(&ns->list, &rm_list);
 	}
+	up_write(&ctrl->namespaces_rwsem);
+
+	list_for_each_entry_safe(ns, next, &rm_list, list)
+		nvme_ns_remove(ns);
+
 }
 
 static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
@@ -3107,7 +3135,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
 	unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
 	int ret = 0;
 
-	ns_list = kzalloc(0x1000, GFP_KERNEL);
+	ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
 	if (!ns_list)
 		return -ENOMEM;
 
@@ -3173,9 +3201,9 @@ static void nvme_scan_work(struct work_struct *work)
 	}
 	nvme_scan_ns_sequential(ctrl, nn);
 done:
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_write(&ctrl->namespaces_rwsem);
 	list_sort(NULL, &ctrl->namespaces, ns_cmp);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_write(&ctrl->namespaces_rwsem);
 	kfree(id);
 }
 
@@ -3197,6 +3225,7 @@ EXPORT_SYMBOL_GPL(nvme_queue_scan);
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns, *next;
+	LIST_HEAD(ns_list);
 
 	/*
 	 * The dead states indicates the controller was not gracefully
@@ -3207,7 +3236,11 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	if (ctrl->state == NVME_CTRL_DEAD)
 		nvme_kill_queues(ctrl);
 
-	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
+	down_write(&ctrl->namespaces_rwsem);
+	list_splice_init(&ctrl->namespaces, &ns_list);
+	up_write(&ctrl->namespaces_rwsem);
+
+	list_for_each_entry_safe(ns, next, &ns_list, list)
 		nvme_ns_remove(ns);
 }
 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
@@ -3337,6 +3370,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 	flush_work(&ctrl->async_event_work);
 	flush_work(&ctrl->scan_work);
 	cancel_work_sync(&ctrl->fw_act_work);
+	if (ctrl->ops->stop_ctrl)
+		ctrl->ops->stop_ctrl(ctrl);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
 
@@ -3394,7 +3429,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	ctrl->state = NVME_CTRL_NEW;
 	spin_lock_init(&ctrl->lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
-	mutex_init(&ctrl->namespaces_mutex);
+	init_rwsem(&ctrl->namespaces_rwsem);
 	ctrl->dev = dev;
 	ctrl->ops = ops;
 	ctrl->quirks = quirks;
@@ -3455,7 +3490,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	if (ctrl->admin_q)
@@ -3474,7 +3509,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		/* Forcibly unquiesce queues to avoid blocking dispatch */
 		blk_mq_unquiesce_queue(ns->queue);
 	}
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_kill_queues);
 
@@ -3482,10 +3517,10 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_unfreeze_queue(ns->queue);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
 
@@ -3493,13 +3528,13 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
 		if (timeout <= 0)
 			break;
 	}
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
 
@@ -3507,10 +3542,10 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_freeze_queue_wait(ns->queue);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze);
 
@@ -3518,10 +3553,10 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_freeze_queue_start(ns->queue);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
 
@@ -3529,10 +3564,10 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_quiesce_queue(ns->queue);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_queues);
 
@@ -3540,10 +3575,10 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_unquiesce_queue(ns->queue);
-	mutex_unlock(&ctrl->namespaces_mutex);
+	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
new file mode 100644
index 000000000000..02632266ac06
--- /dev/null
+++ b/drivers/nvme/host/fault_inject.c
@@ -0,0 +1,79 @@
+/*
+ * fault injection support for nvme.
+ *
+ * Copyright (c) 2018, Oracle and/or its affiliates
+ *
+ */
+
+#include <linux/moduleparam.h>
+#include "nvme.h"
+
+static DECLARE_FAULT_ATTR(fail_default_attr);
+/* optional fault injection attributes boot time option:
+ * nvme_core.fail_request=<interval>,<probability>,<space>,<times>
+ */
+static char *fail_request;
+module_param(fail_request, charp, 0000);
+
+void nvme_fault_inject_init(struct nvme_ns *ns)
+{
+	struct dentry *dir, *parent;
+	char *name = ns->disk->disk_name;
+	struct nvme_fault_inject *fault_inj = &ns->fault_inject;
+	struct fault_attr *attr = &fault_inj->attr;
+
+	/* set default fault injection attribute */
+	if (fail_request)
+		setup_fault_attr(&fail_default_attr, fail_request);
+
+	/* create debugfs directory and attribute */
+	parent = debugfs_create_dir(name, NULL);
+	if (!parent) {
+		pr_warn("%s: failed to create debugfs directory\n", name);
+		return;
+	}
+
+	*attr = fail_default_attr;
+	dir = fault_create_debugfs_attr("fault_inject", parent, attr);
+	if (IS_ERR(dir)) {
+		pr_warn("%s: failed to create debugfs attr\n", name);
+		debugfs_remove_recursive(parent);
+		return;
+	}
+	ns->fault_inject.parent = parent;
+
+	/* create debugfs for status code and dont_retry */
+	fault_inj->status = NVME_SC_INVALID_OPCODE;
+	fault_inj->dont_retry = true;
+	debugfs_create_x16("status", 0600, dir, &fault_inj->status);
+	debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry);
+}
+
+void nvme_fault_inject_fini(struct nvme_ns *ns)
+{
+	/* remove debugfs directories */
+	debugfs_remove_recursive(ns->fault_inject.parent);
+}
+
+void nvme_should_fail(struct request *req)
+{
+	struct gendisk *disk = req->rq_disk;
+	struct nvme_ns *ns = NULL;
+	u16 status;
+
+	/*
+	 * make sure this request is coming from a valid namespace
+	 */
+	if (!disk)
+		return;
+
+	ns = disk->private_data;
+	if (ns && should_fail(&ns->fault_inject.attr, 1)) {
+		/* inject status code and DNR bit */
+		status = ns->fault_inject.status;
+		if (ns->fault_inject.dont_retry)
+			status |= NVME_SC_DNR;
+		nvme_req(req)->status = status;
+	}
+}
+EXPORT_SYMBOL_GPL(nvme_should_fail);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 1dc1387b7134..c6e719b2f3ca 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -588,6 +588,8 @@ nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
 		return ERR_PTR(-ESTALE);
 	}
 
+	rport->remoteport.port_role = pinfo->port_role;
+	rport->remoteport.port_id = pinfo->port_id;
 	rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
 	rport->dev_loss_end = 0;
 
@@ -768,8 +770,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
 	 */
 	if (nvme_reset_ctrl(&ctrl->ctrl)) {
 		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Couldn't schedule reset. "
-			"Deleting controller.\n",
+			"NVME-FC{%d}: Couldn't schedule reset.\n",
 			ctrl->cnum);
 		nvme_delete_ctrl(&ctrl->ctrl);
 	}
@@ -836,8 +837,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
 			/* if dev_loss_tmo==0, dev loss is immediate */
 			if (!portptr->dev_loss_tmo) {
 				dev_warn(ctrl->ctrl.device,
-					"NVME-FC{%d}: controller connectivity lost. "
-					"Deleting controller.\n",
+					"NVME-FC{%d}: controller connectivity lost.\n",
 					ctrl->cnum);
 				nvme_delete_ctrl(&ctrl->ctrl);
 			} else
@@ -2076,20 +2076,10 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
-	int ret;
-
-	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
-			atomic_read(&op->state) == FCPOP_STATE_ABORTED)
-		return BLK_EH_RESET_TIMER;
-
-	ret = __nvme_fc_abort_op(ctrl, op);
-	if (ret)
-		/* io wasn't active to abort */
-		return BLK_EH_NOT_HANDLED;
 
 	/*
 	 * we can't individually ABTS an io without affecting the queue,
-	 * thus killing the queue, adn thus the association.
+	 * thus killing the queue, and thus the association.
 	 * So resolve by performing a controller reset, which will stop
 	 * the host/io stack, terminate the association on the link,
 	 * and recreate an association on the link.
@@ -2191,7 +2181,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
 	struct nvme_command *sqe = &cmdiu->sqe;
 	u32 csn;
-	int ret;
+	int ret, opstate;
 
 	/*
 	 * before attempting to send the io, check to see if we believe
@@ -2269,6 +2259,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 					queue->lldd_handle, &op->fcp_req);
 
 	if (ret) {
+		opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+
 		if (!(op->flags & FCOP_FLAGS_AEN))
 			nvme_fc_unmap_data(ctrl, op->rq, op);
 
@@ -2889,14 +2882,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 		if (portptr->port_state == FC_OBJSTATE_ONLINE)
 			dev_warn(ctrl->ctrl.device,
 				"NVME-FC{%d}: Max reconnect attempts (%d) "
-				"reached. Removing controller\n",
+				"reached.\n",
 				ctrl->cnum, ctrl->ctrl.nr_reconnects);
 		else
 			dev_warn(ctrl->ctrl.device,
 				"NVME-FC{%d}: dev_loss_tmo (%d) expired "
-				"while waiting for remoteport connectivity. "
-				"Removing controller\n", ctrl->cnum,
-				portptr->dev_loss_tmo);
+				"while waiting for remoteport connectivity.\n",
+				ctrl->cnum, portptr->dev_loss_tmo);
 		WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
 	}
 }
@@ -3133,6 +3125,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	}
 
 	if (ret) {
+		nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+		cancel_work_sync(&ctrl->ctrl.reset_work);
+		cancel_delayed_work_sync(&ctrl->connect_work);
+
 		/* couldn't schedule retry - fail out */
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 50ef71ee3d86..41279da799ed 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -35,6 +35,10 @@ enum nvme_nvm_admin_opcode {
 	nvme_nvm_admin_set_bb_tbl	= 0xf1,
 };
 
+enum nvme_nvm_log_page {
+	NVME_NVM_LOG_REPORT_CHUNK	= 0xca,
+};
+
 struct nvme_nvm_ph_rw {
 	__u8			opcode;
 	__u8			flags;
@@ -51,6 +55,21 @@ struct nvme_nvm_ph_rw {
 	__le64			resv;
 };
 
+struct nvme_nvm_erase_blk {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le64			spba;
+	__le16			length;
+	__le16			control;
+	__le32			dsmgmt;
+	__le64			resv;
+};
+
 struct nvme_nvm_identity {
 	__u8			opcode;
 	__u8			flags;
@@ -59,8 +78,7 @@ struct nvme_nvm_identity {
 	__u64			rsvd[2];
 	__le64			prp1;
 	__le64			prp2;
-	__le32			chnl_off;
-	__u32			rsvd11[5];
+	__u32			rsvd11[6];
 };
 
 struct nvme_nvm_getbbtbl {
@@ -90,44 +108,18 @@ struct nvme_nvm_setbbtbl {
 	__u32			rsvd4[3];
 };
 
-struct nvme_nvm_erase_blk {
-	__u8			opcode;
-	__u8			flags;
-	__u16			command_id;
-	__le32			nsid;
-	__u64			rsvd[2];
-	__le64			prp1;
-	__le64			prp2;
-	__le64			spba;
-	__le16			length;
-	__le16			control;
-	__le32			dsmgmt;
-	__le64			resv;
-};
-
 struct nvme_nvm_command {
 	union {
 		struct nvme_common_command common;
-		struct nvme_nvm_identity identity;
 		struct nvme_nvm_ph_rw ph_rw;
+		struct nvme_nvm_erase_blk erase;
+		struct nvme_nvm_identity identity;
 		struct nvme_nvm_getbbtbl get_bb;
 		struct nvme_nvm_setbbtbl set_bb;
-		struct nvme_nvm_erase_blk erase;
 	};
 };
 
-#define NVME_NVM_LP_MLC_PAIRS 886
-struct nvme_nvm_lp_mlc {
-	__le16			num_pairs;
-	__u8			pairs[NVME_NVM_LP_MLC_PAIRS];
-};
-
-struct nvme_nvm_lp_tbl {
-	__u8			id[8];
-	struct nvme_nvm_lp_mlc	mlc;
-};
-
-struct nvme_nvm_id_group {
+struct nvme_nvm_id12_grp {
 	__u8			mtype;
 	__u8			fmtype;
 	__le16			res16;
@@ -150,11 +142,10 @@ struct nvme_nvm_id_group {
 	__le32			mpos;
 	__le32			mccap;
 	__le16			cpar;
-	__u8			reserved[10];
-	struct nvme_nvm_lp_tbl lptbl;
+	__u8			reserved[906];
 } __packed;
 
-struct nvme_nvm_addr_format {
+struct nvme_nvm_id12_addrf {
 	__u8			ch_offset;
 	__u8			ch_len;
 	__u8			lun_offset;
 	__u8			lun_len;
@@ -165,21 +156,22 @@ struct nvme_nvm_addr_format {
 	__u8			blk_len;
 	__u8			pg_offset;
 	__u8			pg_len;
-	__u8			sect_offset;
-	__u8			sect_len;
+	__u8			sec_offset;
+	__u8			sec_len;
 	__u8			res[4];
 } __packed;
 
-struct nvme_nvm_id {
+struct nvme_nvm_id12 {
 	__u8			ver_id;
 	__u8			vmnt;
 	__u8			cgrps;
 	__u8			res;
 	__le32			cap;
 	__le32			dom;
-	struct nvme_nvm_addr_format ppaf;
+	struct nvme_nvm_id12_addrf ppaf;
 	__u8			resv[228];
-	struct nvme_nvm_id_group groups[4];
+	struct nvme_nvm_id12_grp grp;
+	__u8			resv2[2880];
 } __packed;
 
 struct nvme_nvm_bb_tbl {
@@ -196,6 +188,68 @@ struct nvme_nvm_bb_tbl {
 	__u8	blk[0];
 };
 
+struct nvme_nvm_id20_addrf {
+	__u8			grp_len;
+	__u8			pu_len;
+	__u8			chk_len;
+	__u8			lba_len;
+	__u8			resv[4];
+};
+
+struct nvme_nvm_id20 {
+	__u8			mjr;
+	__u8			mnr;
+	__u8			resv[6];
+
+	struct nvme_nvm_id20_addrf lbaf;
+
+	__le32			mccap;
+	__u8			resv2[12];
+
+	__u8			wit;
+	__u8			resv3[31];
+
+	/* Geometry */
+	__le16			num_grp;
+	__le16			num_pu;
+	__le32			num_chk;
+	__le32			clba;
+	__u8			resv4[52];
+
+	/* Write data requirements */
+	__le32			ws_min;
+	__le32			ws_opt;
+	__le32			mw_cunits;
+	__le32			maxoc;
+	__le32			maxocpu;
+	__u8			resv5[44];
+
+	/* Performance related metrics */
+	__le32			trdt;
+	__le32			trdm;
+	__le32			twrt;
+	__le32			twrm;
+	__le32			tcrst;
+	__le32			tcrsm;
+	__u8			resv6[40];
+
+	/* Reserved area */
+	__u8			resv7[2816];
+
+	/* Vendor specific */
+	__u8			vs[1024];
+};
+
+struct nvme_nvm_chk_meta {
+	__u8	state;
+	__u8	type;
+	__u8	wi;
+	__u8	rsvd[5];
+	__le64	slba;
+	__le64	cnlb;
+	__le64	wp;
+};
+
 /*
  * Check we didn't inadvertently grow the command struct
  */
@@ -203,105 +257,238 @@ static inline void _nvme_nvm_check_size(void)
 {
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
+						sizeof(struct nvm_chk_meta));
+}
+
+static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
+				 struct nvme_nvm_id12_addrf *src)
+{
+	dst->ch_len = src->ch_len;
+	dst->lun_len = src->lun_len;
+	dst->blk_len = src->blk_len;
+	dst->pg_len = src->pg_len;
+	dst->pln_len = src->pln_len;
+	dst->sec_len = src->sec_len;
+
+	dst->ch_offset = src->ch_offset;
+	dst->lun_offset = src->lun_offset;
+	dst->blk_offset = src->blk_offset;
+	dst->pg_offset = src->pg_offset;
+	dst->pln_offset = src->pln_offset;
+	dst->sec_offset = src->sec_offset;
+
+	dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
+	dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
+	dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
+	dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
+	dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
+	dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
 }
 
-static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
+static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
+			     struct nvm_geo *geo)
 {
-	struct nvme_nvm_id_group *src;
-	struct nvm_id_group *grp;
+	struct nvme_nvm_id12_grp *src;
 	int sec_per_pg, sec_per_pl, pg_per_blk;
 
-	if (nvme_nvm_id->cgrps != 1)
+	if (id->cgrps != 1)
+		return -EINVAL;
+
+	src = &id->grp;
+
+	if (src->mtype != 0) {
+		pr_err("nvm: memory type not supported\n");
 		return -EINVAL;
+	}
+
+	/* 1.2 spec. only reports a single version id - unfold */
+	geo->major_ver_id = id->ver_id;
+	geo->minor_ver_id = 2;
 
-	src = &nvme_nvm_id->groups[0];
-	grp = &nvm_id->grp;
+	/* Set compacted version for upper layers */
+	geo->version = NVM_OCSSD_SPEC_12;
 
-	grp->mtype = src->mtype;
-	grp->fmtype = src->fmtype;
+	geo->num_ch = src->num_ch;
+	geo->num_lun = src->num_lun;
+	geo->all_luns = geo->num_ch * geo->num_lun;
 
-	grp->num_ch = src->num_ch;
-	grp->num_lun = src->num_lun;
+	geo->num_chk = le16_to_cpu(src->num_chk);
 
-	grp->num_chk = le16_to_cpu(src->num_chk);
-	grp->csecs = le16_to_cpu(src->csecs);
-	grp->sos = le16_to_cpu(src->sos);
+	geo->csecs = le16_to_cpu(src->csecs);
+	geo->sos = le16_to_cpu(src->sos);
 
 	pg_per_blk = le16_to_cpu(src->num_pg);
-	sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs;
+	sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
 	sec_per_pl = sec_per_pg * src->num_pln;
-	grp->clba = sec_per_pl * pg_per_blk;
-	grp->ws_per_chk = pg_per_blk;
-
-	grp->mpos = le32_to_cpu(src->mpos);
-	grp->cpar = le16_to_cpu(src->cpar);
-	grp->mccap = le32_to_cpu(src->mccap);
-
-	grp->ws_opt = grp->ws_min = sec_per_pg;
-	grp->ws_seq = NVM_IO_SNGL_ACCESS;
-
-	if (grp->mpos & 0x020202) {
-		grp->ws_seq = NVM_IO_DUAL_ACCESS;
-		grp->ws_opt <<= 1;
-	} else if (grp->mpos & 0x040404) {
-		grp->ws_seq = NVM_IO_QUAD_ACCESS;
-		grp->ws_opt <<= 2;
-	}
+	geo->clba = sec_per_pl * pg_per_blk;
+
+	geo->all_chunks = geo->all_luns * geo->num_chk;
+	geo->total_secs = geo->clba * geo->all_chunks;
+
+	geo->ws_min = sec_per_pg;
+	geo->ws_opt = sec_per_pg;
+	geo->mw_cunits = geo->ws_opt << 3;	/* default to MLC safe values */
 
-	grp->trdt = le32_to_cpu(src->trdt);
-	grp->trdm = le32_to_cpu(src->trdm);
-	grp->tprt = le32_to_cpu(src->tprt);
-	grp->tprm = le32_to_cpu(src->tprm);
-	grp->tbet = le32_to_cpu(src->tbet);
-	grp->tbem = le32_to_cpu(src->tbem);
+	/* Do not impose values for maximum number of open blocks as it is
+	 * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
+	 * specify these values through a quirk if restrictions apply.
+	 */
+	geo->maxoc = geo->all_luns * geo->num_chk;
+	geo->maxocpu = geo->num_chk;
+
+	geo->mccap = le32_to_cpu(src->mccap);
+
+	geo->trdt = le32_to_cpu(src->trdt);
+	geo->trdm = le32_to_cpu(src->trdm);
+	geo->tprt = le32_to_cpu(src->tprt);
+	geo->tprm = le32_to_cpu(src->tprm);
+	geo->tbet = le32_to_cpu(src->tbet);
+	geo->tbem = le32_to_cpu(src->tbem);
 
 	/* 1.2 compatibility */
-	grp->num_pln = src->num_pln;
-	grp->num_pg = le16_to_cpu(src->num_pg);
-	grp->fpg_sz = le16_to_cpu(src->fpg_sz);
+	geo->vmnt = id->vmnt;
+	geo->cap = le32_to_cpu(id->cap);
+	geo->dom = le32_to_cpu(id->dom);
+
+	geo->mtype = src->mtype;
+	geo->fmtype = src->fmtype;
+
+	geo->cpar = le16_to_cpu(src->cpar);
+	geo->mpos = le32_to_cpu(src->mpos);
+
+	geo->pln_mode = NVM_PLANE_SINGLE;
+
+	if (geo->mpos & 0x020202) {
+		geo->pln_mode = NVM_PLANE_DOUBLE;
+		geo->ws_opt <<= 1;
+	} else if (geo->mpos & 0x040404) {
+		geo->pln_mode = NVM_PLANE_QUAD;
+		geo->ws_opt <<= 2;
+	}
+
+	geo->num_pln = src->num_pln;
+	geo->num_pg = le16_to_cpu(src->num_pg);
+	geo->fpg_sz = le16_to_cpu(src->fpg_sz);
+
+	nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);
 
 	return 0;
 }
 
-static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
+static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
+				 struct nvme_nvm_id20_addrf *src)
+{
+	dst->ch_len = src->grp_len;
+	dst->lun_len = src->pu_len;
+	dst->chk_len = src->chk_len;
+	dst->sec_len = src->lba_len;
+
+	dst->sec_offset = 0;
+	dst->chk_offset = dst->sec_len;
+	dst->lun_offset = dst->chk_offset + dst->chk_len;
+	dst->ch_offset = dst->lun_offset + dst->lun_len;
+
+	dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
+	dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
+	dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
+	dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
+}
+
+static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
+			     struct nvm_geo *geo)
+{
+	geo->major_ver_id = id->mjr;
+	geo->minor_ver_id = id->mnr;
+
+	/* Set compacted version for upper layers */
+	geo->version = NVM_OCSSD_SPEC_20;
+
+	if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) {
+		pr_err("nvm: OCSSD version not supported (v%d.%d)\n",
+				geo->major_ver_id, geo->minor_ver_id);
+		return -EINVAL;
+	}
+
+	geo->num_ch = le16_to_cpu(id->num_grp);
+	geo->num_lun = le16_to_cpu(id->num_pu);
+	geo->all_luns = geo->num_ch * geo->num_lun;
+
+	geo->num_chk = le32_to_cpu(id->num_chk);
+	geo->clba = le32_to_cpu(id->clba);
+
+	geo->all_chunks = geo->all_luns * geo->num_chk;
+	geo->total_secs = geo->clba * geo->all_chunks;
+
+	geo->ws_min = le32_to_cpu(id->ws_min);
+	geo->ws_opt = le32_to_cpu(id->ws_opt);
+	geo->mw_cunits = le32_to_cpu(id->mw_cunits);
+	geo->maxoc = le32_to_cpu(id->maxoc);
+	geo->maxocpu = le32_to_cpu(id->maxocpu);
+
+	geo->trdt = le32_to_cpu(id->trdt);
+	geo->trdm = le32_to_cpu(id->trdm);
+	geo->tprt = le32_to_cpu(id->twrt);
+	geo->tprm = le32_to_cpu(id->twrm);
+	geo->tbet = le32_to_cpu(id->tcrst);
+	geo->tbem = le32_to_cpu(id->tcrsm);
+
+	nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);
+
+	return 0;
+}
+
+static int nvme_nvm_identity(struct nvm_dev *nvmdev)
 {
 	struct nvme_ns *ns = nvmdev->q->queuedata;
-	struct nvme_nvm_id *nvme_nvm_id;
+	struct nvme_nvm_id12 *id;
 	struct nvme_nvm_command c = {};
 	int ret;
 
 	c.identity.opcode = nvme_nvm_admin_identity;
 	c.identity.nsid = cpu_to_le32(ns->head->ns_id);
-	c.identity.chnl_off = 0;
 
-	nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
-	if (!nvme_nvm_id)
+	id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
+	if (!id)
 		return -ENOMEM;
 
 	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
-				nvme_nvm_id, sizeof(struct nvme_nvm_id));
+				id, sizeof(struct nvme_nvm_id12));
 	if (ret) {
 		ret = -EIO;
 		goto out;
 	}
 
-	nvm_id->ver_id = nvme_nvm_id->ver_id;
-	nvm_id->vmnt = nvme_nvm_id->vmnt;
-	nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
-	nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
-	memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
-					sizeof(struct nvm_addr_format));
+	/*
+	 * The 1.2 and 2.0 specifications share the first byte in their geometry
+	 * command to make it possible to know what version a device implements.
+	 */
+	switch (id->ver_id) {
+	case 1:
+		ret = nvme_nvm_setup_12(id, &nvmdev->geo);
+		break;
+	case 2:
+		ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
+							&nvmdev->geo);
+		break;
+	default:
+		dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
+							id->ver_id);
+		ret = -EINVAL;
+	}
 
-	ret = init_grps(nvm_id, nvme_nvm_id);
 out:
-	kfree(nvme_nvm_id);
+	kfree(id);
 	return ret;
 }
 
@@ -314,7 +501,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
 	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_nvm_command c = {};
 	struct nvme_nvm_bb_tbl *bb_tbl;
-	int nr_blks = geo->nr_chks * geo->plane_mode;
+	int nr_blks = geo->num_chk * geo->num_pln;
 	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
 	int ret = 0;
 
@@ -355,7 +542,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
 		goto out;
 	}
 
-	memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode);
+	memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
 out:
 	kfree(bb_tbl);
 	return ret;
@@ -382,6 +569,61 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
 	return ret;
 }
 
+/*
+ * Expect the lba in device format
+ */
+static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
+				 struct nvm_chk_meta *meta,
+				 sector_t slba, int nchks)
+{
+	struct nvm_geo *geo = &ndev->geo;
+	struct nvme_ns *ns = ndev->q->queuedata;
+	struct nvme_ctrl *ctrl = ns->ctrl;
+	struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta;
+	struct ppa_addr ppa;
+	size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
+	size_t log_pos, offset, len;
+	int ret, i;
+
+	/* Normalize lba address space to obtain log offset */
+	ppa.ppa = slba;
+	ppa = dev_to_generic_addr(ndev, ppa);
+
+	log_pos = ppa.m.chk;
+	log_pos += ppa.m.pu * geo->num_chk;
+	log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;
+
+	offset = log_pos * sizeof(struct nvme_nvm_chk_meta);
+
+	while (left) {
+		len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9);
+
+		ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
+				dev_meta, len, offset);
+		if (ret) {
+			dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
+			break;
+		}
+
+		for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
+			meta->state = dev_meta->state;
+			meta->type = dev_meta->type;
+			meta->wi = dev_meta->wi;
+			meta->slba = le64_to_cpu(dev_meta->slba);
+			meta->cnlb = le64_to_cpu(dev_meta->cnlb);
+			meta->wp = le64_to_cpu(dev_meta->wp);
+
+			meta++;
+			dev_meta++;
+		}
+
+		offset += len;
+		left -= len;
+	}
+
+	return ret;
+}
+
 static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
 				    struct nvme_nvm_command *c)
 {
@@ -513,6 +755,8 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.get_bb_tbl		= nvme_nvm_get_bb_tbl,
 	.set_bb_tbl		= nvme_nvm_set_bb_tbl,
 
+	.get_chk_meta		= nvme_nvm_get_chk_meta,
+
 	.submit_io		= nvme_nvm_submit_io,
 	.submit_io_sync		= nvme_nvm_submit_io_sync,
 
@@ -520,8 +764,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.destroy_dma_pool	= nvme_nvm_destroy_dma_pool,
 	.dev_dma_alloc		= nvme_nvm_dev_dma_alloc,
 	.dev_dma_free		= nvme_nvm_dev_dma_free,
-
-	.max_phys_sect		= 64,
 };
 
 static int nvme_nvm_submit_user_cmd(struct request_queue *q,
@@ -722,6 +964,15 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
 	}
 }
 
+void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
+{
+	struct nvm_dev *ndev = ns->ndev;
+	struct nvm_geo *geo = &ndev->geo;
+
+	geo->csecs = 1 << ns->lba_shift;
+	geo->sos = ns->ms;
+}
+
 int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
 {
 	struct request_queue *q = ns->queue;
@@ -748,125 +999,205 @@ void nvme_nvm_unregister(struct nvme_ns *ns)
 }
 
 static ssize_t nvm_dev_attr_show(struct device *dev,
-				 struct device_attribute *dattr, char *page)
+				 struct device_attribute *dattr, char *page)
 {
 	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
 	struct nvm_dev *ndev = ns->ndev;
-	struct nvm_id *id;
-	struct nvm_id_group *grp;
+	struct nvm_geo *geo = &ndev->geo;
 	struct attribute *attr;
 
 	if (!ndev)
 		return 0;
 
-	id = &ndev->identity;
-	grp = &id->grp;
 	attr = &dattr->attr;
 
 	if (strcmp(attr->name, "version") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id);
-	} else if (strcmp(attr->name, "vendor_opcode") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt);
+		if (geo->major_ver_id == 1)
+			return scnprintf(page, PAGE_SIZE, "%u\n",
+						geo->major_ver_id);
+		else
+			return scnprintf(page, PAGE_SIZE, "%u.%u\n",
+						geo->major_ver_id,
+						geo->minor_ver_id);
 	} else if (strcmp(attr->name, "capabilities") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
+	} else if (strcmp(attr->name, "read_typ") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
+	} else if (strcmp(attr->name, "read_max") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
+	} else {
+		return scnprintf(page,
+				 PAGE_SIZE,
+				 "Unhandled attr(%s) in `%s`\n",
+				 attr->name, __func__);
+	}
+}
+
+static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
+{
+	return scnprintf(page, PAGE_SIZE,
+		"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+				ppaf->ch_offset, ppaf->ch_len,
+				ppaf->lun_offset, ppaf->lun_len,
+				ppaf->pln_offset, ppaf->pln_len,
+				ppaf->blk_offset, ppaf->blk_len,
+				ppaf->pg_offset, ppaf->pg_len,
+				ppaf->sec_offset, ppaf->sec_len);
+}
+
+static ssize_t nvm_dev_attr_show_12(struct device *dev,
+		struct device_attribute *dattr, char *page)
+{
+	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+	struct nvm_dev *ndev = ns->ndev;
+	struct nvm_geo *geo = &ndev->geo;
+	struct attribute *attr;
+
+	if (!ndev)
+		return 0;
+
+	attr = &dattr->attr;
+
+	if (strcmp(attr->name, "vendor_opcode") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
 	} else if (strcmp(attr->name, "device_mode") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
 	/* kept for compatibility */
 	} else if (strcmp(attr->name, "media_manager") == 0) {
 		return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
 	} else if (strcmp(attr->name, "ppa_format") == 0) {
-		return scnprintf(page, PAGE_SIZE,
-			"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-			id->ppaf.ch_offset, id->ppaf.ch_len,
-			id->ppaf.lun_offset, id->ppaf.lun_len,
-			id->ppaf.pln_offset, id->ppaf.pln_len,
-			id->ppaf.blk_offset, id->ppaf.blk_len,
-			id->ppaf.pg_offset, id->ppaf.pg_len,
-			id->ppaf.sect_offset, id->ppaf.sect_len);
+		return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
 	} else if (strcmp(attr->name, "media_type") == 0) {	/* u8 */
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
 	} else if (strcmp(attr->name, "flash_media_type") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
 	} else if (strcmp(attr->name, "num_channels") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
 	} else if (strcmp(attr->name, "num_luns") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
 	} else if (strcmp(attr->name, "num_planes") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
 	} else if (strcmp(attr->name, "num_blocks") == 0) {	/* u16 */
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
 	} else if (strcmp(attr->name, "num_pages") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
 	} else if (strcmp(attr->name, "page_size") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
 	} else if (strcmp(attr->name, "hw_sector_size") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
 	} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos);
-	} else if (strcmp(attr->name, "read_typ") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt);
-	} else if (strcmp(attr->name, "read_max") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
 	} else if (strcmp(attr->name, "prog_typ") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
 	} else if (strcmp(attr->name, "prog_max") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
 	} else if (strcmp(attr->name, "erase_typ") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
 	} else if (strcmp(attr->name, "erase_max") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem);
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
 	} else if (strcmp(attr->name, "multiplane_modes") == 0) {
-		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos);
+		return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
 	} else if (strcmp(attr->name, "media_capabilities") == 0) {
-		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap);
+		return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
 	} else if (strcmp(attr->name, "max_phys_secs") == 0) {
-		return scnprintf(page, PAGE_SIZE, "%u\n",
-				ndev->ops->max_phys_sect);
+		return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
 	} else {
-		return scnprintf(page,
-				 PAGE_SIZE,
-				 "Unhandled attr(%s) in `nvm_dev_attr_show`\n",
-				 attr->name);
+		return scnprintf(page, PAGE_SIZE,
+			"Unhandled attr(%s) in `%s`\n",
+			attr->name, __func__);
+	}
+}
+
+static ssize_t nvm_dev_attr_show_20(struct device *dev,
+		struct device_attribute *dattr, char *page)
+{
+	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+	struct nvm_dev *ndev = ns->ndev;
+	struct nvm_geo *geo = &ndev->geo;
+	struct attribute *attr;
+
+	if (!ndev)
+		return 0;
+
+	attr = &dattr->attr;
+
+	if (strcmp(attr->name, "groups") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
+	} else if (strcmp(attr->name, "punits") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
+	} else if (strcmp(attr->name, "chunks") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
+	} else if (strcmp(attr->name, "clba") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
+	} else if (strcmp(attr->name, "ws_min") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
+	} else if (strcmp(attr->name, "ws_opt") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
+	} else if (strcmp(attr->name, "maxoc") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
+	} else if (strcmp(attr->name, "maxocpu") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
+	} else if (strcmp(attr->name, "mw_cunits") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
+	} else if (strcmp(attr->name, "write_typ") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
+	} else if (strcmp(attr->name, "write_max") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
+	} else if (strcmp(attr->name, "reset_typ") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
+	} else if (strcmp(attr->name, "reset_max") == 0) {
+		return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
+	} else {
+		return scnprintf(page, PAGE_SIZE,
+			"Unhandled attr(%s) in `%s`\n",
+			attr->name, __func__);
 	}
 }
 
-#define NVM_DEV_ATTR_RO(_name)					\
+#define NVM_DEV_ATTR_RO(_name)						\
 	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
+#define NVM_DEV_ATTR_12_RO(_name)					\
+	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
+#define NVM_DEV_ATTR_20_RO(_name)					\
+	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)
 
+/* general attributes */
 static NVM_DEV_ATTR_RO(version);
-static NVM_DEV_ATTR_RO(vendor_opcode);
 static NVM_DEV_ATTR_RO(capabilities);
-static NVM_DEV_ATTR_RO(device_mode);
-static NVM_DEV_ATTR_RO(ppa_format);
-static NVM_DEV_ATTR_RO(media_manager);
-
-static NVM_DEV_ATTR_RO(media_type);
-static NVM_DEV_ATTR_RO(flash_media_type);
-static NVM_DEV_ATTR_RO(num_channels);
-static NVM_DEV_ATTR_RO(num_luns);
-static NVM_DEV_ATTR_RO(num_planes);
-static NVM_DEV_ATTR_RO(num_blocks);
-static NVM_DEV_ATTR_RO(num_pages);
-static NVM_DEV_ATTR_RO(page_size);
-static NVM_DEV_ATTR_RO(hw_sector_size);
-static NVM_DEV_ATTR_RO(oob_sector_size);
+
 static NVM_DEV_ATTR_RO(read_typ);
 static NVM_DEV_ATTR_RO(read_max);
-static NVM_DEV_ATTR_RO(prog_typ);
-static NVM_DEV_ATTR_RO(prog_max);
-static NVM_DEV_ATTR_RO(erase_typ);
-static NVM_DEV_ATTR_RO(erase_max);
-static NVM_DEV_ATTR_RO(multiplane_modes);
-static NVM_DEV_ATTR_RO(media_capabilities);
-static NVM_DEV_ATTR_RO(max_phys_secs);
-
-static struct attribute *nvm_dev_attrs[] = {
+
+/* 1.2 values */
+static NVM_DEV_ATTR_12_RO(vendor_opcode);
+static NVM_DEV_ATTR_12_RO(device_mode);
+static NVM_DEV_ATTR_12_RO(ppa_format);
+static NVM_DEV_ATTR_12_RO(media_manager);
+static NVM_DEV_ATTR_12_RO(media_type);
+static NVM_DEV_ATTR_12_RO(flash_media_type);
+static NVM_DEV_ATTR_12_RO(num_channels);
+static NVM_DEV_ATTR_12_RO(num_luns);
+static NVM_DEV_ATTR_12_RO(num_planes);
+static NVM_DEV_ATTR_12_RO(num_blocks);
+static NVM_DEV_ATTR_12_RO(num_pages);
+static NVM_DEV_ATTR_12_RO(page_size);
+static NVM_DEV_ATTR_12_RO(hw_sector_size);
+static NVM_DEV_ATTR_12_RO(oob_sector_size);
+static NVM_DEV_ATTR_12_RO(prog_typ);
+static NVM_DEV_ATTR_12_RO(prog_max);
+static NVM_DEV_ATTR_12_RO(erase_typ);
+static NVM_DEV_ATTR_12_RO(erase_max);
+static NVM_DEV_ATTR_12_RO(multiplane_modes);
+static NVM_DEV_ATTR_12_RO(media_capabilities);
+static NVM_DEV_ATTR_12_RO(max_phys_secs);
+
+static struct attribute *nvm_dev_attrs_12[] = {
 	&dev_attr_version.attr,
-	&dev_attr_vendor_opcode.attr,
 	&dev_attr_capabilities.attr,
+
+	&dev_attr_vendor_opcode.attr,
 	&dev_attr_device_mode.attr,
 	&dev_attr_media_manager.attr,
-
 	&dev_attr_ppa_format.attr,
 	&dev_attr_media_type.attr,
 	&dev_attr_flash_media_type.attr,
@@ -887,22 +1218,92 @@ static struct attribute *nvm_dev_attrs[] = {
 	&dev_attr_multiplane_modes.attr,
 	&dev_attr_media_capabilities.attr,
 	&dev_attr_max_phys_secs.attr,
+
 	NULL,
 };
 
-static const struct attribute_group nvm_dev_attr_group = {
+static const struct attribute_group nvm_dev_attr_group_12 = {
 	.name		= "lightnvm",
-	.attrs		= nvm_dev_attrs,
+	.attrs		= nvm_dev_attrs_12,
+};
+
+/* 2.0 values */
+static NVM_DEV_ATTR_20_RO(groups);
+static NVM_DEV_ATTR_20_RO(punits);
+static NVM_DEV_ATTR_20_RO(chunks);
+static NVM_DEV_ATTR_20_RO(clba);
+static NVM_DEV_ATTR_20_RO(ws_min);
+static NVM_DEV_ATTR_20_RO(ws_opt);
+static NVM_DEV_ATTR_20_RO(maxoc);
+static NVM_DEV_ATTR_20_RO(maxocpu);
+static NVM_DEV_ATTR_20_RO(mw_cunits);
+static NVM_DEV_ATTR_20_RO(write_typ);
+static NVM_DEV_ATTR_20_RO(write_max);
+static NVM_DEV_ATTR_20_RO(reset_typ);
+static NVM_DEV_ATTR_20_RO(reset_max);
+
+static struct attribute *nvm_dev_attrs_20[] = {
+	&dev_attr_version.attr,
+	&dev_attr_capabilities.attr,
+
+	&dev_attr_groups.attr,
+	&dev_attr_punits.attr,
+	&dev_attr_chunks.attr,
+	&dev_attr_clba.attr,
+	&dev_attr_ws_min.attr,
+	&dev_attr_ws_opt.attr,
+	&dev_attr_maxoc.attr,
+	&dev_attr_maxocpu.attr,
+	&dev_attr_mw_cunits.attr,
+
+	&dev_attr_read_typ.attr,
+	&dev_attr_read_max.attr,
+	&dev_attr_write_typ.attr,
+	&dev_attr_write_max.attr,
+	&dev_attr_reset_typ.attr,
+	&dev_attr_reset_max.attr,
+
+	NULL,
+};
+
+static const struct attribute_group nvm_dev_attr_group_20 = {
+	.name		= "lightnvm",
+	.attrs		= nvm_dev_attrs_20,
 };
 
 int nvme_nvm_register_sysfs(struct nvme_ns *ns)
 {
-	return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
-					&nvm_dev_attr_group);
+	struct nvm_dev *ndev = ns->ndev;
+	struct nvm_geo *geo = &ndev->geo;
+
+	if (!ndev)
+		return -EINVAL;
+
+	switch (geo->major_ver_id) {
+	case 1:
+		return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+					&nvm_dev_attr_group_12);
+	case 2:
+		return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+					&nvm_dev_attr_group_20);
+	}
+
+	return -EINVAL;
 }
 
 void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
 {
-	sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
-					&nvm_dev_attr_group);
+	struct nvm_dev *ndev = ns->ndev;
+	struct nvm_geo *geo = &ndev->geo;
+
+	switch (geo->major_ver_id) {
+	case 1:
+		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+					&nvm_dev_attr_group_12);
+		break;
+	case 2:
+		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+					&nvm_dev_attr_group_20);
+		break;
+	}
 }
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 060f69e03427..956e0b8e9c4d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -44,12 +44,12 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	mutex_lock(&ctrl->namespaces_mutex);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		if (ns->head->disk)
kblockd_schedule_work(&ns->head->requeue_work); } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) @@ -162,13 +162,13 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) goto out; q->queuedata = head; blk_queue_make_request(q, nvme_ns_head_make_request); q->poll_fn = nvme_ns_head_poll; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d733b14ede9d..cf93690b3ffc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,6 +21,7 @@ #include <linux/blk-mq.h> #include <linux/lightnvm.h> #include <linux/sed-opal.h> +#include <linux/fault-inject.h> extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -140,7 +141,7 @@ struct nvme_ctrl { struct blk_mq_tag_set *tagset; struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; - struct mutex namespaces_mutex; + struct rw_semaphore namespaces_rwsem; struct device ctrl_device; struct device *device; /* char device */ struct cdev cdev; @@ -261,6 +262,15 @@ struct nvme_ns_head { int instance; }; +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +struct nvme_fault_inject { + struct fault_attr attr; + struct dentry *parent; + bool dont_retry; /* DNR, do not retry */ + u16 status; /* status code */ +}; +#endif + struct nvme_ns { struct list_head list; @@ -282,6 +292,11 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 u16 noiob; + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + struct nvme_fault_inject fault_inject; +#endif + }; struct nvme_ctrl_ops { @@ -298,8 +313,19 @@ struct nvme_ctrl_ops { void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); int (*reinit_request)(void *data, struct request *rq); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); }; +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +void nvme_fault_inject_init(struct nvme_ns *ns); +void nvme_fault_inject_fini(struct nvme_ns *ns); +void nvme_should_fail(struct request *req); +#else +static inline void nvme_fault_inject_init(struct nvme_ns *ns) {} +static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {} +static inline void nvme_should_fail(struct request *req) {} +#endif + static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) { u32 val = 0; @@ -336,6 +362,8 @@ static inline void nvme_end_request(struct request *req, __le16 status, rq->status = le16_to_cpu(status) >> 1; rq->result = result; + /* inject error when permitted by fault injection framework */ + nvme_should_fail(req); blk_mq_complete_request(req); } @@ -401,6 +429,9 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); +int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u8 log_page, void *log, size_t size, size_t offset); + extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; @@ -461,12 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_NVM +void 
nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); int nvme_nvm_register_sysfs(struct nvme_ns *ns); void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else +static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b6f43b738f03..295fbec1e5f2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -414,7 +414,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev)); + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0); } /** @@ -2197,7 +2197,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (!dead) { if (shutdown) nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + } + + nvme_stop_queues(&dev->ctrl); + if (!dead) { /* * If the controller is still alive tell it to stop using the * host memory buffer. In theory the shutdown / reset should @@ -2206,11 +2210,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) */ if (dev->host_mem_descs) nvme_set_host_mem(dev, 0); - - } - nvme_stop_queues(&dev->ctrl); - - if (!dead) { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } @@ -2416,6 +2415,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) return 0; } +static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) +{ + struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); + + return snprintf(buf, size, "%s", dev_name(&pdev->dev)); +} + static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, @@ -2425,6 +2431,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .reg_read64 = nvme_pci_reg_read64, .free_ctrl = nvme_pci_free_ctrl, .submit_async_event = nvme_pci_submit_async_event, + .get_address = nvme_pci_get_address, }; static int nvme_dev_map(struct nvme_dev *dev) @@ -2461,10 +2468,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { /* * Samsung SSD 960 EVO drops off the PCIe bus after system - * suspend on a Ryzen board, ASUS PRIME B350M-A. 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b6f43b738f03..295fbec1e5f2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -414,7 +414,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nvme_dev *dev = set->driver_data;
 
-	return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev));
+	return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0);
 }
 
 /**
@@ -2197,7 +2197,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	if (!dead) {
 		if (shutdown)
 			nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+	}
+
+	nvme_stop_queues(&dev->ctrl);
 
+	if (!dead) {
 		/*
 		 * If the controller is still alive tell it to stop using the
 		 * host memory buffer.  In theory the shutdown / reset should
@@ -2206,11 +2210,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		 */
 		if (dev->host_mem_descs)
 			nvme_set_host_mem(dev, 0);
-
-	}
-	nvme_stop_queues(&dev->ctrl);
-
-	if (!dead) {
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
@@ -2416,6 +2415,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 	return 0;
 }
 
+static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
+{
+	struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
+
+	return snprintf(buf, size, "%s", dev_name(&pdev->dev));
+}
+
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.name			= "pcie",
 	.module			= THIS_MODULE,
@@ -2425,6 +2431,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.reg_read64		= nvme_pci_reg_read64,
 	.free_ctrl		= nvme_pci_free_ctrl,
 	.submit_async_event	= nvme_pci_submit_async_event,
+	.get_address		= nvme_pci_get_address,
 };
 
 static int nvme_dev_map(struct nvme_dev *dev)
@@ -2461,10 +2468,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
 	} else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
 		/*
 		 * Samsung SSD 960 EVO drops off the PCIe bus after system
-		 * suspend on a Ryzen board, ASUS PRIME B350M-A.
+		 * suspend on a Ryzen board, ASUS PRIME B350M-A, as well as
+		 * within a few minutes after bootup on a Coffee Lake board,
+		 * ASUS PRIME Z370-A.
 		 */
 		if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
-		    dmi_match(DMI_BOARD_NAME, "PRIME B350M-A"))
+		    (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
+		     dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
 			return NVME_QUIRK_NO_APST;
 	}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4d84a73ee12d..758537e9ba07 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -867,6 +867,14 @@ out_free_io_queues:
 	return ret;
 }
 
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+	cancel_work_sync(&ctrl->err_work);
+	cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
 static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -899,7 +907,6 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
 				ctrl->ctrl.opts->reconnect_delay * HZ);
 	} else {
-		dev_info(ctrl->ctrl.device, "Removing controller...\n");
 		nvme_delete_ctrl(&ctrl->ctrl);
 	}
 }
@@ -974,8 +981,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	nvme_start_queues(&ctrl->ctrl);
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
-		/* state change failure should never happen */
-		WARN_ON_ONCE(1);
+		/* state change failure is ok if we're in DELETING state */
+		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
 		return;
 	}
 
@@ -1719,9 +1726,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	cancel_work_sync(&ctrl->err_work);
-	cancel_delayed_work_sync(&ctrl->reconnect_work);
-
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
@@ -1799,6 +1803,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.stop_ctrl		= nvme_rdma_stop_ctrl,
 };
 
 static inline bool
@@ -2025,15 +2030,26 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
 static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct nvme_rdma_ctrl *ctrl;
+	struct nvme_rdma_device *ndev;
+	bool found = false;
+
+	mutex_lock(&device_list_mutex);
+	list_for_each_entry(ndev, &device_list, entry) {
+		if (ndev->dev == ib_device) {
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&device_list_mutex);
+
+	if (!found)
+		return;
 
 	/* Delete all controllers using this device */
 	mutex_lock(&nvme_rdma_ctrl_mutex);
 	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
 		if (ctrl->device->dev != ib_device)
 			continue;
-		dev_info(ctrl->ctrl.device,
-			"Removing ctrl: NQN \"%s\", addr %pISp\n",
-			ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
 		nvme_delete_ctrl(&ctrl->ctrl);
 	}
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
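nvme_rdma_stop_ctrl() above is the first user of the .stop_ctrl op added to nvme_ctrl_ops: cancelling err_work and reconnect_work moves out of nvme_rdma_shutdown_ctrl() and into the common stop path, so it happens exactly once per teardown. The corresponding core.c hunk falls outside the hunks shown on this page, but the call site is roughly along these lines — a sketch, not the verbatim change:

void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
	nvme_stop_keep_alive(ctrl);
	flush_work(&ctrl->async_event_work);

	/* let the transport cancel reconnect/error-recovery work */
	if (ctrl->ops->stop_ctrl)
		ctrl->ops->stop_ctrl(ctrl);
}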
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e6b2d2af81b6..ad9ff27234b5 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -23,6 +23,15 @@
 static const struct config_item_type nvmet_host_type;
 static const struct config_item_type nvmet_subsys_type;
 
+static const struct nvmet_transport_name {
+	u8		type;
+	const char	*name;
+} nvmet_transport_names[] = {
+	{ NVMF_TRTYPE_RDMA,	"rdma" },
+	{ NVMF_TRTYPE_FC,	"fc" },
+	{ NVMF_TRTYPE_LOOP,	"loop" },
+};
+
 /*
  * nvmet_port Generic ConfigFS definitions.
  * Used in any place in the ConfigFS tree that refers to an address.
@@ -208,43 +217,30 @@ CONFIGFS_ATTR(nvmet_, addr_trsvcid);
 
 static ssize_t nvmet_addr_trtype_show(struct config_item *item,
 		char *page)
 {
-	switch (to_nvmet_port(item)->disc_addr.trtype) {
-	case NVMF_TRTYPE_RDMA:
-		return sprintf(page, "rdma\n");
-	case NVMF_TRTYPE_LOOP:
-		return sprintf(page, "loop\n");
-	case NVMF_TRTYPE_FC:
-		return sprintf(page, "fc\n");
-	default:
-		return sprintf(page, "\n");
+	struct nvmet_port *port = to_nvmet_port(item);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
+		if (port->disc_addr.trtype != nvmet_transport_names[i].type)
+			continue;
+		return sprintf(page, "%s\n", nvmet_transport_names[i].name);
 	}
+
+	return sprintf(page, "\n");
 }
 
 static void nvmet_port_init_tsas_rdma(struct nvmet_port *port)
 {
-	port->disc_addr.trtype = NVMF_TRTYPE_RDMA;
-	memset(&port->disc_addr.tsas.rdma, 0, NVMF_TSAS_SIZE);
 	port->disc_addr.tsas.rdma.qptype = NVMF_RDMA_QPTYPE_CONNECTED;
 	port->disc_addr.tsas.rdma.prtype = NVMF_RDMA_PRTYPE_NOT_SPECIFIED;
 	port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM;
 }
 
-static void nvmet_port_init_tsas_loop(struct nvmet_port *port)
-{
-	port->disc_addr.trtype = NVMF_TRTYPE_LOOP;
-	memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
-}
-
-static void nvmet_port_init_tsas_fc(struct nvmet_port *port)
-{
-	port->disc_addr.trtype = NVMF_TRTYPE_FC;
-	memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
-}
-
 static ssize_t nvmet_addr_trtype_store(struct config_item *item,
 		const char *page, size_t count)
 {
 	struct nvmet_port *port = to_nvmet_port(item);
+	int i;
 
 	if (port->enabled) {
 		pr_err("Cannot modify address while enabled\n");
@@ -252,17 +248,18 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
 		return -EACCES;
 	}
 
-	if (sysfs_streq(page, "rdma")) {
-		nvmet_port_init_tsas_rdma(port);
-	} else if (sysfs_streq(page, "loop")) {
-		nvmet_port_init_tsas_loop(port);
-	} else if (sysfs_streq(page, "fc")) {
-		nvmet_port_init_tsas_fc(port);
-	} else {
-		pr_err("Invalid value '%s' for trtype\n", page);
-		return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
+		if (sysfs_streq(page, nvmet_transport_names[i].name))
+			goto found;
 	}
 
+	pr_err("Invalid value '%s' for trtype\n", page);
+	return -EINVAL;
+found:
+	memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
+	port->disc_addr.trtype = nvmet_transport_names[i].type;
+	if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
+		nvmet_port_init_tsas_rdma(port);
 	return count;
 }
 
@@ -333,13 +330,13 @@ out_unlock:
 	return ret ? ret : count;
 }
 
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
 static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
 {
 	return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
 }
 
-CONFIGFS_ATTR(nvmet_ns_, device_uuid);
-
 static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
 		const char *page, size_t count)
 {
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a78029e4e5f4..e95424f172fd 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -18,7 +18,7 @@
 
 #include "nvmet.h"
 
-static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
+static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
 static DEFINE_IDA(cntlid_ida);
 
 /*
@@ -137,7 +137,7 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
 	schedule_work(&ctrl->async_event_work);
 }
 
-int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
+int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
 {
 	int ret = 0;
 
@@ -152,7 +152,7 @@ int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
 }
 EXPORT_SYMBOL_GPL(nvmet_register_transport);
 
-void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
+void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
 {
 	down_write(&nvmet_config_sem);
 	nvmet_transports[ops->type] = NULL;
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
 
 int nvmet_enable_port(struct nvmet_port *port)
 {
-	struct nvmet_fabrics_ops *ops;
+	const struct nvmet_fabrics_ops *ops;
 	int ret;
 
 	lockdep_assert_held(&nvmet_config_sem);
@@ -195,7 +195,7 @@ int nvmet_enable_port(struct nvmet_port *port)
 
 void nvmet_disable_port(struct nvmet_port *port)
 {
-	struct nvmet_fabrics_ops *ops;
+	const struct nvmet_fabrics_ops *ops;
 
 	lockdep_assert_held(&nvmet_config_sem);
 
@@ -500,7 +500,7 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
-		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
+		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
 {
 	u8 flags = req->cmd->common.flags;
 	u16 status;
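The constification above lets every transport place its nvmet_fabrics_ops table in rodata, as the fc, loop and rdma diffs below do. For illustration, a minimal hypothetical transport registration against the const API (all demo_* callbacks are placeholders):

static int demo_add_port(struct nvmet_port *port);
static void demo_remove_port(struct nvmet_port *port);
static void demo_queue_response(struct nvmet_req *req);
static void demo_delete_ctrl(struct nvmet_ctrl *ctrl);

static const struct nvmet_fabrics_ops demo_ops = {
	.owner		= THIS_MODULE,
	.type		= NVMF_TRTYPE_LOOP,	/* illustrative trtype */
	.add_port	= demo_add_port,
	.remove_port	= demo_remove_port,
	.queue_response	= demo_queue_response,
	.delete_ctrl	= demo_delete_ctrl,
};

static int __init demo_init(void)
{
	return nvmet_register_transport(&demo_ops);
}

static void __exit demo_exit(void)
{
	nvmet_unregister_transport(&demo_ops);
}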
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 8f3b57b4c97b..a72425d8bce0 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -43,7 +43,8 @@ void nvmet_referral_disable(struct nvmet_port *port)
 }
 
 static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
-		struct nvmet_port *port, char *subsys_nqn, u8 type, u32 numrec)
+		struct nvmet_port *port, char *subsys_nqn, char *traddr,
+		u8 type, u32 numrec)
 {
 	struct nvmf_disc_rsp_page_entry *e = &hdr->entries[numrec];
 
@@ -56,11 +57,30 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
 	e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
 	e->subtype = type;
 	memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
-	memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+	memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE);
 	memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE);
 	memcpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
 }
 
+/*
+ * nvmet_set_disc_traddr - set a correct discovery log entry traddr
+ *
+ * IP based transports (e.g. RDMA) can listen on "any" IPv4/IPv6 addresses
+ * (INADDR_ANY or IN6ADDR_ANY_INIT). The discovery log page traddr reply
+ * must not contain that "any" IP address. If the transport implements
+ * .disc_traddr, use it: this callback will set the discovery traddr
+ * from the req->port address in case the port in question listens on an
+ * "any" IP address.
+ */
+static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port,
+		char *traddr)
+{
+	if (req->ops->disc_traddr)
+		req->ops->disc_traddr(req, port, traddr);
+	else
+		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+}
+
 static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 {
 	const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
@@ -90,8 +110,11 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 		if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn))
 			continue;
 		if (residual_len >= entry_size) {
+			char traddr[NVMF_TRADDR_SIZE];
+
+			nvmet_set_disc_traddr(req, req->port, traddr);
 			nvmet_format_discovery_entry(hdr, req->port,
-					p->subsys->subsysnqn,
+					p->subsys->subsysnqn, traddr,
 					NVME_NQN_NVME, numrec);
 			residual_len -= entry_size;
 		}
@@ -102,6 +125,7 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 		if (residual_len >= entry_size) {
 			nvmet_format_discovery_entry(hdr, r,
 					NVME_DISC_SUBSYS_NAME,
+					r->disc_addr.traddr,
 					NVME_NQN_DISC, numrec);
 			residual_len -= entry_size;
 		}
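The contract of the new hook, then: a transport listening on a wildcard address should report the address the connection actually arrived on, never the wildcard. A hypothetical IP-based implementation might look like the following (the demo_* helpers are assumed; nvmet_rdma_disc_port_addr() later in this diff does the real thing via the rdma_cm_id):

static struct sockaddr *demo_port_listen_addr(struct nvmet_port *port);
static struct sockaddr *demo_req_src_addr(struct nvmet_req *req);

static void demo_disc_traddr(struct nvmet_req *req,
		struct nvmet_port *port, char *traddr)
{
	struct sockaddr *listen = demo_port_listen_addr(port);

	if (inet_addr_is_any(listen))
		/* wildcard listener: report the per-connection address */
		snprintf(traddr, NVMF_TRADDR_SIZE, "%pISc",
			 demo_req_src_addr(req));
	else
		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
}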
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 9b39a6cb1935..33ee8d3145f8 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -87,6 +87,7 @@ struct nvmet_fc_fcp_iod {
 	struct nvmet_req		req;
 	struct work_struct		work;
 	struct work_struct		done_work;
+	struct work_struct		defer_work;
 
 	struct nvmet_fc_tgtport		*tgtport;
 	struct nvmet_fc_tgt_queue	*queue;
@@ -224,6 +225,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt);
 static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
 static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
 static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
+static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
 static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
 static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
 static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
@@ -429,6 +431,7 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
 	for (i = 0; i < queue->sqsize; fod++, i++) {
 		INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
 		INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
+		INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work);
 		fod->tgtport = tgtport;
 		fod->queue = queue;
 		fod->active = false;
@@ -512,6 +515,17 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
 }
 
 static void
+nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work)
+{
+	struct nvmet_fc_fcp_iod *fod =
+		container_of(work, struct nvmet_fc_fcp_iod, defer_work);
+
+	/* Submit deferred IO for processing */
+	nvmet_fc_queue_fcp_req(fod->tgtport, fod->queue, fod->fcpreq);
+
+}
+
+static void
 nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 			struct nvmet_fc_fcp_iod *fod)
 {
@@ -568,13 +582,12 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 	/* inform LLDD IO is now being processed */
 	tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
 
-	/* Submit deferred IO for processing */
-	nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
-
 	/*
 	 * Leave the queue lookup get reference taken when
 	 * fod was originally allocated.
 	 */
+
+	queue_work(queue->work_q, &fod->defer_work);
 }
 
 static int
@@ -1550,7 +1563,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
 
 static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req);
 
-static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
+static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
 
 static void
 nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq)
@@ -2505,7 +2518,7 @@ nvmet_fc_remove_port(struct nvmet_port *port)
 	/* nothing to do */
 }
 
-static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
+static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_FC,
 	.msdbd			= 1,
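The net effect of the fc.c change: nvmet_fc_free_fcp_iod() no longer resubmits a deferred command inline from the path that releases the previous I/O; it only informs the LLDD and queues defer_work, so the deferred command is picked up in workqueue context instead. The shape of the pattern, reduced to a generic sketch (all names hypothetical):

#include <linux/workqueue.h>

struct demo_queue;

struct demo_iod {
	struct work_struct	defer_work;
	struct demo_queue	*queue;
	void			*fcpreq;
};

static void demo_submit(struct demo_queue *queue, void *fcpreq);

static void demo_defer_work(struct work_struct *work)
{
	struct demo_iod *iod =
		container_of(work, struct demo_iod, defer_work);

	/* now runs in workqueue context, not in the release path */
	demo_submit(iod->queue, iod->fcpreq);
}

static void demo_release_iod(struct demo_iod *iod)
{
	/* previously: demo_submit(iod->queue, iod->fcpreq) ran inline */
	queue_work(system_wq, &iod->defer_work);
}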
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 861d1509b22b..a350765d2d5c 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -71,7 +71,7 @@ static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
 static void nvme_loop_queue_response(struct nvmet_req *nvme_req);
 static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl);
 
-static struct nvmet_fabrics_ops nvme_loop_ops;
+static const struct nvmet_fabrics_ops nvme_loop_ops;
 
 static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
 {
@@ -675,7 +675,7 @@ static void nvme_loop_remove_port(struct nvmet_port *port)
 	nvmet_loop_port = NULL;
 }
 
-static struct nvmet_fabrics_ops nvme_loop_ops = {
+static const struct nvmet_fabrics_ops nvme_loop_ops = {
 	.owner		= THIS_MODULE,
 	.type		= NVMF_TRTYPE_LOOP,
 	.add_port	= nvme_loop_add_port,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 417f6c0331cc..15fd84ab21f8 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -130,7 +130,7 @@ struct nvmet_ctrl {
 	struct delayed_work	ka_work;
 	struct work_struct	fatal_err_work;
 
-	struct nvmet_fabrics_ops *ops;
+	const struct nvmet_fabrics_ops *ops;
 
 	char			subsysnqn[NVMF_NQN_FIELD_LEN];
 	char			hostnqn[NVMF_NQN_FIELD_LEN];
@@ -209,6 +209,8 @@ struct nvmet_fabrics_ops {
 	int (*add_port)(struct nvmet_port *port);
 	void (*remove_port)(struct nvmet_port *port);
 	void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
+	void (*disc_traddr)(struct nvmet_req *req,
+			struct nvmet_port *port, char *traddr);
 };
 
 #define NVMET_MAX_INLINE_BIOVEC	8
@@ -231,7 +233,7 @@ struct nvmet_req {
 	struct nvmet_port	*port;
 
 	void (*execute)(struct nvmet_req *req);
-	struct nvmet_fabrics_ops *ops;
+	const struct nvmet_fabrics_ops *ops;
 };
 
 static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
@@ -267,7 +269,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
 u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
 
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
-		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
+		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
 void nvmet_req_uninit(struct nvmet_req *req);
 void nvmet_req_execute(struct nvmet_req *req);
 void nvmet_req_complete(struct nvmet_req *req, u16 status);
@@ -301,8 +303,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
 void nvmet_ns_free(struct nvmet_ns *ns);
 
-int nvmet_register_transport(struct nvmet_fabrics_ops *ops);
-void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops);
+int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
+void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
 
 int nvmet_enable_port(struct nvmet_port *port);
 void nvmet_disable_port(struct nvmet_port *port);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 978e169c11bf..52e0c5d579a7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,7 +77,6 @@ enum nvmet_rdma_queue_state {
 	NVMET_RDMA_Q_CONNECTING,
 	NVMET_RDMA_Q_LIVE,
 	NVMET_RDMA_Q_DISCONNECTING,
-	NVMET_RDMA_IN_DEVICE_REMOVAL,
 };
 
 struct nvmet_rdma_queue {
@@ -137,7 +136,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
 static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
 static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
 
-static struct nvmet_fabrics_ops nvmet_rdma_ops;
+static const struct nvmet_fabrics_ops nvmet_rdma_ops;
 
 /* XXX: really should move to a generic header sooner or later.. */
 static inline u32 get_unaligned_le24(const u8 *p)
@@ -914,8 +913,11 @@ err_destroy_cq:
 
 static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
 {
-	ib_drain_qp(queue->cm_id->qp);
-	rdma_destroy_qp(queue->cm_id);
+	struct ib_qp *qp = queue->cm_id->qp;
+
+	ib_drain_qp(qp);
+	rdma_destroy_id(queue->cm_id);
+	ib_destroy_qp(qp);
 	ib_free_cq(queue->cq);
 }
 
@@ -940,15 +942,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
 {
 	struct nvmet_rdma_queue *queue =
 		container_of(w, struct nvmet_rdma_queue, release_work);
-	struct rdma_cm_id *cm_id = queue->cm_id;
 	struct nvmet_rdma_device *dev = queue->dev;
-	enum nvmet_rdma_queue_state state = queue->state;
 
 	nvmet_rdma_free_queue(queue);
 
-	if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
-		rdma_destroy_id(cm_id);
-
 	kref_put(&dev->ref, nvmet_rdma_free_dev);
 }
 
@@ -1153,8 +1150,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 	}
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
-	if (ret)
-		goto release_queue;
+	if (ret) {
+		schedule_work(&queue->release_work);
+		/* Destroying rdma_cm id is not needed here */
+		return 0;
+	}
 
 	mutex_lock(&nvmet_rdma_queue_mutex);
 	list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list);
@@ -1162,8 +1162,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	return 0;
 
-release_queue:
-	nvmet_rdma_free_queue(queue);
 put_device:
 	kref_put(&ndev->ref, nvmet_rdma_free_dev);
 
@@ -1209,7 +1207,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 	case NVMET_RDMA_Q_CONNECTING:
 	case NVMET_RDMA_Q_LIVE:
 		queue->state = NVMET_RDMA_Q_DISCONNECTING;
-	case NVMET_RDMA_IN_DEVICE_REMOVAL:
 		disconnect = true;
 		break;
 	case NVMET_RDMA_Q_DISCONNECTING:
@@ -1322,13 +1319,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 	case RDMA_CM_EVENT_ADDR_CHANGE:
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		/*
-		 * We might end up here when we already freed the qp
-		 * which means queue release sequence is in progress,
-		 * so don't get in the way...
-		 */
-		if (queue)
-			nvmet_rdma_queue_disconnect(queue);
+		nvmet_rdma_queue_disconnect(queue);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		ret = nvmet_rdma_device_removal(cm_id, queue);
@@ -1445,7 +1436,24 @@ static void nvmet_rdma_remove_port(struct nvmet_port *port)
 		rdma_destroy_id(cm_id);
 }
 
-static struct nvmet_fabrics_ops nvmet_rdma_ops = {
+static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
+		struct nvmet_port *port, char *traddr)
+{
+	struct rdma_cm_id *cm_id = port->priv;
+
+	if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
+		struct nvmet_rdma_rsp *rsp =
+			container_of(req, struct nvmet_rdma_rsp, req);
+		struct rdma_cm_id *req_cm_id = rsp->queue->cm_id;
+		struct sockaddr *addr = (void *)&req_cm_id->route.addr.src_addr;
+
+		sprintf(traddr, "%pISc", addr);
+	} else {
+		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+	}
+}
+
+static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_RDMA,
 	.sqe_inline_size	= NVMET_RDMA_INLINE_DATA_SIZE,
@@ -1455,13 +1463,31 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.remove_port		= nvmet_rdma_remove_port,
 	.queue_response		= nvmet_rdma_queue_response,
 	.delete_ctrl		= nvmet_rdma_delete_ctrl,
+	.disc_traddr		= nvmet_rdma_disc_port_addr,
 };
 
 static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct nvmet_rdma_queue *queue, *tmp;
+	struct nvmet_rdma_device *ndev;
+	bool found = false;
 
-	/* Device is being removed, delete all queues using this device */
+	mutex_lock(&device_list_mutex);
+	list_for_each_entry(ndev, &device_list, entry) {
+		if (ndev->device == ib_device) {
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&device_list_mutex);
+
+	if (!found)
+		return;
+
+	/*
+	 * IB Device that is used by nvmet controllers is being removed,
+	 * delete all queues using this device.
+	 */
 	mutex_lock(&nvmet_rdma_queue_mutex);
 	list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
 				 queue_list) {