From 63bcf9014e95a7d279d10d8e2caa5d88db2b1855 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Sat, 14 Sep 2024 14:01:22 +0200 Subject: nvme-multipath: system fails to create generic nvme device NVME_NSHEAD_DISK_LIVE is a flag for struct nvme_ns_head, not nvme_ns. The current code has a typo causing NVME_NSHEAD_DISK_LIVE never to be cleared once device_add_disk_fails, causing the system never to create the 'generic' character device. Even several rescan attempts will change the situation and the system has to be rebooted to fix the issue. Fixes: 11384580e332 ("nvme-multipath: add error handling support for add_disk()") Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 518e22dd4f9b..6d97058cde7a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -648,7 +648,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) rc = device_add_disk(&head->subsys->dev, head->disk, nvme_ns_attr_groups); if (rc) { - clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags); + clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags); return; } nvme_add_ns_head_cdev(head); -- cgit v1.2.3 From 3b97f5a05cfc55e7729ff3769f63eef64e2178bb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Sat, 14 Sep 2024 14:01:23 +0200 Subject: nvme-multipath: avoid hang on inaccessible namespaces During repetitive namespace remapping operations on the target the namespace might have changed between the time the initial scan was performed, and partition scan was invoked by device_add_disk() in nvme_mpath_set_live(). We then end up with a stuck scanning process: [<0>] folio_wait_bit_common+0x12a/0x310 [<0>] filemap_read_folio+0x97/0xd0 [<0>] do_read_cache_folio+0x108/0x390 [<0>] read_part_sector+0x31/0xa0 [<0>] read_lba+0xc5/0x160 [<0>] efi_partition+0xd9/0x8f0 [<0>] bdev_disk_changed+0x23d/0x6d0 [<0>] blkdev_get_whole+0x78/0xc0 [<0>] bdev_open+0x2c6/0x3b0 [<0>] bdev_file_open_by_dev+0xcb/0x120 [<0>] disk_scan_partitions+0x5d/0x100 [<0>] device_add_disk+0x402/0x420 [<0>] nvme_mpath_set_live+0x4f/0x1f0 [nvme_core] [<0>] nvme_mpath_add_disk+0x107/0x120 [nvme_core] [<0>] nvme_alloc_ns+0xac6/0xe60 [nvme_core] [<0>] nvme_scan_ns+0x2dd/0x3e0 [nvme_core] [<0>] nvme_scan_work+0x1a3/0x490 [nvme_core] This happens when we have several paths, some of which are inaccessible, and the active paths are removed first. Then nvme_find_path() will requeue I/O in the ns_head (as paths are present), but the requeue list is never triggered as all remaining paths are inactive. This patch checks for NVME_NSHEAD_DISK_LIVE in nvme_available_path(), and requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared once the last path has been removed to properly terminate pending I/O. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 6d97058cde7a..48e7a8906d01 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -421,6 +421,9 @@ static bool nvme_available_path(struct nvme_ns_head *head) { struct nvme_ns *ns; + if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) + return NULL; + list_for_each_entry_rcu(ns, &head->list, siblings) { if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) continue; @@ -969,11 +972,16 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { if (!head->disk) return; - kblockd_schedule_work(&head->requeue_work); - if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { + if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { nvme_cdev_del(&head->cdev, &head->cdev_device); del_gendisk(head->disk); } + /* + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared + * to allow multipath to fail all I/O. + */ + synchronize_srcu(&head->srcu); + kblockd_schedule_work(&head->requeue_work); } void nvme_mpath_remove_disk(struct nvme_ns_head *head) -- cgit v1.2.3 From 83340d9c6178107df581c3ebbae0e28d0b15e879 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 24 Sep 2024 18:01:34 +0900 Subject: nvme: null terminate nvme_tls_attrs Commit 1e48b34c9bc7 ("nvme: split off TLS sysfs attributes into a separate group") introduced the struct attribute array nvme_tls_attrs. However, the array was not null terminated and caused BUG KASAN global- out-of-bounds. To avoid the BUG, null terminate the array. Reported-by: Yi Zhang Closes: https://lore.kernel.org/linux-nvme/jhllwfxcedrcxcnbajwl4x2l2ujcqowqcd4ps574zrafrqhjna@f4icvecutekm/ Fixes: 1e48b34c9bc7 ("nvme: split off TLS sysfs attributes into a separate group") Signed-off-by: Shin'ichiro Kawasaki Tested-by: Yi Zhang Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index eb345551d6fe..b68a9e5f1ea3 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -767,6 +767,7 @@ static struct attribute *nvme_tls_attrs[] = { &dev_attr_tls_key.attr, &dev_attr_tls_configured_key.attr, &dev_attr_tls_keyring.attr, + NULL, }; static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, -- cgit v1.2.3 From 9064610348b16356d43e59e286aedfec31825541 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 4 Sep 2024 14:48:50 -0700 Subject: nvme: remove CC register read-back during enabling Any non-posted read should flush the previous write, so we don't necessarily need to read back the value we just wrote. I've found at least some controllers that respond with 0 for short moments after writing the CC register with EN (enable) cleared, so the read-back is overwriting our valid ctrl_config value and ends up breaking on the subsequent enabling. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ca9959a8fb9e..ba6508455e18 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2468,11 +2468,6 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) if (ret) return ret; - /* Flush write to device (required if transport is PCI) */ - ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CC, &ctrl->ctrl_config); - if (ret) - return ret; - /* CAP value may change after initial CC write */ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); if (ret) -- cgit v1.2.3