From 6d24b170a9db0456f577b1ab01226a2254c016a8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:13:54 -0700 Subject: dax: Fix dax_mapping_release() use after free A CONFIG_DEBUG_KOBJECT_RELEASE test of removing a device-dax region provider (like modprobe -r dax_hmem) yields: kobject: 'mapping0' (ffff93eb460e8800): kobject_release, parent 0000000000000000 (delayed 2000) [..] DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 23 PID: 282 at kernel/locking/lockdep.c:232 __lock_acquire+0x9fc/0x2260 [..] RIP: 0010:__lock_acquire+0x9fc/0x2260 [..] Call Trace: [..] lock_acquire+0xd4/0x2c0 ? ida_free+0x62/0x130 _raw_spin_lock_irqsave+0x47/0x70 ? ida_free+0x62/0x130 ida_free+0x62/0x130 dax_mapping_release+0x1f/0x30 device_release+0x36/0x90 kobject_delayed_cleanup+0x46/0x150 Due to attempting ida_free() on an ida object that has already been freed. Devices typically only hold a reference on their parent while registered. If a child needs a parent object to complete its release it needs to hold a reference that it drops from its release callback. Arrange for a dax_mapping to pin its parent dev_dax instance until dax_mapping_release(). 
Fixes: 0b07ce872a9e ("device-dax: introduce 'mapping' devices") Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577283412.1672036.16111545266174261446.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 227800053309..aee695f86b44 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -635,10 +635,12 @@ EXPORT_SYMBOL_GPL(alloc_dax_region); static void dax_mapping_release(struct device *dev) { struct dax_mapping *mapping = to_dax_mapping(dev); - struct dev_dax *dev_dax = to_dev_dax(dev->parent); + struct device *parent = dev->parent; + struct dev_dax *dev_dax = to_dev_dax(parent); ida_free(&dev_dax->ida, mapping->id); kfree(mapping); + put_device(parent); } static void unregister_dax_mapping(void *data) @@ -778,6 +780,7 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id) dev = &mapping->dev; device_initialize(dev); dev->parent = &dev_dax->dev; + get_device(dev->parent); dev->type = &dax_mapping_type; dev_set_name(dev, "mapping%d", mapping->id); rc = device_add(dev); -- cgit v1.2.3 From 82b4ceeccb89cfd0b03706f1b15e31a7db6a027d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:13:59 -0700 Subject: dax: Use device_unregister() in unregister_dax_mapping() Replace an open-coded device_unregister() sequence with the helper. 
Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577283989.1672036.7777592498865470652.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index aee695f86b44..c99ea08aafc3 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -657,8 +657,7 @@ static void unregister_dax_mapping(void *data) dev_dax->ranges[mapping->range_id].mapping = NULL; mapping->range_id = -1; - device_del(dev); - put_device(dev); + device_unregister(dev); } static struct dev_dax_range *get_dax_range(struct device *dev) -- cgit v1.2.3 From 70aab281e18c68a1284bc387de127c2fc0bed3f8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:14:05 -0700 Subject: dax: Introduce alloc_dev_dax_id() The reference counting of dax_region objects is needlessly complicated, has led to confusion [1], and has hidden a bug [2]. Towards cleaning up that mess, introduce alloc_dev_dax_id() to minimize the holding of a dax_region reference to only what dev_dax_release() needs, the dax_region->ida. Part of the reason for the mess was the design to dereference a dax_region in all cases in free_dev_dax_id() even if the id was statically assigned by the upper level dax_region driver. Remove the need to call "is_static(dax_region)" by tracking whether the id is dynamic directly in the dev_dax instance itself. With that flag the dax_region pinning and release per dev_dax instance can move to alloc_dev_dax_id() and free_dev_dax_id() respectively. A follow-on cleanup addresses the unnecessary references in the dax_region setup and drivers. 
Fixes: 0f3da14a4f05 ("device-dax: introduce 'seed' devices") Link: http://lore.kernel.org/r/20221203095858.612027-1-liuyongqiang13@huawei.com [1] Link: http://lore.kernel.org/r/3cf0890b-4eb0-e70e-cd9c-2ecc3d496263@hpe.com [2] Reported-by: Yongqiang Liu Reported-by: Paul Cassella Reported-by: Ira Weiny Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577284563.1672036.13493034988900989554.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 56 ++++++++++++++++++++++++++++------------------- drivers/dax/dax-private.h | 4 +++- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index c99ea08aafc3..a4cc3eca774f 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -446,18 +446,34 @@ static void unregister_dev_dax(void *dev) put_device(dev); } +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + /* a return value >= 0 indicates this invocation invalidated the id */ static int __free_dev_dax_id(struct dev_dax *dev_dax) { - struct dax_region *dax_region = dev_dax->region; struct device *dev = &dev_dax->dev; + struct dax_region *dax_region; int rc = dev_dax->id; device_lock_assert(dev); - if (is_static(dax_region) || dev_dax->id < 0) + if (!dev_dax->dyn_id || dev_dax->id < 0) return -1; + dax_region = dev_dax->region; ida_free(&dax_region->ida, dev_dax->id); + dax_region_put(dax_region); dev_dax->id = -1; return rc; } @@ -473,6 +489,20 @@ static int free_dev_dax_id(struct dev_dax *dev_dax) return rc; } +static int alloc_dev_dax_id(struct dev_dax *dev_dax) +{ + struct dax_region *dax_region = dev_dax->region; + int id; + + id = ida_alloc(&dax_region->ida, GFP_KERNEL); + if (id < 0) + return id; + 
kref_get(&dax_region->kref); + dev_dax->dyn_id = true; + dev_dax->id = id; + return id; +} + static ssize_t delete_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -560,20 +590,6 @@ static const struct attribute_group *dax_region_attribute_groups[] = { NULL, }; -static void dax_region_free(struct kref *kref) -{ - struct dax_region *dax_region; - - dax_region = container_of(kref, struct dax_region, kref); - kfree(dax_region); -} - -void dax_region_put(struct dax_region *dax_region) -{ - kref_put(&dax_region->kref, dax_region_free); -} -EXPORT_SYMBOL_GPL(dax_region_put); - static void dax_region_unregister(void *region) { struct dax_region *dax_region = region; @@ -1297,12 +1313,10 @@ static const struct attribute_group *dax_attribute_groups[] = { static void dev_dax_release(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); - struct dax_region *dax_region = dev_dax->region; struct dax_device *dax_dev = dev_dax->dax_dev; put_dax(dax_dev); free_dev_dax_id(dev_dax); - dax_region_put(dax_region); kfree(dev_dax->pgmap); kfree(dev_dax); } @@ -1326,6 +1340,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) if (!dev_dax) return ERR_PTR(-ENOMEM); + dev_dax->region = dax_region; if (is_static(dax_region)) { if (dev_WARN_ONCE(parent, data->id < 0, "dynamic id specified to static region\n")) { @@ -1341,13 +1356,11 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) goto err_id; } - rc = ida_alloc(&dax_region->ida, GFP_KERNEL); + rc = alloc_dev_dax_id(dev_dax); if (rc < 0) goto err_id; - dev_dax->id = rc; } - dev_dax->region = dax_region; dev = &dev_dax->dev; device_initialize(dev); dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); @@ -1388,7 +1401,6 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) dev_dax->target_node = dax_region->target_node; dev_dax->align = dax_region->align; ida_init(&dev_dax->ida); - kref_get(&dax_region->kref); inode = dax_inode(dax_dev); 
dev->devt = inode->i_rdev; diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index db032680d941..27cf2daaaa79 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -52,7 +52,8 @@ struct dax_mapping { * @region - parent region * @dax_dev - core dax functionality * @target_node: effective numa node if dev_dax memory range is onlined - * @id: ida allocated id + * @dyn_id: is this a dynamic or statically created instance + * @id: ida allocated id when the dax_region is not static * @ida: mapping id allocator * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) @@ -64,6 +65,7 @@ struct dev_dax { struct dax_device *dax_dev; unsigned int align; int target_node; + bool dyn_id; int id; struct ida ida; struct device dev; -- cgit v1.2.3 From 2532f41607c4308733239dd43278f8a5540f3ec7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:14:11 -0700 Subject: dax: Cleanup extra dax_region references Now that free_dev_dax_id() internally manages the references it needs, the extra references taken by the dax_region drivers are not needed. 
Reported-by: Ira Weiny Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577285161.1672036.8111253437794419696.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 4 +--- drivers/dax/bus.h | 1 - drivers/dax/cxl.c | 8 +------- drivers/dax/hmem/hmem.c | 8 +------- drivers/dax/pmem.c | 7 +------ 5 files changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index a4cc3eca774f..0ee96e6fc426 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -454,11 +454,10 @@ static void dax_region_free(struct kref *kref) kfree(dax_region); } -void dax_region_put(struct dax_region *dax_region) +static void dax_region_put(struct dax_region *dax_region) { kref_put(&dax_region->kref, dax_region_free); } -EXPORT_SYMBOL_GPL(dax_region_put); /* a return value >= 0 indicates this invocation invalidated the id */ static int __free_dev_dax_id(struct dev_dax *dev_dax) @@ -641,7 +640,6 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, return NULL; } - kref_get(&dax_region->kref); if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) return NULL; return dax_region; diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 43f490e9ce65..1ccd23360124 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -9,7 +9,6 @@ struct dev_dax; struct resource; struct dax_device; struct dax_region; -void dax_region_put(struct dax_region *dax_region); /* dax bus specific ioresource flags */ #define IORESOURCE_DAX_STATIC BIT(0) diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c index ccdf8de85bd5..8bc9d04034d6 100644 --- a/drivers/dax/cxl.c +++ b/drivers/dax/cxl.c @@ -13,7 +13,6 @@ static int cxl_dax_region_probe(struct device *dev) struct cxl_region *cxlr = cxlr_dax->cxlr; struct dax_region *dax_region; struct dev_dax_data data; - struct dev_dax *dev_dax; if (nid == NUMA_NO_NODE) nid = 
memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start); @@ -28,13 +27,8 @@ static int cxl_dax_region_probe(struct device *dev) .id = -1, .size = range_len(&cxlr_dax->hpa_range), }; - dev_dax = devm_create_dev_dax(&data); - if (IS_ERR(dev_dax)) - return PTR_ERR(dev_dax); - /* child dev_dax instances now own the lifetime of the dax_region */ - dax_region_put(dax_region); - return 0; + return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data)); } static struct cxl_driver cxl_dax_region_driver = { diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index e5fe8b39fb94..5d2ddef0f8f5 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -16,7 +16,6 @@ static int dax_hmem_probe(struct platform_device *pdev) struct dax_region *dax_region; struct memregion_info *mri; struct dev_dax_data data; - struct dev_dax *dev_dax; /* * @region_idle == true indicates that an administrative agent @@ -38,13 +37,8 @@ static int dax_hmem_probe(struct platform_device *pdev) .id = -1, .size = region_idle ? 
0 : range_len(&mri->range), }; - dev_dax = devm_create_dev_dax(&data); - if (IS_ERR(dev_dax)) - return PTR_ERR(dev_dax); - /* child dev_dax instances now own the lifetime of the dax_region */ - dax_region_put(dax_region); - return 0; + return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data)); } static struct platform_driver dax_hmem_driver = { diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index f050ea78bb83..ae0cb113a5d3 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -13,7 +13,6 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev) int rc, id, region_id; resource_size_t offset; struct nd_pfn_sb *pfn_sb; - struct dev_dax *dev_dax; struct dev_dax_data data; struct nd_namespace_io *nsio; struct dax_region *dax_region; @@ -65,12 +64,8 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev) .pgmap = &pgmap, .size = range_len(&range), }; - dev_dax = devm_create_dev_dax(&data); - /* child dev_dax instances now own the lifetime of the dax_region */ - dax_region_put(dax_region); - - return dev_dax; + return devm_create_dev_dax(&data); } static int dax_pmem_probe(struct device *dev) -- cgit v1.2.3 From dd0c64258a9d9e74b4896f05c7e77fa3365b5f12 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jun 2023 14:02:56 +0100 Subject: fsdax: remove redundant variable 'error' The variable 'error' is being assigned a value that is never read, the assignment and the variable are redundant and can be removed. 
Cleans up clang scan build warning: fs/dax.c:1880:10: warning: Although the value stored to 'error' is used in the enclosing expression, the value is never actually read from 'error' [deadcode.DeadStores] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20230621130256.2676126-1-colin.i.king@gmail.com Reviewed-by: Jan Kara Signed-off-by: Vishal Verma --- fs/dax.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 2ababb89918d..cb36c6746fc4 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1830,7 +1830,6 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, vm_fault_t ret = VM_FAULT_FALLBACK; pgoff_t max_pgoff; void *entry; - int error; if (vmf->flags & FAULT_FLAG_WRITE) iter.flags |= IOMAP_WRITE; @@ -1877,7 +1876,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, } iter.pos = (loff_t)xas.xa_index << PAGE_SHIFT; - while ((error = iomap_iter(&iter, ops)) > 0) { + while (iomap_iter(&iter, ops) > 0) { if (iomap_length(&iter) < PMD_SIZE) continue; /* actually breaks out of the loop */ -- cgit v1.2.3 From 46e66dab8565f742374e9cc4ff7d35f344d774e2 Mon Sep 17 00:00:00 2001 From: Tarun Sahu Date: Wed, 21 Jun 2023 21:20:25 +0530 Subject: dax/kmem: Pass valid argument to memory_group_register_static memory_group_register_static takes the maximum number of pages as the argument while dev_dax_kmem_probe passes total_len (in bytes) as the argument. IIUC, I don't see any crash/panic impact as such, as memory_group_register_static just sets the max_pages limit, which is used in auto_movable_zone_for_pfn to determine the zone. This might cause these conditions to behave differently. This will always be true, so the jump to kernel_zone will happen ... if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages)) goto kernel_zone; ... 
kernel_zone: return default_kernel_zone_for_pfn(nid, pfn, nr_pages); In the code below, the range compared by zone_intersects will be larger, as nr_pages will be higher (derived from total_len passed in dev_dax_kmem_probe). ... static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn, unsigned long nr_pages) { struct pglist_data *pgdat = NODE_DATA(nid); int zid; for (zid = 0; zid < ZONE_NORMAL; zid++) { struct zone *zone = &pgdat->node_zones[zid]; if (zone_intersects(zone, start_pfn, nr_pages)) return zone; } return &pgdat->node_zones[ZONE_NORMAL]; } An incorrect zone will be returned here, which might later cause a bigger problem. Fixes: eedf634aac3b ("dax/kmem: use a single static memory group for a single probed unit") Signed-off-by: Tarun Sahu Link: https://lore.kernel.org/r/20230621155025.370672-1-tsahu@linux.ibm.com Reviewed-by: Vishal Verma Signed-off-by: Vishal Verma --- drivers/dax/kmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 7b36db6f1cbd..898ca9505754 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -99,7 +99,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) if (!data->res_name) goto err_res_name; - rc = memory_group_register_static(numa_node, total_len); + rc = memory_group_register_static(numa_node, PFN_UP(total_len)); if (rc < 0) goto err_reg_mgid; data->mgid = rc; -- cgit v1.2.3