From a455aa72f7c46b881721668b3ee810713adc7a5b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 15 Oct 2020 20:04:22 -0700 Subject: device-dax/kmem: fix resource release The conversion to request_mem_region() is broken because it assumes that the range is marked busy prior to release. However, due to the way that the kmem driver manipulates the IORESOURCE_BUSY flag (clears it to let {add,remove}_memory() handle busy) it requires a manual release_resource() to perform cleanup. Given that the actual 'struct resource *' needs to be recalled, not just the range, add that tracking to the kmem driver-data. Fixes: 0513bd5bb114 ("device-dax/kmem: replace release_resource() with release_mem_region()") Reported-by: David Hildenbrand Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Cc: Vishal Verma Cc: Dave Hansen Cc: Pavel Tatashin Cc: Brice Goglin Cc: Dave Jiang Cc: Ira Weiny Cc: Jia He Cc: Joao Martins Cc: Jonathan Cameron Link: https://lkml.kernel.org/r/160272252925.3136502.17220638073995895400.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- drivers/dax/kmem.c | 48 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 6c933f2b604e..af04b6d1d263 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -35,11 +35,17 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r) return 0; } +struct dax_kmem_data { + const char *res_name; + struct resource *res[]; +}; + static int dev_dax_kmem_probe(struct dev_dax *dev_dax) { struct device *dev = &dev_dax->dev; + struct dax_kmem_data *data; + int rc = -ENOMEM; int i, mapped = 0; - char *res_name; int numa_node; /* @@ -55,14 +61,17 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) return -EINVAL; } - res_name = kstrdup(dev_name(dev), GFP_KERNEL); - if (!res_name) + data = kzalloc(sizeof(*data) + 
sizeof(struct resource *) * dev_dax->nr_range, GFP_KERNEL); + if (!data) return -ENOMEM; + data->res_name = kstrdup(dev_name(dev), GFP_KERNEL); + if (!data->res_name) + goto err_res_name; + for (i = 0; i < dev_dax->nr_range; i++) { struct resource *res; struct range range; - int rc; rc = dax_kmem_range(dev_dax, i, &range); if (rc) { @@ -72,7 +81,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) } /* Region is permanently reserved if hotremove fails. */ - res = request_mem_region(range.start, range_len(&range), res_name); + res = request_mem_region(range.start, range_len(&range), data->res_name); if (!res) { dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n", i, range.start, range.end); @@ -82,9 +91,10 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) */ if (mapped) continue; - kfree(res_name); - return -EBUSY; + rc = -EBUSY; + goto err_request_mem; } + data->res[i] = res; /* * Set flags appropriate for System RAM. Leave ..._BUSY clear @@ -104,18 +114,25 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) if (rc) { dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", i, range.start, range.end); - release_mem_region(range.start, range_len(&range)); + release_resource(res); + kfree(res); + data->res[i] = NULL; if (mapped) continue; - kfree(res_name); - return rc; + goto err_request_mem; } mapped++; } - dev_set_drvdata(dev, res_name); + dev_set_drvdata(dev, data); return 0; + +err_request_mem: + kfree(data->res_name); +err_res_name: + kfree(data); + return rc; } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -123,7 +140,7 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax) { int i, success = 0; struct device *dev = &dev_dax->dev; - const char *res_name = dev_get_drvdata(dev); + struct dax_kmem_data *data = dev_get_drvdata(dev); /* * We have one shot for removing memory, if some memory blocks were not @@ -142,7 +159,9 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax) rc = remove_memory(dev_dax->target_node, range.start, 
range_len(&range)); if (rc == 0) { - release_mem_region(range.start, range_len(&range)); + release_resource(data->res[i]); + kfree(data->res[i]); + data->res[i] = NULL; success++; continue; } @@ -153,7 +172,8 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax) } if (success >= dev_dax->nr_range) { - kfree(res_name); + kfree(data->res_name); + kfree(data); dev_set_drvdata(dev, NULL); } -- cgit v1.2.3 From b6117199787c60539105d2de0d010146e8396fc3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:44 -0700 Subject: mm/memory_hotplug: prepare passing flags to add_memory() and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We soon want to pass flags, e.g., to mark added System RAM resources mergeable. Prepare for that. This patch is based on a similar patch by Oscar Salvador: https://lkml.kernel.org/r/20190625075227.15193-3-osalvador@suse.de Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Juergen Gross # Xen related part Reviewed-by: Pankaj Gupta Acked-by: Wei Liu Cc: Michal Hocko Cc: Dan Williams Cc: Jason Gunthorpe Cc: Baoquan He Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Greg Kroah-Hartman Cc: Vishal Verma Cc: Dave Jiang Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: David Hildenbrand Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Boris Ostrovsky Cc: Stefano Stabellini Cc: "Oliver O'Halloran" Cc: Pingfan Liu Cc: Nathan Lynch Cc: Libor Pechacek Cc: Anton Blanchard Cc: Leonardo Bras Cc: Ard Biesheuvel Cc: Eric Biederman Cc: Julien Grall Cc: Kees Cook Cc: Roger Pau Monné Cc: Thomas Gleixner Cc: Wei Yang Link: https://lkml.kernel.org/r/20200911103459.10306-5-david@redhat.com Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 3 ++- drivers/base/memory.c | 3 ++- drivers/dax/kmem.c | 2 +- drivers/hv/hv_balloon.c | 2 +- drivers/s390/char/sclp_cmd.c | 2 +- drivers/virtio/virtio_mem.c | 2 +- drivers/xen/balloon.c | 2 +- 7 files changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index ad6e90fbc813..b02fd51e5589 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -194,7 +194,8 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) if (node < 0) node = memory_add_physaddr_to_nid(info->start_addr); - result = __add_memory(node, info->start_addr, info->length); + result = __add_memory(node, info->start_addr, info->length, + MHP_NONE); /* * If the memory block has been used by the kernel, add_memory() diff --git a/drivers/base/memory.c b/drivers/base/memory.c index adf828dfccf0..eef4ffb6122c 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -432,7 +432,8 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr, nid = memory_add_physaddr_to_nid(phys_addr); ret = __add_memory(nid, phys_addr, - MIN_MEMORY_BLOCK_SIZE * sections_per_block); + MIN_MEMORY_BLOCK_SIZE * sections_per_block, + MHP_NONE); if (ret) goto out; diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index af04b6d1d263..b4368c5b6a0c 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -109,7 +109,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) * this as RAM automatically. 
*/ rc = add_memory_driver_managed(numa_node, range.start, - range_len(&range), kmem_name); + range_len(&range), kmem_name, MHP_NONE); if (rc) { dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 32e3bc0aa665..3c0d52e24452 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), - (HA_CHUNK << PAGE_SHIFT)); + (HA_CHUNK << PAGE_SHIFT), MHP_NONE); if (ret) { pr_err("hot_add memory failed error is %d\n", ret); diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index a864b21af602..f6e97f0830f6 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -406,7 +406,7 @@ static void __init add_memory_merged(u16 rn) if (!size) goto skip_add; for (addr = start; addr < start + size; addr += block_size) - add_memory(0, addr, block_size); + add_memory(0, addr, block_size, MHP_NONE); skip_add: first_rn = rn; num = 1; diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 834b7c13ef3d..ed99e4335401 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -424,7 +424,7 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); return add_memory_driver_managed(nid, addr, memory_block_size_bytes(), - vm->resource_name); + vm->resource_name, MHP_NONE); } /* diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 51427c752b37..9f40a294d398 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void) mutex_unlock(&balloon_mutex); /* add_memory_resource() requires the device_hotplug lock */ lock_device_hotplug(); - rc = add_memory_resource(nid, resource); + rc = 
add_memory_resource(nid, resource, MHP_NONE); unlock_device_hotplug(); mutex_lock(&balloon_mutex); -- cgit v1.2.3 From 9b24247a24471e1333fb556a12d0e3be30d2a750 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:56 -0700 Subject: virtio-mem: try to merge system ram resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit virtio-mem adds memory in memory block granularity, to be able to remove it in the same granularity again later, and to grow slowly on demand. This, however, results in quite a lot of resources when adding a lot of memory. Resources are effectively stored in a list-based tree. Having a lot of resources not only wastes memory, it also makes traversing that tree more expensive, and makes /proc/iomem explode in size (e.g., requiring kexec-tools to manually merge resources later when e.g., trying to create a kdump header). Before this patch, we get (/proc/iomem) when hotplugging 2G via virtio-mem on x86-64: [...] 100000000-13fffffff : System RAM 140000000-33fffffff : virtio0 140000000-147ffffff : System RAM (virtio_mem) 148000000-14fffffff : System RAM (virtio_mem) 150000000-157ffffff : System RAM (virtio_mem) 158000000-15fffffff : System RAM (virtio_mem) 160000000-167ffffff : System RAM (virtio_mem) 168000000-16fffffff : System RAM (virtio_mem) 170000000-177ffffff : System RAM (virtio_mem) 178000000-17fffffff : System RAM (virtio_mem) 180000000-187ffffff : System RAM (virtio_mem) 188000000-18fffffff : System RAM (virtio_mem) 190000000-197ffffff : System RAM (virtio_mem) 198000000-19fffffff : System RAM (virtio_mem) 1a0000000-1a7ffffff : System RAM (virtio_mem) 1a8000000-1afffffff : System RAM (virtio_mem) 1b0000000-1b7ffffff : System RAM (virtio_mem) 1b8000000-1bfffffff : System RAM (virtio_mem) 3280000000-32ffffffff : PCI Bus 0000:00 With this patch, we get (/proc/iomem): [...] 
fffc0000-ffffffff : Reserved 100000000-13fffffff : System RAM 140000000-33fffffff : virtio0 140000000-1bfffffff : System RAM (virtio_mem) 3280000000-32ffffffff : PCI Bus 0000:00 Of course, with more hotplugged memory, it gets worse. When unplugging memory blocks again, try_remove_memory() (via offline_and_remove_memory()) will properly split the resource up again. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Pankaj Gupta Cc: Michal Hocko Cc: Dan Williams Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Baoquan He Cc: Wei Yang Cc: Anton Blanchard Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: Christian Borntraeger Cc: Dave Jiang Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jason Gunthorpe Cc: Juergen Gross Cc: Julien Grall Cc: Kees Cook Cc: "K. Y. Srinivasan" Cc: Len Brown Cc: Leonardo Bras Cc: Libor Pechacek Cc: Michael Ellerman Cc: Nathan Lynch Cc: "Oliver O'Halloran" Cc: Paul Mackerras Cc: Pingfan Liu Cc: "Rafael J. 
Wysocki" Cc: Roger Pau Monné Cc: Stefano Stabellini Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Cc: Wei Liu Link: https://lkml.kernel.org/r/20200911103459.10306-7-david@redhat.com Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_mem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index ed99e4335401..ba4de598f663 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -424,7 +424,8 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); return add_memory_driver_managed(nid, addr, memory_block_size_bytes(), - vm->resource_name, MHP_NONE); + vm->resource_name, + MEMHP_MERGE_RESOURCE); } /* -- cgit v1.2.3 From 1b989d5d72abb237ae1ee8c4ce3e15cd5a364a68 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:09:01 -0700 Subject: xen/balloon: try to merge system ram resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's try to merge system ram resources we add, to minimize the number of resources in /proc/iomem. We don't care about the boundaries of individual chunks we added. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Juergen Gross Cc: Michal Hocko Cc: Boris Ostrovsky Cc: Stefano Stabellini Cc: Roger Pau Monné Cc: Julien Grall Cc: Pankaj Gupta Cc: Baoquan He Cc: Wei Yang Cc: Anton Blanchard Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Jiang Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jason Gunthorpe Cc: Jason Wang Cc: Kees Cook Cc: "K. Y. Srinivasan" Cc: Len Brown Cc: Leonardo Bras Cc: Libor Pechacek Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Nathan Lynch Cc: "Oliver O'Halloran" Cc: Paul Mackerras Cc: Pingfan Liu Cc: "Rafael J. 
Wysocki" Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Cc: Wei Liu Link: https://lkml.kernel.org/r/20200911103459.10306-8-david@redhat.com Signed-off-by: Linus Torvalds --- drivers/xen/balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 9f40a294d398..b57b2067ecbf 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void) mutex_unlock(&balloon_mutex); /* add_memory_resource() requires the device_hotplug lock */ lock_device_hotplug(); - rc = add_memory_resource(nid, resource, MHP_NONE); + rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE); unlock_device_hotplug(); mutex_lock(&balloon_mutex); -- cgit v1.2.3 From 2c76e7f6c42bcee0e25194b54140cbce866d191a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:09:07 -0700 Subject: hv_balloon: try to merge system ram resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's try to merge system ram resources we add, to minimize the number of resources in /proc/iomem. We don't care about the boundaries of individual chunks we added. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Wei Liu Cc: Michal Hocko Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: Pankaj Gupta Cc: Baoquan He Cc: Wei Yang Cc: Anton Blanchard Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Jiang Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Jason Gunthorpe Cc: Jason Wang Cc: Juergen Gross Cc: Julien Grall Cc: Kees Cook Cc: Len Brown Cc: Leonardo Bras Cc: Libor Pechacek Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Nathan Lynch Cc: "Oliver O'Halloran" Cc: Paul Mackerras Cc: Pingfan Liu Cc: "Rafael J. 
Wysocki" Cc: Roger Pau Monné Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Link: https://lkml.kernel.org/r/20200911103459.10306-9-david@redhat.com Signed-off-by: Linus Torvalds --- drivers/hv/hv_balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 3c0d52e24452..b64d2efbefe7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), - (HA_CHUNK << PAGE_SHIFT), MHP_NONE); + (HA_CHUNK << PAGE_SHIFT), MEMHP_MERGE_RESOURCE); if (ret) { pr_err("hot_add memory failed error is %d\n", ret); -- cgit v1.2.3 From 90c7eaeb14a325a760d732184ff1fbed47e5fa98 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 15 Oct 2020 20:09:15 -0700 Subject: mm: don't panic when links can't be created in sysfs At boot time, or when doing memory hot-add operations, if the links in sysfs can't be created, the system is still able to run, so just report the error in the kernel log rather than BUG_ON and potentially make system unusable because the callpath can be called with locks held. Since the number of memory blocks managed could be high, the messages are rate limited. As a consequence, link_mem_sections() has no status to report anymore. Signed-off-by: Laurent Dufour Signed-off-by: Andrew Morton Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Acked-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Fenghua Yu Cc: Nathan Lynch Cc: "Rafael J . 
Wysocki" Cc: Scott Cheloha Cc: Tony Luck Link: https://lkml.kernel.org/r/20200915094143.79181-4-ldufour@linux.ibm.com Signed-off-by: Linus Torvalds --- drivers/base/node.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/base/node.c b/drivers/base/node.c index 43d21f9e88b1..6ffa470e2984 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -772,8 +772,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn) return pfn_to_nid(pfn); } -static int do_register_memory_block_under_node(int nid, - struct memory_block *mem_blk) +static void do_register_memory_block_under_node(int nid, + struct memory_block *mem_blk) { int ret; @@ -786,12 +786,19 @@ static int do_register_memory_block_under_node(int nid, ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, &mem_blk->dev.kobj, kobject_name(&mem_blk->dev.kobj)); - if (ret) - return ret; + if (ret && ret != -EEXIST) + dev_err_ratelimited(&node_devices[nid]->dev, + "can't create link to %s in sysfs (%d)\n", + kobject_name(&mem_blk->dev.kobj), ret); - return sysfs_create_link_nowarn(&mem_blk->dev.kobj, + ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj, &node_devices[nid]->dev.kobj, kobject_name(&node_devices[nid]->dev.kobj)); + if (ret && ret != -EEXIST) + dev_err_ratelimited(&mem_blk->dev, + "can't create link to %s in sysfs (%d)\n", + kobject_name(&node_devices[nid]->dev.kobj), + ret); } /* register memory section under specified node if it spans that node */ @@ -827,7 +834,8 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk, if (page_nid != nid) continue; - return do_register_memory_block_under_node(nid, mem_blk); + do_register_memory_block_under_node(nid, mem_blk); + return 0; } /* mem section does not span the specified node */ return 0; @@ -842,7 +850,8 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, { int nid = *(int *)arg; - return 
do_register_memory_block_under_node(nid, mem_blk); + do_register_memory_block_under_node(nid, mem_blk); + return 0; } /* @@ -860,8 +869,8 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); } -int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, - enum meminit_context context) +void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, + enum meminit_context context) { walk_memory_blocks_func_t func; @@ -870,9 +879,9 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, else func = register_mem_block_under_node_early; - return walk_memory_blocks(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), (void *)&nid, - func); + walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), + (void *)&nid, func); + return; } #ifdef CONFIG_HUGETLBFS -- cgit v1.2.3 From 4e79603bbd33aa9600313ae6f887741efbb01456 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 15 Oct 2020 20:09:43 -0700 Subject: zram: failing to decompress is WARN_ON worthy If we fail to decompress in zram it's a pretty serious problem. We were entrusted to be able to decompress the old data but we failed. Either we've got some crazy bug in the compression code or we've got memory corruption. At the moment, when this happens the log looks like this: ERR kernel: [ 1833.099861] zram: Decompression failed! err=-22, page=336112 ERR kernel: [ 1833.099881] zram: Decompression failed! err=-22, page=336112 ALERT kernel: [ 1833.099886] Read-error on swap-device (253:0:2688896) It is true that we have an "ALERT" level log in there, but (at least to me) it feels like even this isn't enough to impart the seriousness of this error. Let's convert to a WARN_ON. Note that WARN_ON is automatically "unlikely" so we can simply replace the old annotation with the new one. 
Signed-off-by: Douglas Anderson Signed-off-by: Andrew Morton Acked-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Sonny Rao Cc: Jens Axboe Link: https://lkml.kernel.org/r/20200917174059.1.If09c882545dbe432268f7a67a4d4cfcb6caace4f@changeid Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index bff3d4021c18..029403c18ca3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1270,7 +1270,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, zram_slot_unlock(zram, index); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret)) + if (WARN_ON(ret)) pr_err("Decompression failed! err=%d, page=%u\n", ret, index); return ret; -- cgit v1.2.3 From 4d45e75a9955ade5c2f49bd96fc4173b2cec9a72 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 15 Oct 2020 20:13:00 -0700 Subject: mm: remove the now-unnecessary mmget_still_valid() hack The preceding patches have ensured that core dumping properly takes the mmap_lock. Thanks to that, we can now remove mmget_still_valid() and all its users. Signed-off-by: Jann Horn Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Cc: Christoph Hellwig Cc: Alexander Viro Cc: "Eric W . 
Biederman" Cc: Oleg Nesterov Cc: Hugh Dickins Link: http://lkml.kernel.org/r/20200827114932.3572699-8-jannh@google.com Signed-off-by: Linus Torvalds --- drivers/infiniband/core/uverbs_main.c | 3 --- drivers/vfio/pci/vfio_pci.c | 38 +++++++++++++++++------------------ 2 files changed, 18 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 37794d88b1f3..a4ba0b87d6de 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * will only be one mm, so no big deal. */ mmap_read_lock(mm); - if (!mmget_still_valid(mm)) - goto skip_mm; mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { @@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) } } mutex_unlock(&ufile->umap_lock); - skip_mm: mmap_read_unlock(mm); mmput(mm); } diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1ab1f5cda4ac..b0f4b92a87ed 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try) } else { mmap_read_lock(mm); } - if (mmget_still_valid(mm)) { - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) { - mmap_read_unlock(mm); - mmput(mm); - return 0; - } - } else { - mutex_lock(&vdev->vma_lock); + if (try) { + if (!mutex_trylock(&vdev->vma_lock)) { + mmap_read_unlock(mm); + mmput(mm); + return 0; } - list_for_each_entry_safe(mmap_vma, tmp, - &vdev->vma_list, vma_next) { - struct vm_area_struct *vma = mmap_vma->vma; + } else { + mutex_lock(&vdev->vma_lock); + } + list_for_each_entry_safe(mmap_vma, tmp, + &vdev->vma_list, vma_next) { + struct vm_area_struct *vma = mmap_vma->vma; - if (vma->vm_mm != mm) - continue; + if (vma->vm_mm != mm) + continue; - 
list_del(&mmap_vma->vma_next); - kfree(mmap_vma); + list_del(&mmap_vma->vma_next); + kfree(mmap_vma); - zap_vma_ptes(vma, vma->vm_start, - vma->vm_end - vma->vm_start); - } - mutex_unlock(&vdev->vma_lock); + zap_vma_ptes(vma, vma->vm_start, + vma->vm_end - vma->vm_start); } + mutex_unlock(&vdev->vma_lock); mmap_read_unlock(mm); mmput(mm); } -- cgit v1.2.3 From fa63f083b3492b5ed5332b8d7c90b03b5ef24a1d Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Thu, 15 Oct 2020 20:13:15 -0700 Subject: rapidio: fix error handling path rio_dma_transfer() attempts to clamp the return value of pin_user_pages_fast() to be >= 0. However, the attempt fails because nr_pages is overridden a few lines later, and restored to the undesirable -ERRNO value. The return value is ultimately stored in nr_pages, which in turn is passed to unpin_user_pages(), which expects nr_pages >= 0, else, disaster. Fix this by fixing the nesting of the assignment to nr_pages: nr_pages should be clamped to zero if pin_user_pages_fast() returns -ERRNO, or set to the return value of pin_user_pages_fast(), otherwise. [jhubbard@nvidia.com: new changelog] Fixes: e8de370188d09 ("rapidio: add mport char device driver") Signed-off-by: Souptick Joarder Signed-off-by: Andrew Morton Reviewed-by: Ira Weiny Reviewed-by: John Hubbard Cc: Matthew Wilcox Cc: Matt Porter Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: Madhuparna Bhowmik Cc: Dan Carpenter Link: https://lkml.kernel.org/r/1600227737-20785-1-git-send-email-jrdr.linux@gmail.com Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/rio_mport_cdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index a30342942e26..163b6c72501d 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -871,15 +871,16 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, rmcd_error("pin_user_pages_fast err=%ld", pinned); nr_pages = 0; - } else + } else { rmcd_error("pinned %ld out of %ld pages", pinned, nr_pages); + /* + * Set nr_pages up to mean "how many pages to unpin, in + * the error handler: + */ + nr_pages = pinned; + } ret = -EFAULT; - /* - * Set nr_pages up to mean "how many pages to unpin, in - * the error handler: - */ - nr_pages = pinned; goto err_pg; } -- cgit v1.2.3 From 85094c05eeb47d195a74a25366a2db066f1c9d47 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Thu, 15 Oct 2020 20:13:18 -0700 Subject: rapidio: fix the missed put_device() for rio_mport_add_riodev rio_mport_add_riodev() fails to call put_device() when the device already exists. Add the missing call to fix it. Fixes: e8de370188d0 ("rapidio: add mport char device driver") Signed-off-by: Jing Xiangfeng Signed-off-by: Andrew Morton Reviewed-by: Dan Carpenter Cc: Matt Porter Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: John Hubbard Cc: Kees Cook Cc: Madhuparna Bhowmik Link: https://lkml.kernel.org/r/20200922072525.42330-1-jingxiangfeng@huawei.com Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/rio_mport_cdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 163b6c72501d..94331d999d27 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -1680,6 +1680,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, struct rio_dev *rdev; struct rio_switch *rswitch = NULL; struct rio_mport *mport; + struct device *dev; size_t size; u32 rval; u32 swpinfo = 0; @@ -1694,8 +1695,10 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name, dev_info.comptag, dev_info.destid, dev_info.hopcount); - if (bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name)) { + dev = bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name); + if (dev) { rmcd_debug(RDEV, "device %s already exists", dev_info.name); + put_device(dev); return -EEXIST; } -- cgit v1.2.3