From 17ec4cd985780a7e30aa45bb8f272237c12502a4 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 15 Jan 2016 16:54:48 -0800 Subject: zram: don't call idr_remove() from zram_remove() The use of idr_remove() is forbidden in the callback functions of idr_for_each(). It is therefore unsafe to call idr_remove in zram_remove(). This patch moves the call to idr_remove() from zram_remove() to hot_remove_store(). In the destroy_devices() path, idrs are removed by idr_destroy(). This solves a use-after-free detected by KASan. [akpm@linux-foundation.org: fix coding style, per Sergey] Signed-off-by: Jerome Marchand Acked-by: Sergey Senozhatsky Cc: Minchan Kim Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 47915d736f8d..370c2f76016d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1325,7 +1325,6 @@ static int zram_remove(struct zram *zram) pr_info("Removed device: %s\n", zram->disk->disk_name); - idr_remove(&zram_index_idr, zram->disk->first_minor); blk_cleanup_queue(zram->disk->queue); del_gendisk(zram->disk); put_disk(zram->disk); @@ -1367,10 +1366,12 @@ static ssize_t hot_remove_store(struct class *class, mutex_lock(&zram_index_mutex); zram = idr_find(&zram_index_idr, dev_id); - if (zram) + if (zram) { ret = zram_remove(zram); - else + idr_remove(&zram_index_idr, dev_id); + } else { ret = -ENODEV; + } mutex_unlock(&zram_index_mutex); return ret ? 
ret : count; -- cgit v1.2.3 From 34c0fd540e79fb49ef9ce864dae1058cca265780 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:14 -0800 Subject: mm, dax, pmem: introduce pfn_t For the purpose of communicating the optional presence of a 'struct page' for the pfn returned from ->direct_access(), introduce a type that encapsulates a page-frame-number plus flags. These flags contain the historical "page_link" encoding for a scatterlist entry, but can also denote "device memory". Where "device memory" is a set of pfns that are not part of the kernel's linear mapping by default, but are accessed via the same memory controller as ram. The motivation for this new type is large capacity persistent memory that needs struct page entries in the 'memmap' to support 3rd party DMA (i.e. O_DIRECT I/O with a persistent memory source/target). However, we also need it in support of maintaining a list of mapped inodes which need to be unmapped at driver teardown or freeze_bdev() time. Signed-off-by: Dan Williams Cc: Christoph Hellwig Cc: Dave Hansen Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/brd.c | 7 +++++-- drivers/nvdimm/pmem.c | 13 +++++++++---- drivers/s390/block/dcssblk.c | 11 +++++------ 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index a5880f4ab40e..cb27190e9f39 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -19,6 +19,9 @@ #include #include #include +#ifdef CONFIG_BLK_DEV_RAM_DAX +#include +#endif #include @@ -378,7 +381,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, #ifdef CONFIG_BLK_DEV_RAM_DAX static long brd_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, unsigned long *pfn) + void __pmem **kaddr, pfn_t *pfn) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; @@ -389,7 +392,7 @@ static long brd_direct_access(struct 
block_device *bdev, sector_t sector, if (!page) return -ENOSPC; *kaddr = (void __pmem *)page_address(page); - *pfn = page_to_pfn(page); + *pfn = page_to_pfn_t(page); return PAGE_SIZE; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b493ff3fccb2..5def7f4ddbd2 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ struct pmem_device { phys_addr_t phys_addr; /* when non-zero this device is hosting a 'pfn' instance */ phys_addr_t data_offset; + unsigned long pfn_flags; void __pmem *virt_addr; size_t size; struct badblocks bb; @@ -135,13 +137,13 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, unsigned long *pfn) + void __pmem **kaddr, pfn_t *pfn) { struct pmem_device *pmem = bdev->bd_disk->private_data; resource_size_t offset = sector * 512 + pmem->data_offset; *kaddr = pmem->virt_addr + offset; - *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; + *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); return pmem->size - offset; } @@ -174,9 +176,11 @@ static struct pmem_device *pmem_alloc(struct device *dev, return ERR_PTR(-EBUSY); } - if (pmem_should_map_pages(dev)) + pmem->pfn_flags = PFN_DEV; + if (pmem_should_map_pages(dev)) { pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res); - else + pmem->pfn_flags |= PFN_MAP; + } else pmem->virt_addr = (void __pmem *) devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); @@ -384,6 +388,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) pmem = dev_get_drvdata(dev); devm_memunmap(dev, (void __force *) pmem->virt_addr); pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res); + pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { rc = PTR_ERR(pmem->virt_addr); goto err; diff --git 
a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 94a8f4ab57bc..ce7b70181740 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode); static blk_qc_t dcssblk_make_request(struct request_queue *q, struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, unsigned long *pfn); + void __pmem **kaddr, pfn_t *pfn); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -883,20 +884,18 @@ fail: static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, unsigned long *pfn) + void __pmem **kaddr, pfn_t *pfn) { struct dcssblk_dev_info *dev_info; unsigned long offset, dev_sz; - void *addr; dev_info = bdev->bd_disk->private_data; if (!dev_info) return -ENODEV; dev_sz = dev_info->end - dev_info->start; offset = secnum * 512; - addr = (void *) (dev_info->start + offset); - *pfn = virt_to_phys(addr) >> PAGE_SHIFT; - *kaddr = (void __pmem *) addr; + *kaddr = (void __pmem *) (dev_info->start + offset); + *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); return dev_sz - offset; } -- cgit v1.2.3 From 260ae3f7db614a5c4aa4b773599f99adc1d9859e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:17 -0800 Subject: mm: skip memory block registration for ZONE_DEVICE Prevent userspace from trying and failing to online ZONE_DEVICE pages which are meant to never be onlined. For example on platforms with a udev rule like the following: SUBSYSTEM=="memory", ACTION=="add", ATTR{state}=="offline", ATTR{state}="online" ...will generate futile attempts to online the ZONE_DEVICE sections. Example kernel messages: Built 1 zonelists in Node order, mobility grouping on. 
Total pages: 1004747 Policy zone: Normal online_pages [mem 0x248000000-0x24fffffff] failed Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 619fe584a44c..213456c2b123 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -647,6 +647,13 @@ static int add_memory_block(int base_section_nr) return 0; } +static bool is_zone_device_section(struct mem_section *ms) +{ + struct page *page; + + page = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms)); + return is_zone_device_page(page); +} /* * need an interface for the VM to add new memory regions, @@ -657,6 +664,9 @@ int register_new_memory(int nid, struct mem_section *section) int ret = 0; struct memory_block *mem; + if (is_zone_device_section(section)) + return 0; + mutex_lock(&mem_sysfs_mutex); mem = find_memory_block(section); @@ -693,6 +703,9 @@ static int remove_memory_section(unsigned long node_id, { struct memory_block *mem; + if (is_zone_device_section(section)) + return 0; + mutex_lock(&mem_sysfs_mutex); mem = find_memory_block(section); unregister_mem_sect_under_nodes(mem, __section_nr(section)); -- cgit v1.2.3 From 9476df7d80dfc425b37bfecf1d89edf8ec81fcb6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:19 -0800 Subject: mm: introduce find_dev_pagemap() There are several scenarios where we need to retrieve and update metadata associated with a given devm_memremap_pages() mapping, and the only lookup key available is a pfn in the range: 1/ We want to augment vmemmap_populate() (called via arch_add_memory()) to allocate memmap storage from pre-allocated pages reserved by the device driver. At vmemmap_alloc_block_buf() time it grabs device pages rather than page allocator pages. 
This is in support of devm_memremap_pages() mappings where the memmap is too large to fit in main memory (i.e. large persistent memory devices). 2/ Taking a reference against the mapping when inserting device pages into the address_space radix of a given inode. This facilitates unmap_mapping_range() and truncate_inode_pages() operations when the driver is tearing down the mapping. 3/ get_user_pages() operations on ZONE_DEVICE memory require taking a reference against the mapping so that the driver teardown path can revoke and drain usage of device pages. Signed-off-by: Dan Williams Tested-by: Logan Gunthorpe Cc: Christoph Hellwig Cc: Dave Chinner Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/nvdimm/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 5def7f4ddbd2..904629b97c4f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -21,9 +21,9 @@ #include #include #include -#include #include #include +#include #include #include #include -- cgit v1.2.3 From 4b94ffdc4163bae1ec73b6e977ffb7a7da3d06d3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:22 -0800 Subject: x86, mm: introduce vmem_altmap to augment vmemmap_populate() In support of providing struct page for large persistent memory capacities, use struct vmem_altmap to change the default policy for allocating memory for the memmap array. The default vmemmap_populate() allocates page table storage area from the page allocator. Given persistent memory capacities relative to DRAM it may not be feasible to store the memmap in 'System Memory'. Instead vmem_altmap represents pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() requests. Signed-off-by: Dan Williams Reported-by: kbuild test robot Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/nvdimm/pmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 904629b97c4f..be3f8547b702 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -178,7 +178,8 @@ static struct pmem_device *pmem_alloc(struct device *dev, pmem->pfn_flags = PFN_DEV; if (pmem_should_map_pages(dev)) { - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res); + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, + NULL); pmem->pfn_flags |= PFN_MAP; } else pmem->virt_addr = (void __pmem *) devm_memremap(dev, @@ -387,7 +388,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) /* establish pfn range for lookup, and switch to direct map */ pmem = dev_get_drvdata(dev); devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res); + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res, + NULL); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { rc = PTR_ERR(pmem->virt_addr); -- cgit v1.2.3 From d2c0f041e1bb1260629ecea2161adb9778945aa3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:26 -0800 Subject: libnvdimm, pfn, pmem: allocate memmap array in persistent memory Use the new vmem_altmap capability to enable the pmem driver to arrange for a struct page memmap to be established in persistent memory. 
[linux@roeck-us.net: mn10300: declare __pfn_to_phys() to fix build error] Signed-off-by: Dan Williams Cc: Christoph Hellwig Cc: Dave Chinner Cc: Ross Zwisler Signed-off-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/nvdimm/pfn_devs.c | 3 +-- drivers/nvdimm/pmem.c | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index f9b674bc49db..0cc9048b86e2 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -83,8 +83,7 @@ static ssize_t mode_store(struct device *dev, if (strncmp(buf, "pmem\n", n) == 0 || strncmp(buf, "pmem", n) == 0) { - /* TODO: allocate from PMEM support */ - rc = -ENOTTY; + nd_pfn->mode = PFN_MODE_PMEM; } else if (strncmp(buf, "ram\n", n) == 0 || strncmp(buf, "ram", n) == 0) nd_pfn->mode = PFN_MODE_RAM; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index be3f8547b702..03d86687f97b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -357,12 +357,16 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); struct device *dev = &nd_pfn->dev; - struct vmem_altmap *altmap; struct nd_region *nd_region; + struct vmem_altmap *altmap; struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; phys_addr_t offset; int rc; + struct vmem_altmap __altmap = { + .base_pfn = __phys_to_pfn(nsio->res.start), + .reserve = __phys_to_pfn(SZ_8K), + }; if (!nd_pfn->uuid || !nd_pfn->ndns) return -ENODEV; @@ -380,6 +384,17 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; + } else if (nd_pfn->mode == PFN_MODE_PMEM) { + nd_pfn->npfns = (resource_size(&nsio->res) - offset) + / PAGE_SIZE; + if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) + 
dev_info(&nd_pfn->dev, + "number of pfns truncated from %lld to %ld\n", + le64_to_cpu(nd_pfn->pfn_sb->npfns), + nd_pfn->npfns); + altmap = & __altmap; + altmap->free = __phys_to_pfn(offset - SZ_8K); + altmap->alloc = 0; } else { rc = -ENXIO; goto err; @@ -389,7 +404,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) pmem = dev_get_drvdata(dev); devm_memunmap(dev, (void __force *) pmem->virt_addr); pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res, - NULL); + altmap); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { rc = PTR_ERR(pmem->virt_addr); -- cgit v1.2.3 From 01c8f1c44b83a0825b573e7c723b033cece37b86 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:40 -0800 Subject: mm, dax, gpu: convert vm_insert_mixed to pfn_t Convert the raw unsigned long 'pfn' argument to pfn_t for the purpose of evaluating the PFN_MAP and PFN_DEV flags. When both are set it triggers _PAGE_DEVMAP to be set in the resulting pte. There are no functional changes to the gpu drivers as a result of this conversion. 
Signed-off-by: Dan Williams Cc: Dave Hansen Cc: David Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 4 +++- drivers/gpu/drm/gma500/framebuffer.c | 4 +++- drivers/gpu/drm/msm/msm_gem.c | 4 +++- drivers/gpu/drm/omapdrm/omap_gem.c | 7 +++++-- drivers/gpu/drm/ttm/ttm_bo_vm.c | 4 +++- 5 files changed, 17 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 252eb301470c..32358c5e3db4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -14,6 +14,7 @@ #include #include +#include #include #include "exynos_drm_drv.h" @@ -490,7 +491,8 @@ int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } pfn = page_to_pfn(exynos_gem->pages[page_offset]); - ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn); + ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, + __pfn_to_pfn_t(pfn, PFN_DEV)); out: switch (ret) { diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 2eaf1b31c7bd..72bc979fa0dc 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -132,7 +133,8 @@ static int psbfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) for (i = 0; i < page_num; i++) { pfn = (phys_addr >> PAGE_SHIFT); - ret = vm_insert_mixed(vma, address, pfn); + ret = vm_insert_mixed(vma, address, + __pfn_to_pfn_t(pfn, PFN_DEV)); if (unlikely((ret == -EBUSY) || (ret != 0 && i > 0))) break; else if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index c76cc853b08a..3cedb8d5c855 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "msm_drv.h" 
#include "msm_gem.h" @@ -222,7 +223,8 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address, pfn, pfn << PAGE_SHIFT); - ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn); + ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, + __pfn_to_pfn_t(pfn, PFN_DEV)); out_unlock: mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 7ed08fdc4c42..ceba5459ceb7 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -19,6 +19,7 @@ #include #include +#include #include @@ -385,7 +386,8 @@ static int fault_1d(struct drm_gem_object *obj, VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address, pfn, pfn << PAGE_SHIFT); - return vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn); + return vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, + __pfn_to_pfn_t(pfn, PFN_DEV)); } /* Special handling for the case of faulting in 2d tiled buffers */ @@ -478,7 +480,8 @@ static int fault_2d(struct drm_gem_object *obj, pfn, pfn << PAGE_SHIFT); for (i = n; i > 0; i--) { - vm_insert_mixed(vma, (unsigned long)vaddr, pfn); + vm_insert_mixed(vma, (unsigned long)vaddr, + __pfn_to_pfn_t(pfn, PFN_DEV)); pfn += usergart[fmt].stride_pfn; vaddr += PAGE_SIZE * m; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 8fb7213277cc..06d26dc438b2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -229,7 +230,8 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } if (vma->vm_flags & VM_MIXEDMAP) - ret = vm_insert_mixed(&cvma, address, pfn); + ret = vm_insert_mixed(&cvma, address, + __pfn_to_pfn_t(pfn, PFN_DEV)); else ret = vm_insert_pfn(&cvma, address, pfn); -- cgit v1.2.3 From 
468ded03c07e0f2b5f05332bc255add47b1b0dee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:46 -0800 Subject: libnvdimm, pmem: move request_queue allocation earlier in probe Before the dynamically allocated struct pages from devm_memremap_pages() can be put to use outside the driver, we need a mechanism to track whether they are still in use at teardown. Towards that goal reorder the initialization sequence to allow the 'q_usage_counter' from the request_queue to be used by the devm_memremap_pages() implementation (in subsequent patches). Signed-off-by: Dan Williams Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/nvdimm/pmem.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 03d86687f97b..328173d7e1ac 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -159,6 +159,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res, int id) { struct pmem_device *pmem; + struct request_queue *q; pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) @@ -176,6 +177,10 @@ static struct pmem_device *pmem_alloc(struct device *dev, return ERR_PTR(-EBUSY); } + q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); + if (!q) + return ERR_PTR(-ENOMEM); + pmem->pfn_flags = PFN_DEV; if (pmem_should_map_pages(dev)) { pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, @@ -186,9 +191,12 @@ static struct pmem_device *pmem_alloc(struct device *dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); - if (IS_ERR(pmem->virt_addr)) + if (IS_ERR(pmem->virt_addr)) { + blk_cleanup_queue(q); return (void __force *) pmem->virt_addr; + } + pmem->pmem_queue = q; return pmem; } @@ -208,10 +216,6 @@ static int pmem_attach_disk(struct device *dev, int nid = dev_to_node(dev); struct gendisk *disk; - pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid); - 
if (!pmem->pmem_queue) - return -ENOMEM; - blk_queue_make_request(pmem->pmem_queue, pmem_make_request); blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); @@ -446,19 +450,22 @@ static int nd_pmem_probe(struct device *dev) return -ENOMEM; nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); - if (is_nd_btt(dev)) + if (is_nd_btt(dev)) { + /* btt allocates its own request_queue */ + blk_cleanup_queue(pmem->pmem_queue); + pmem->pmem_queue = NULL; return nvdimm_namespace_attach_btt(ndns); + } if (is_nd_pfn(dev)) return nvdimm_namespace_attach_pfn(ndns); - if (nd_btt_probe(ndns, pmem) == 0) { - /* we'll come back as btt-pmem */ - return -ENXIO; - } - - if (nd_pfn_probe(ndns, pmem) == 0) { - /* we'll come back as pfn-pmem */ + if (nd_btt_probe(ndns, pmem) == 0 || nd_pfn_probe(ndns, pmem) == 0) { + /* + * We'll come back as either btt-pmem, or pfn-pmem, so + * drop the queue allocation for now. + */ + blk_cleanup_queue(pmem->pmem_queue); return -ENXIO; } -- cgit v1.2.3 From 5c2c2587b13235bf8b5c9027589f22eff68bdf49 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:49 -0800 Subject: mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup get_dev_pagemap() enables paths like get_user_pages() to pin a dynamically mapped pfn-range (devm_memremap_pages()) while the resulting struct page objects are in use. Unlike get_page() it may fail if the device is, or is in the process of being, disabled. While the initial lookup of the range may be an expensive list walk, the result is cached to speed up subsequent lookups which are likely to be in the same mapped range. devm_memremap_pages() now requires a reference counter to be specified at init time. For pmem this means moving request_queue allocation into pmem_alloc() so the existing queue usage counter can track "device pages". ZONE_DEVICE pages always have an elevated count and will never be on an lru reclaim list. 
That space in 'struct page' can be redirected for other uses, but for safety introduce a poison value that will always trip __list_add() to assert. This allows half of the struct list_head storage to be reclaimed with some assurance to back up the assumption that the page count never goes to zero and a list_add() is never attempted. Signed-off-by: Dan Williams Tested-by: Logan Gunthorpe Cc: Dave Hansen Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/nvdimm/pmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 328173d7e1ac..7edf31671dab 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -184,7 +184,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, pmem->pfn_flags = PFN_DEV; if (pmem_should_map_pages(dev)) { pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, - NULL); + &q->q_usage_counter, NULL); pmem->pfn_flags |= PFN_MAP; } else pmem->virt_addr = (void __pmem *) devm_memremap(dev, @@ -365,6 +365,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) struct vmem_altmap *altmap; struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; + struct request_queue *q; phys_addr_t offset; int rc; struct vmem_altmap __altmap = { @@ -406,9 +407,10 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) /* establish pfn range for lookup, and switch to direct map */ pmem = dev_get_drvdata(dev); + q = pmem->pmem_queue; devm_memunmap(dev, (void __force *) pmem->virt_addr); pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res, - altmap); + &q->q_usage_counter, altmap); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { rc = PTR_ERR(pmem->virt_addr); -- cgit v1.2.3 From 8f57e4d930d48217268315898212518d4d3e0773 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 15 Jan 2016 16:57:58 -0800 Subject: 
include/linux/kernel.h: change abs() macro so it uses consistent return type Rewrite abs() so that its return type does not depend on the architecture and no unexpected type conversions happen inside of it. The only conversion is from unsigned to signed type. char is left as a return type but treated as a signed type regardless of its actual signedness. With the old version, int arguments were promoted to long and depending on architecture a long argument might result in s64 or long return type (which may or may not be the same). This came after some back and forth with Nicolas. The current macro has different return type (for the same input type) depending on architecture which might be mildly irritating. An alternative version would promote to int like so: #define abs(x) __abs_choose_expr(x, long long, \ __abs_choose_expr(x, long, \ __builtin_choose_expr( \ sizeof(x) <= sizeof(int), \ ({ int __x = (x); __x<0?-__x:__x; }), \ ((void)0)))) I have no preference but imagine Linus might. :] Nicolas' argument against is that promoting to int causes inconsistent behaviour: int main(void) { unsigned short a = 0, b = 1, c = a - b; unsigned short d = abs(a - b); unsigned short e = abs(c); printf("%u %u\n", d, e); // prints: 1 65535 } Then again, no sane person expects consistent behaviour from C integer arithmetic. ;) Note: __builtin_types_compatible_p(unsigned char, char) is always false, and __builtin_types_compatible_p(signed char, char) is also always false. 
Signed-off-by: Michal Nazarewicz Reviewed-by: Nicolas Pitre Cc: Srinivas Pandruvada Cc: Wey-Yi Guy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/iio/industrialio-core.c | 9 ++++----- drivers/net/wireless/intel/iwlwifi/dvm/calib.c | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index fd01f3493fc7..af7cc1e65656 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -433,16 +433,15 @@ ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals) scale_db = true; case IIO_VAL_INT_PLUS_MICRO: if (vals[1] < 0) - return sprintf(buf, "-%ld.%06u%s\n", abs(vals[0]), - -vals[1], - scale_db ? " dB" : ""); + return sprintf(buf, "-%d.%06u%s\n", abs(vals[0]), + -vals[1], scale_db ? " dB" : ""); else return sprintf(buf, "%d.%06u%s\n", vals[0], vals[1], scale_db ? " dB" : ""); case IIO_VAL_INT_PLUS_NANO: if (vals[1] < 0) - return sprintf(buf, "-%ld.%09u\n", abs(vals[0]), - -vals[1]); + return sprintf(buf, "-%d.%09u\n", abs(vals[0]), + -vals[1]); else return sprintf(buf, "%d.%09u\n", vals[0], vals[1]); case IIO_VAL_FRACTIONAL: diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/calib.c b/drivers/net/wireless/intel/iwlwifi/dvm/calib.c index 07a4c644fb9b..e9cef9de9ed8 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/calib.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/calib.c @@ -901,7 +901,7 @@ static void iwlagn_gain_computation(struct iwl_priv *priv, /* bound gain by 2 bits value max, 3rd bit is sign */ data->delta_gain_code[i] = min(abs(delta_g), - (long) CHAIN_NOISE_MAX_DELTA_GAIN_CODE); + (s32) CHAIN_NOISE_MAX_DELTA_GAIN_CODE); if (delta_g < 0) /* -- cgit v1.2.3