diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 14:35:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 14:35:59 -0700 |
commit | 12f03ee606914317e7e6a0815e53a48205c31dae (patch) | |
tree | f8579bf77d29b3921e1877e0ae12ec65b5ebc738 /arch | |
parent | d9241b22b58e012f26dd2244508d9f4837402af0 (diff) | |
parent | 004f1afbe199e6ab20805b95aefd83ccd24bc5c7 (diff) |
Merge tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"This update has successfully completed a 0day-kbuild run and has
appeared in a linux-next release. The changes outside of the typical
drivers/nvdimm/ and drivers/acpi/nfit.[ch] paths are related to the
removal of IORESOURCE_CACHEABLE, the introduction of memremap(), and
the introduction of ZONE_DEVICE + devm_memremap_pages().
Summary:
- Introduce ZONE_DEVICE and devm_memremap_pages() as a generic
mechanism for adding device-driver-discovered memory regions to the
kernel's direct map.
This facility is used by the pmem driver to enable pfn_to_page()
operations on the page frames returned by DAX ('direct_access' in
'struct block_device_operations').
For now, the 'memmap' allocation for these "device" pages comes
from "System RAM". Support for allocating the memmap from device
memory will arrive in a later kernel.
- Introduce memremap() to replace usages of ioremap_cache() and
ioremap_wt(). memremap() drops the __iomem annotation for these
mappings to memory that do not have i/o side effects. The
replacement of ioremap_cache() with memremap() is limited to the
pmem driver to ease merging the api change in v4.3.
Completion of the conversion is targeted for v4.4.
- Similar to the usage of memcpy_to_pmem() + wmb_pmem() in the pmem
driver, update the VFS DAX implementation and PMEM api to provide
persistence guarantees for kernel operations on a DAX mapping.
- Convert the ACPI NFIT 'BLK' driver to map the block apertures as
cacheable to improve performance.
- Miscellaneous updates and fixes to libnvdimm including support for
issuing "address range scrub" commands, clarifying the optimal
'sector size' of pmem devices, a clarification of the usage of the
ACPI '_STA' (status) property for DIMM devices, and other minor
fixes"
* tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (34 commits)
libnvdimm, pmem: direct map legacy pmem by default
libnvdimm, pmem: 'struct page' for pmem
libnvdimm, pfn: 'struct page' provider infrastructure
x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB
add devm_memremap_pages
mm: ZONE_DEVICE for "device memory"
mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h
dax: drop size parameter to ->direct_access()
nd_blk: change aperture mapping from WC to WB
nvdimm: change to use generic kvfree()
pmem, dax: have direct_access use __pmem annotation
dax: update I/O path to do proper PMEM flushing
pmem: add copy_from_iter_pmem() and clear_pmem()
pmem, x86: clean up conditional pmem includes
pmem: remove layer when calling arch_has_wmb_pmem()
pmem, x86: move x86 PMEM API to new pmem.h header
libnvdimm, e820: make CONFIG_X86_PMEM_LEGACY a tristate option
pmem: switch to devm_ allocations
devres: add devm_memremap
libnvdimm, btt: write and validate parent_uuid
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/include/asm/memory.h | 6 | ||||
-rw-r--r-- | arch/arm/mach-clps711x/board-cdb89712.c | 2 | ||||
-rw-r--r-- | arch/arm/mach-shmobile/pm-rcar.c | 2 | ||||
-rw-r--r-- | arch/arm64/include/asm/memory.h | 6 | ||||
-rw-r--r-- | arch/ia64/include/asm/io.h | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/cyclone.c | 2 | ||||
-rw-r--r-- | arch/ia64/mm/init.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kernel/pci_of_scan.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 4 | ||||
-rw-r--r-- | arch/powerpc/sysdev/axonram.c | 7 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 2 | ||||
-rw-r--r-- | arch/sh/include/asm/io.h | 1 | ||||
-rw-r--r-- | arch/sh/mm/init.c | 5 | ||||
-rw-r--r-- | arch/sparc/kernel/pci.c | 3 | ||||
-rw-r--r-- | arch/tile/mm/init.c | 2 | ||||
-rw-r--r-- | arch/unicore32/include/asm/memory.h | 6 | ||||
-rw-r--r-- | arch/x86/Kconfig | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/cacheflush.h | 73 | ||||
-rw-r--r-- | arch/x86/include/asm/io.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/pmem.h | 153 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/e820.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/pmem.c | 79 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 4 | ||||
-rw-r--r-- | arch/xtensa/include/asm/io.h | 1 |
26 files changed, 197 insertions, 191 deletions
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index b7f6fb462ea0..98d58bb04ac5 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -119,12 +119,6 @@ #endif /* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT) - -/* * Convert a page to/from a physical address */ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) diff --git a/arch/arm/mach-clps711x/board-cdb89712.c b/arch/arm/mach-clps711x/board-cdb89712.c index 1ec378c334e5..972abdb10028 100644 --- a/arch/arm/mach-clps711x/board-cdb89712.c +++ b/arch/arm/mach-clps711x/board-cdb89712.c @@ -95,7 +95,7 @@ static struct physmap_flash_data cdb89712_bootrom_pdata __initdata = { static struct resource cdb89712_bootrom_resources[] __initdata = { DEFINE_RES_NAMED(CS7_PHYS_BASE, SZ_128, "BOOTROM", IORESOURCE_MEM | - IORESOURCE_CACHEABLE | IORESOURCE_READONLY), + IORESOURCE_READONLY), }; static struct platform_device cdb89712_bootrom_pdev __initdata = { diff --git a/arch/arm/mach-shmobile/pm-rcar.c b/arch/arm/mach-shmobile/pm-rcar.c index 4092ad16e0a4..0af05d288b09 100644 --- a/arch/arm/mach-shmobile/pm-rcar.c +++ b/arch/arm/mach-shmobile/pm-rcar.c @@ -12,7 +12,7 @@ #include <linux/err.h> #include <linux/mm.h> #include <linux/spinlock.h> -#include <asm/io.h> +#include <linux/io.h> #include "pm-rcar.h" /* SYSC Common */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 44a59c20e773..6b4c3ad75a2a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -81,12 +81,6 @@ #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) /* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT) - -/* * Convert a page to/from a physical address */ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 80a7e34be009..9041bbe2b7b4 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -435,6 +435,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo { return ioremap(phys_addr, size); } +#define ioremap_cache ioremap_cache /* diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index 4826ff957a3d..5fa3848ba224 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -4,7 +4,7 @@ #include <linux/errno.h> #include <linux/timex.h> #include <linux/clocksource.h> -#include <asm/io.h> +#include <linux/io.h> /* IBM Summit (EXA) Cyclone counter code*/ #define CYCLONE_CBAR_ADDR 0xFEB00CD0 diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 97e48b0eefc7..1841ef69183d 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -645,7 +645,7 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { pg_data_t *pgdat; struct zone *zone; @@ -656,7 +656,7 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdat = NODE_DATA(nid); zone = pgdat->node_zones + - zone_for_memory(nid, start, size, ZONE_NORMAL); + zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); ret = __add_pages(nid, zone, start_pfn, nr_pages); if (ret) diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index c8c62c7fc31c..2e710c15893f 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -102,7 +102,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; } else if (i == dev->rom_base_reg) { res = &dev->resource[PCI_ROM_RESOURCE]; - flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; + flags |= IORESOURCE_READONLY; } else { printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); continue; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e1fe333da946..22d94c3e6fc4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -113,7 +113,7 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int arch_add_memory(int nid, u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { struct pglist_data *pgdata; struct zone *zone; @@ -128,7 +128,7 @@ int arch_add_memory(int nid, u64 start, u64 size) /* this should work for most non-highmem platforms */ zone = pgdata->node_zones + - zone_for_memory(nid, start, size, 0); + zone_for_memory(nid, start, size, 0, for_device); return __add_pages(nid, zone, start_pfn, nr_pages); } diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index f86250c48b53..d2b79bc336c1 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -141,13 +141,14 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; + void *addr = (void *)(bank->ph_addr + offset); - *kaddr = (void *)(bank->ph_addr + offset); - *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; + *kaddr = (void __pmem *)addr; + *pfn = virt_to_phys(addr) >> PAGE_SHIFT; return bank->size - offset; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 2963b563621c..c3c07d3505ba 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -169,7 +169,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM()); unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS); diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 93ec9066dbef..3280a6bfa503 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -342,6 +342,7 @@ ioremap_cache(phys_addr_t offset, unsigned long size) { return __ioremap_mode(offset, size, PAGE_KERNEL); } +#define ioremap_cache ioremap_cache #ifdef CONFIG_HAVE_IOREMAP_PROT static inline void __iomem * diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 17f486233db0..75491862d900 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -485,7 +485,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { pg_data_t *pgdat; unsigned long start_pfn = PFN_DOWN(start); @@ -496,7 +496,8 @@ int arch_add_memory(int nid, u64 start, u64 size) /* We only have ZONE_NORMAL, so this is easy.. */ ret = __add_pages(nid, pgdat->node_zones + - zone_for_memory(nid, start, size, ZONE_NORMAL), + zone_for_memory(nid, start, size, ZONE_NORMAL, + for_device), start_pfn, nr_pages); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 3a14a35592fe..b91d7f146175 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -231,8 +231,7 @@ static void pci_parse_of_addrs(struct platform_device *op, res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; } else if (i == dev->rom_base_reg) { res = &dev->resource[PCI_ROM_RESOURCE]; - flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE - | IORESOURCE_SIZEALIGN; + flags |= IORESOURCE_READONLY | IORESOURCE_SIZEALIGN; } else { printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); continue; diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 5bd252e3fdc5..d4e1fc41d06d 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -863,7 +863,7 @@ void __init mem_init(void) * memory to the highmem for now. */ #ifndef CONFIG_NEED_MULTIPLE_NODES -int arch_add_memory(u64 start, u64 size) +int arch_add_memory(u64 start, u64 size, bool for_device) { struct pglist_data *pgdata = &contig_page_data; struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h index debafc40200a..3bb0a29fd2d7 100644 --- a/arch/unicore32/include/asm/memory.h +++ b/arch/unicore32/include/asm/memory.h @@ -61,12 +61,6 @@ #endif /* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((paddr) >> PAGE_SHIFT) -#define __pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) - -/* * Convert a page to/from a physical address */ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 117e2f373e50..cc0d73eac047 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,7 +27,8 @@ config X86 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_PMEM_API + select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI @@ -1450,10 +1451,14 @@ config ILLEGAL_POINTER_VALUE source "mm/Kconfig" +config X86_PMEM_LEGACY_DEVICE + bool + config X86_PMEM_LEGACY - bool "Support non-standard NVDIMMs and ADR protected memory" + tristate "Support non-standard NVDIMMs and ADR protected memory" depends on PHYS_ADDR_T_64BIT depends on BLK_DEV + select X86_PMEM_LEGACY_DEVICE select LIBNVDIMM help Treat memory marked using the non-standard e820 type of 12 as used diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 9bf3ea14b9f0..e63aa38e85fb 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages); void clflush_cache_range(void *addr, unsigned int size); +#define mmio_flush_range(addr, size) clflush_cache_range(addr, size) + #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); extern const int rodata_test_data; @@ -109,75 +111,4 @@ static inline int rodata_test(void) } #endif -#ifdef ARCH_HAS_NOCACHE_UACCESS - -/** - * arch_memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Copy data to persistent memory media via non-temporal stores so that - * a subsequent arch_wmb_pmem() can flush cpu and memory controller - * write buffers to guarantee durability. - */ -static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, - size_t n) -{ - int unwritten; - - /* - * We are copying between two kernel buffers, if - * __copy_from_user_inatomic_nocache() returns an error (page - * fault) we would have already reported a general protection fault - * before the WARN+BUG. - */ - unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, - (void __user *) src, n); - if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", - __func__, dst, src, unwritten)) - BUG(); -} - -/** - * arch_wmb_pmem - synchronize writes to persistent memory - * - * After a series of arch_memcpy_to_pmem() operations this drains data - * from cpu write buffers and any platform (memory controller) buffers - * to ensure that written data is durable on persistent memory media. - */ -static inline void arch_wmb_pmem(void) -{ - /* - * wmb() to 'sfence' all previous writes such that they are - * architecturally visible to 'pcommit'. Note, that we've - * already arranged for pmem writes to avoid the cache via - * arch_memcpy_to_pmem(). - */ - wmb(); - pcommit_sfence(); -} - -static inline bool __arch_has_wmb_pmem(void) -{ -#ifdef CONFIG_X86_64 - /* - * We require that wmb() be an 'sfence', that is only guaranteed on - * 64-bit builds - */ - return static_cpu_has(X86_FEATURE_PCOMMIT); -#else - return false; -#endif -} -#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */ -extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n); -extern void arch_wmb_pmem(void); - -static inline bool __arch_has_wmb_pmem(void) -{ - return false; -} -#endif - #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 7cfc085b6879..de25aad07853 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -250,12 +250,6 @@ static inline void flush_write_buffers(void) #endif } -static inline void __pmem *arch_memremap_pmem(resource_size_t offset, - unsigned long size) -{ - return (void __force __pmem *) ioremap_cache(offset, size); -} - #endif /* __KERNEL__ */ extern void native_io_delay(void); diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h new file mode 100644 index 000000000000..d8ce3ec816ab --- /dev/null +++ b/arch/x86/include/asm/pmem.h @@ -0,0 +1,153 @@ +/* + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __ASM_X86_PMEM_H__ +#define __ASM_X86_PMEM_H__ + +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/special_insns.h> + +#ifdef CONFIG_ARCH_HAS_PMEM_API +/** + * arch_memcpy_to_pmem - copy data to persistent memory + * @dst: destination buffer for the copy + * @src: source buffer for the copy + * @n: length of the copy in bytes + * + * Copy data to persistent memory media via non-temporal stores so that + * a subsequent arch_wmb_pmem() can flush cpu and memory controller + * write buffers to guarantee durability. + */ +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) +{ + int unwritten; + + /* + * We are copying between two kernel buffers, if + * __copy_from_user_inatomic_nocache() returns an error (page + * fault) we would have already reported a general protection fault + * before the WARN+BUG. + */ + unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, + (void __user *) src, n); + if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", + __func__, dst, src, unwritten)) + BUG(); +} + +/** + * arch_wmb_pmem - synchronize writes to persistent memory + * + * After a series of arch_memcpy_to_pmem() operations this drains data + * from cpu write buffers and any platform (memory controller) buffers + * to ensure that written data is durable on persistent memory media. + */ +static inline void arch_wmb_pmem(void) +{ + /* + * wmb() to 'sfence' all previous writes such that they are + * architecturally visible to 'pcommit'. Note, that we've + * already arranged for pmem writes to avoid the cache via + * arch_memcpy_to_pmem(). + */ + wmb(); + pcommit_sfence(); +} + +/** + * __arch_wb_cache_pmem - write back a cache range with CLWB + * @vaddr: virtual start address + * @size: number of bytes to write back + * + * Write back a cache range using the CLWB (cache line write back) + * instruction. This function requires explicit ordering with an + * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation. + */ +static inline void __arch_wb_cache_pmem(void *vaddr, size_t size) +{ + u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; + unsigned long clflush_mask = x86_clflush_size - 1; + void *vend = vaddr + size; + void *p; + + for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + p < vend; p += x86_clflush_size) + clwb(p); +} + +/* + * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec + * iterators, so for other types (bvec & kvec) we must do a cache write-back. + */ +static inline bool __iter_needs_pmem_wb(struct iov_iter *i) +{ + return iter_is_iovec(i) == false; +} + +/** + * arch_copy_from_iter_pmem - copy data from an iterator to PMEM + * @addr: PMEM destination address + * @bytes: number of bytes to copy + * @i: iterator with source data + * + * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. + * This function requires explicit ordering with an arch_wmb_pmem() call. + */ +static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, + struct iov_iter *i) +{ + void *vaddr = (void __force *)addr; + size_t len; + + /* TODO: skip the write-back by always using non-temporal stores */ + len = copy_from_iter_nocache(vaddr, bytes, i); + + if (__iter_needs_pmem_wb(i)) + __arch_wb_cache_pmem(vaddr, bytes); + + return len; +} + +/** + * arch_clear_pmem - zero a PMEM memory range + * @addr: virtual start address + * @size: number of bytes to zero + * + * Write zeros into the memory range starting at 'addr' for 'size' bytes. + * This function requires explicit ordering with an arch_wmb_pmem() call. + */ +static inline void arch_clear_pmem(void __pmem *addr, size_t size) +{ + void *vaddr = (void __force *)addr; + + /* TODO: implement the zeroing via non-temporal writes */ + if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) + clear_page(vaddr); + else + memset(vaddr, 0, size); + + __arch_wb_cache_pmem(vaddr, size); +} + +static inline bool __arch_has_wmb_pmem(void) +{ + /* + * We require that wmb() be an 'sfence', that is only guaranteed on + * 64-bit builds + */ + return static_cpu_has(X86_FEATURE_PCOMMIT); +} +#endif /* CONFIG_ARCH_HAS_PMEM_API */ +#endif /* __ASM_X86_PMEM_H__ */ diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 0f457e6eab18..9dafe59cf6e2 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -37,7 +37,7 @@ /* * This is a non-standardized way to represent ADR or NVDIMM regions that * persist over a reboot. The kernel will ignore their special capabilities - * unless the CONFIG_X86_PMEM_LEGACY=y option is set. + * unless the CONFIG_X86_PMEM_LEGACY option is set. * * ( Note that older platforms also used 6 for the same type of memory, * but newer versions switched to 12 as 6 was assigned differently. Some diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3c3622176340..9ffdf25e5b86 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o -obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o +obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 64f90f53bb85..4f00b63d7ff3 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -3,80 +3,17 @@ * Copyright (c) 2015, Intel Corporation. */ #include <linux/platform_device.h> -#include <linux/libnvdimm.h> #include <linux/module.h> -#include <asm/e820.h> - -static void e820_pmem_release(struct device *dev) -{ - struct nvdimm_bus *nvdimm_bus = dev->platform_data; - - if (nvdimm_bus) - nvdimm_bus_unregister(nvdimm_bus); -} - -static struct platform_device e820_pmem = { - .name = "e820_pmem", - .id = -1, - .dev = { - .release = e820_pmem_release, - }, -}; - -static const struct attribute_group *e820_pmem_attribute_groups[] = { - &nvdimm_bus_attribute_group, - NULL, -}; - -static const struct attribute_group *e820_pmem_region_attribute_groups[] = { - &nd_region_attribute_group, - &nd_device_attribute_group, - NULL, -}; static __init int register_e820_pmem(void) { - static struct nvdimm_bus_descriptor nd_desc; - struct device *dev = &e820_pmem.dev; - struct nvdimm_bus *nvdimm_bus; - int rc, i; - - rc = platform_device_register(&e820_pmem); - if (rc) - return rc; - - nd_desc.attr_groups = e820_pmem_attribute_groups; - nd_desc.provider_name = "e820"; - nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); - if (!nvdimm_bus) - goto err; - dev->platform_data = nvdimm_bus; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - struct resource res = { - .flags = IORESOURCE_MEM, - .start = ei->addr, - .end = ei->addr + ei->size - 1, - }; - struct nd_region_desc ndr_desc; - - if (ei->type != E820_PRAM) - continue; - - memset(&ndr_desc, 0, sizeof(ndr_desc)); - ndr_desc.res = &res; - ndr_desc.attr_groups = e820_pmem_region_attribute_groups; - ndr_desc.numa_node = NUMA_NO_NODE; - if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) - goto err; - } - - return 0; - - err: - dev_err(dev, "failed to register legacy persistent memory ranges\n"); - platform_device_unregister(&e820_pmem); - return -ENXIO; + struct platform_device *pdev; + + /* + * See drivers/nvdimm/e820.c for the implementation, this is + * simply here to trigger the module to load on demand. + */ + pdev = platform_device_alloc("e820_pmem", -1); + return platform_device_add(pdev); } device_initcall(register_e820_pmem); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 68aec42545c2..7562f42914b4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -823,11 +823,11 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { struct pglist_data *pgdata = NODE_DATA(nid); struct zone *zone = pgdata->node_zones + - zone_for_memory(nid, start, size, ZONE_HIGHMEM); + zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device); unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3fba623e3ba5..30564e2752d3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -687,11 +687,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) * Memory is added always to NORMAL zone. This means you will never get * additional DMA/DMA32 memory. */ -int arch_add_memory(int nid, u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { struct pglist_data *pgdat = NODE_DATA(nid); struct zone *zone = pgdat->node_zones + - zone_for_memory(nid, start, size, ZONE_NORMAL); + zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h index c39bb6e61911..867840f5400f 100644 --- a/arch/xtensa/include/asm/io.h +++ b/arch/xtensa/include/asm/io.h @@ -57,6 +57,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset, else BUG(); } +#define ioremap_cache ioremap_cache #define ioremap_wc ioremap_nocache #define ioremap_wt ioremap_nocache |