From 2584cf83578c26db144730ef498f4070f82ee3ea Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 10 Aug 2015 23:07:05 -0400
Subject: arch, drivers: don't include <asm/io.h> directly, use <linux/io.h>
 instead

Preparation for uniform definition of ioremap, ioremap_wc, ioremap_wt,
and ioremap_cache, tree-wide.

Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/arm/mach-shmobile/pm-rcar.c | 2 +-
 arch/ia64/kernel/cyclone.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')
diff --git a/arch/arm/mach-shmobile/pm-rcar.c b/arch/arm/mach-shmobile/pm-rcar.c
index 00022ee56f80..9d3dde00c2fe 100644
--- a/arch/arm/mach-shmobile/pm-rcar.c
+++ b/arch/arm/mach-shmobile/pm-rcar.c
@@ -12,7 +12,7 @@
 #include <linux/err.h>
 #include <linux/mm.h>
 #include <linux/spinlock.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include "pm-rcar.h"
 
 /* SYSC */
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 4826ff957a3d..5fa3848ba224 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -4,7 +4,7 @@
 #include <linux/errno.h>
 #include <linux/timex.h>
 #include <linux/clocksource.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 /* IBM Summit (EXA) Cyclone counter code*/
 #define CYCLONE_CBAR_ADDR 0xFEB00CD0
-- 
cgit v1.2.3


From 92b19ff50e8f242392d78b2aacc5b5b672f1796b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 10 Aug 2015 23:07:06 -0400
Subject: cleanup IORESOURCE_CACHEABLE vs ioremap()

Quoting Arnd:
    I was thinking the opposite approach and basically removing all uses
    of IORESOURCE_CACHEABLE from the kernel. There are only a handful of
    them.and we can probably replace them all with hardcoded
    ioremap_cached() calls in the cases they are actually useful.

All existing usages of IORESOURCE_CACHEABLE call ioremap() instead of
ioremap_nocache() if the resource is cacheable, however ioremap() is
uncached by default. Clearly none of the existing usages care about the
cacheability. Particularly devm_ioremap_resource() never worked as
advertised since it always fell back to plain ioremap().

Clean this up as the new direction we want is to convert
ioremap_<type>() usages to memremap(..., flags).

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/arm/mach-clps711x/board-cdb89712.c | 2 +-
 arch/powerpc/kernel/pci_of_scan.c       | 2 +-
 arch/sparc/kernel/pci.c                 | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-clps711x/board-cdb89712.c b/arch/arm/mach-clps711x/board-cdb89712.c
index 1ec378c334e5..972abdb10028 100644
--- a/arch/arm/mach-clps711x/board-cdb89712.c
+++ b/arch/arm/mach-clps711x/board-cdb89712.c
@@ -95,7 +95,7 @@ static struct physmap_flash_data cdb89712_bootrom_pdata __initdata = {
 
 static struct resource cdb89712_bootrom_resources[] __initdata = {
 	DEFINE_RES_NAMED(CS7_PHYS_BASE, SZ_128, "BOOTROM", IORESOURCE_MEM |
-			 IORESOURCE_CACHEABLE | IORESOURCE_READONLY),
+			 IORESOURCE_READONLY),
 };
 
 static struct platform_device cdb89712_bootrom_pdev __initdata = {
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 42e02a2d570b..d4726addff0b 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -102,7 +102,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
 			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
 		} else if (i == dev->rom_base_reg) {
 			res = &dev->resource[PCI_ROM_RESOURCE];
-			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+			flags |= IORESOURCE_READONLY;
 		} else {
 			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
 			continue;
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index c928bc64b4ba..04da147e0712 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -231,8 +231,7 @@ static void pci_parse_of_addrs(struct platform_device *op,
 			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
 		} else if (i == dev->rom_base_reg) {
 			res = &dev->resource[PCI_ROM_RESOURCE];
-			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE
-			      | IORESOURCE_SIZEALIGN;
+			flags |= IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
 		} else {
 			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
 			continue;
-- 
cgit v1.2.3


From 92281dee825f6d2eb07c441437e4196a44b0861c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 10 Aug 2015 23:07:06 -0400
Subject: arch: introduce memremap()

Existing users of ioremap_cache() are mapping memory that is known in
advance to not have i/o side effects.  These users are forced to cast
away the __iomem annotation, or otherwise neglect to fix the sparse
errors thrown when dereferencing pointers to this memory.  Provide
memremap() as a non __iomem annotated ioremap_*() in the case when
ioremap is otherwise a pointer to cacheable memory. Empirically,
ioremap_<cacheable-type>() call sites are seeking memory-like semantics
(e.g.  speculative reads, and prefetching permitted).

memremap() is a break from the ioremap implementation pattern of adding
a new memremap_<type>() for each mapping type and having silent
compatibility fall backs.  Instead, the implementation defines flags
that are passed to the central memremap() and if a mapping type is not
supported by an arch memremap returns NULL.

We introduce a memremap prototype as a trivial wrapper of
ioremap_cache() and ioremap_wt().  Later, once all ioremap_cache() and
ioremap_wt() usage has been removed from drivers we teach archs to
implement arch_memremap() with the ability to strictly enforce the
mapping type.

Cc: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/ia64/include/asm/io.h   | 1 +
 arch/sh/include/asm/io.h     | 1 +
 arch/xtensa/include/asm/io.h | 1 +
 3 files changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 80a7e34be009..9041bbe2b7b4 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -435,6 +435,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo
 {
 	return ioremap(phys_addr, size);
 }
+#define ioremap_cache ioremap_cache
 
 
 /*
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 728c4c571f40..6194e20fccca 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -342,6 +342,7 @@ ioremap_cache(phys_addr_t offset, unsigned long size)
 {
 	return __ioremap_mode(offset, size, PAGE_KERNEL);
 }
+#define ioremap_cache ioremap_cache
 
 #ifdef CONFIG_HAVE_IOREMAP_PROT
 static inline void __iomem *
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index c39bb6e61911..867840f5400f 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -57,6 +57,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset,
 	else
 		BUG();
 }
+#define ioremap_cache ioremap_cache
 
 #define ioremap_wc ioremap_nocache
 #define ioremap_wt ioremap_nocache
-- 
cgit v1.2.3


From e836a256e8fd579c9d7a3685f22981225a1ca451 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 12 Aug 2015 18:42:56 -0400
Subject: pmem: convert to generic memremap

Kill arch_memremap_pmem() and just let the architecture specify the
flags to be passed to memremap().  Default to writethrough by default.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/io.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index cc9c61bc1abe..d241fbd5c87b 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -248,11 +248,7 @@ static inline void flush_write_buffers(void)
 #endif
 }
 
-static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
-	unsigned long size)
-{
-	return (void __force __pmem *) ioremap_cache(offset, size);
-}
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 7a67832c7e44c20935c5d6f2264035a0f7bf0d8f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 19 Aug 2015 00:34:34 -0400
Subject: libnvdimm, e820: make CONFIG_X86_PMEM_LEGACY a tristate option

We currently register a platform device for e820 type-12 memory and
register a nvdimm bus beneath it.  Registering the platform device
triggers the device-core machinery to probe for a driver, but that
search currently comes up empty.  Building the nvdimm-bus registration
into the e820_pmem platform device registration in this way forces
libnvdimm to be built-in.  Instead, convert the built-in portion of
CONFIG_X86_PMEM_LEGACY to simply register a platform device and move the
rest of the logic to the driver for e820_pmem, for the following
reasons:

1/ Letting e820_pmem support be a module allows building and testing
   libnvdimm.ko changes without rebooting

2/ All the normal policy around modules can be applied to e820_pmem
   (unbind to disable and/or blacklisting the module from loading by
   default)

3/ Moving the driver to a generic location and converting it to scan
   "iomem_resource" rather than "e820.map" means any other architecture can
   take advantage of this simple nvdimm resource discovery mechanism by
   registering a resource named "Persistent Memory (legacy)"

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/Kconfig                 |  6 ++-
 arch/x86/include/uapi/asm/e820.h |  2 +-
 arch/x86/kernel/Makefile         |  2 +-
 arch/x86/kernel/pmem.c           | 79 ++++------------------------------------
 4 files changed, 15 insertions(+), 74 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b3a1a5d77d92..76c61154ed50 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1426,10 +1426,14 @@ config ILLEGAL_POINTER_VALUE
 
 source "mm/Kconfig"
 
+config X86_PMEM_LEGACY_DEVICE
+	bool
+
 config X86_PMEM_LEGACY
-	bool "Support non-standard NVDIMMs and ADR protected memory"
+	tristate "Support non-standard NVDIMMs and ADR protected memory"
 	depends on PHYS_ADDR_T_64BIT
 	depends on BLK_DEV
+	select X86_PMEM_LEGACY_DEVICE
 	select LIBNVDIMM
 	help
 	  Treat memory marked using the non-standard e820 type of 12 as used
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index 0f457e6eab18..9dafe59cf6e2 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -37,7 +37,7 @@
 /*
  * This is a non-standardized way to represent ADR or NVDIMM regions that
  * persist over a reboot.  The kernel will ignore their special capabilities
- * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
+ * unless the CONFIG_X86_PMEM_LEGACY option is set.
  *
  * ( Note that older platforms also used 6 for the same type of memory,
  *   but newer versions switched to 12 as 6 was assigned differently.  Some
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0f15af41bd80..ac2bb7e28ba2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -92,7 +92,7 @@ obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
-obj-$(CONFIG_X86_PMEM_LEGACY)	+= pmem.o
+obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 64f90f53bb85..4f00b63d7ff3 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -3,80 +3,17 @@
  * Copyright (c) 2015, Intel Corporation.
  */
 #include <linux/platform_device.h>
-#include <linux/libnvdimm.h>
 #include <linux/module.h>
-#include <asm/e820.h>
-
-static void e820_pmem_release(struct device *dev)
-{
-	struct nvdimm_bus *nvdimm_bus = dev->platform_data;
-
-	if (nvdimm_bus)
-		nvdimm_bus_unregister(nvdimm_bus);
-}
-
-static struct platform_device e820_pmem = {
-	.name = "e820_pmem",
-	.id = -1,
-	.dev = {
-		.release = e820_pmem_release,
-	},
-};
-
-static const struct attribute_group *e820_pmem_attribute_groups[] = {
-	&nvdimm_bus_attribute_group,
-	NULL,
-};
-
-static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
-	&nd_region_attribute_group,
-	&nd_device_attribute_group,
-	NULL,
-};
 
 static __init int register_e820_pmem(void)
 {
-	static struct nvdimm_bus_descriptor nd_desc;
-	struct device *dev = &e820_pmem.dev;
-	struct nvdimm_bus *nvdimm_bus;
-	int rc, i;
-
-	rc = platform_device_register(&e820_pmem);
-	if (rc)
-		return rc;
-
-	nd_desc.attr_groups = e820_pmem_attribute_groups;
-	nd_desc.provider_name = "e820";
-	nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
-	if (!nvdimm_bus)
-		goto err;
-	dev->platform_data = nvdimm_bus;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		struct resource res = {
-			.flags	= IORESOURCE_MEM,
-			.start	= ei->addr,
-			.end	= ei->addr + ei->size - 1,
-		};
-		struct nd_region_desc ndr_desc;
-
-		if (ei->type != E820_PRAM)
-			continue;
-
-		memset(&ndr_desc, 0, sizeof(ndr_desc));
-		ndr_desc.res = &res;
-		ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
-		ndr_desc.numa_node = NUMA_NO_NODE;
-		if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
-			goto err;
-	}
-
-	return 0;
-
- err:
-	dev_err(dev, "failed to register legacy persistent memory ranges\n");
-	platform_device_unregister(&e820_pmem);
-	return -ENXIO;
+	struct platform_device *pdev;
+
+	/*
+	 * See drivers/nvdimm/e820.c for the implementation, this is
+	 * simply here to trigger the module to load on demand.
+	 */
+	pdev = platform_device_alloc("e820_pmem", -1);
+	return platform_device_add(pdev);
 }
 device_initcall(register_e820_pmem);
-- 
cgit v1.2.3


From 40603526569b304dd92f720f2f8ab11e828ea145 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 18 Aug 2015 13:55:36 -0600
Subject: pmem, x86: move x86 PMEM API to new pmem.h header

Move the x86 PMEM API implementation out of asm/cacheflush.h and into
its own header asm/pmem.h.  This will allow members of the PMEM API to
be more easily identified on this and other architectures.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/cacheflush.h | 71 ------------------------------
 arch/x86/include/asm/pmem.h       | 92 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 71 deletions(-)
 create mode 100644 arch/x86/include/asm/pmem.h

(limited to 'arch')

diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 9bf3ea14b9f0..471418ac1ff9 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -109,75 +109,4 @@ static inline int rodata_test(void)
 }
 #endif
 
-#ifdef ARCH_HAS_NOCACHE_UACCESS
-
-/**
- * arch_memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
- */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t n)
-{
-	int unwritten;
-
-	/*
-	 * We are copying between two kernel buffers, if
-	 * __copy_from_user_inatomic_nocache() returns an error (page
-	 * fault) we would have already reported a general protection fault
-	 * before the WARN+BUG.
-	 */
-	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-			(void __user *) src, n);
-	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-				__func__, dst, src, unwritten))
-		BUG();
-}
-
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-	/*
-	 * wmb() to 'sfence' all previous writes such that they are
-	 * architecturally visible to 'pcommit'.  Note, that we've
-	 * already arranged for pmem writes to avoid the cache via
-	 * arch_memcpy_to_pmem().
-	 */
-	wmb();
-	pcommit_sfence();
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-#ifdef CONFIG_X86_64
-	/*
-	 * We require that wmb() be an 'sfence', that is only guaranteed on
-	 * 64-bit builds
-	 */
-	return static_cpu_has(X86_FEATURE_PCOMMIT);
-#else
-	return false;
-#endif
-}
-#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
-extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
-extern void arch_wmb_pmem(void);
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-	return false;
-}
-#endif
-
 #endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
new file mode 100644
index 000000000000..f43462cc91aa
--- /dev/null
+++ b/arch/x86/include/asm/pmem.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __ASM_X86_PMEM_H__
+#define __ASM_X86_PMEM_H__
+
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
+
+#ifdef ARCH_HAS_NOCACHE_UACCESS
+
+/**
+ * arch_memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Copy data to persistent memory media via non-temporal stores so that
+ * a subsequent arch_wmb_pmem() can flush cpu and memory controller
+ * write buffers to guarantee durability.
+ */
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
+{
+	int unwritten;
+
+	/*
+	 * We are copying between two kernel buffers, if
+	 * __copy_from_user_inatomic_nocache() returns an error (page
+	 * fault) we would have already reported a general protection fault
+	 * before the WARN+BUG.
+	 */
+	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
+			(void __user *) src, n);
+	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
+				__func__, dst, src, unwritten))
+		BUG();
+}
+
+/**
+ * arch_wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of arch_memcpy_to_pmem() operations this drains data
+ * from cpu write buffers and any platform (memory controller) buffers
+ * to ensure that written data is durable on persistent memory media.
+ */
+static inline void arch_wmb_pmem(void)
+{
+	/*
+	 * wmb() to 'sfence' all previous writes such that they are
+	 * architecturally visible to 'pcommit'.  Note, that we've
+	 * already arranged for pmem writes to avoid the cache via
+	 * arch_memcpy_to_pmem().
+	 */
+	wmb();
+	pcommit_sfence();
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+#ifdef CONFIG_X86_64
+	/*
+	 * We require that wmb() be an 'sfence', that is only guaranteed on
+	 * 64-bit builds
+	 */
+	return static_cpu_has(X86_FEATURE_PCOMMIT);
+#else
+	return false;
+#endif
+}
+#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
+extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
+extern void arch_wmb_pmem(void);
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+	return false;
+}
+#endif
+
+#endif /* __ASM_X86_PMEM_H__ */
-- 
cgit v1.2.3


From 18279b467a9d89afe44afbc19d768e834dbf4545 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 18 Aug 2015 13:55:37 -0600
Subject: pmem: remove layer when calling arch_has_wmb_pmem()

Prior to this change arch_has_wmb_pmem() was only called by
arch_has_pmem_api().  Both arch_has_wmb_pmem() and arch_has_pmem_api()
checked to make sure that CONFIG_ARCH_HAS_PMEM_API was enabled.

Instead, remove the old arch_has_wmb_pmem() wrapper to be rid of one
extra layer of indirection and the redundant CONFIG_ARCH_HAS_PMEM_API
check. Rename __arch_has_wmb_pmem() to arch_has_wmb_pmem() since we no
longer have a wrapper, and just have arch_has_pmem_api() call the
architecture specific arch_has_wmb_pmem() directly.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index f43462cc91aa..1e8dbb72d6ee 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -67,7 +67,7 @@ static inline void arch_wmb_pmem(void)
 	pcommit_sfence();
 }
 
-static inline bool __arch_has_wmb_pmem(void)
+static inline bool arch_has_wmb_pmem(void)
 {
 #ifdef CONFIG_X86_64
 	/*
-- 
cgit v1.2.3


From 4a370df5534ef727cba9a9d74bf22e0609f91d6e Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 18 Aug 2015 13:55:38 -0600
Subject: pmem, x86: clean up conditional pmem includes

Prior to this change x86_64 used the pmem defines in
arch/x86/include/asm/pmem.h, and UM used the default ones at the
top of include/linux/pmem.h.  The inclusion or exclusion in linux/pmem.h
was controlled by CONFIG_ARCH_HAS_PMEM_API, but the ones in asm/pmem.h
were controlled by ARCH_HAS_NOCACHE_UACCESS.

Instead, control them both with CONFIG_ARCH_HAS_PMEM_API so that it's
clear that they are related and we don't run into the possibility where
they are both included or excluded.  Also remove a bunch of stale
function prototypes meant for UM in asm/pmem.h - these just conflicted
with the inline defaults in linux/pmem.h and gave compile errors.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 1e8dbb72d6ee..7f3413fce46c 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -18,8 +18,7 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
-#ifdef ARCH_HAS_NOCACHE_UACCESS
-
+#ifdef CONFIG_ARCH_HAS_PMEM_API
 /**
  * arch_memcpy_to_pmem - copy data to persistent memory
  * @dst: destination buffer for the copy
@@ -79,14 +78,6 @@ static inline bool arch_has_wmb_pmem(void)
 	return false;
 #endif
 }
-#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
-extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
-extern void arch_wmb_pmem(void);
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-	return false;
-}
-#endif
+#endif /* CONFIG_ARCH_HAS_PMEM_API */
 
 #endif /* __ASM_X86_PMEM_H__ */
-- 
cgit v1.2.3


From 5de490daec8b6354b90d5c9d3e2415b195f5adb6 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 18 Aug 2015 13:55:39 -0600
Subject: pmem: add copy_from_iter_pmem() and clear_pmem()

Add support for two new PMEM APIs, copy_from_iter_pmem() and
clear_pmem().  copy_from_iter_pmem() is used to copy data from an
iterator into a PMEM buffer.  clear_pmem() zeros a PMEM memory range.

Both of these new APIs must be explicitly ordered using a wmb_pmem()
function call and are implemented in such a way that the wmb_pmem()
will make the stores to PMEM durable.  Because both APIs are unordered
they can be called as needed without introducing any unwanted memory
barriers.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 75 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 7f3413fce46c..a3a0df6545ee 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -66,6 +66,81 @@ static inline void arch_wmb_pmem(void)
 	pcommit_sfence();
 }
 
+/**
+ * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * @vaddr:	virtual start address
+ * @size:	number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+ * instruction.  This function requires explicit ordering with an
+ * arch_wmb_pmem() call.  This API is internal to the x86 PMEM implementation.
+ */
+static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+{
+	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
+	unsigned long clflush_mask = x86_clflush_size - 1;
+	void *vend = vaddr + size;
+	void *p;
+
+	for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+	     p < vend; p += x86_clflush_size)
+		clwb(p);
+}
+
+/*
+ * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
+ * iterators, so for other types (bvec & kvec) we must do a cache write-back.
+ */
+static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
+{
+	return iter_is_iovec(i) == false;
+}
+
+/**
+ * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
+ * @addr:	PMEM destination address
+ * @bytes:	number of bytes to copy
+ * @i:		iterator with source data
+ *
+ * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+		struct iov_iter *i)
+{
+	void *vaddr = (void __force *)addr;
+	size_t len;
+
+	/* TODO: skip the write-back by always using non-temporal stores */
+	len = copy_from_iter_nocache(vaddr, bytes, i);
+
+	if (__iter_needs_pmem_wb(i))
+		__arch_wb_cache_pmem(vaddr, bytes);
+
+	return len;
+}
+
+/**
+ * arch_clear_pmem - zero a PMEM memory range
+ * @addr:	virtual start address
+ * @size:	number of bytes to zero
+ *
+ * Write zeros into the memory range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+{
+	void *vaddr = (void __force *)addr;
+
+	/* TODO: implement the zeroing via non-temporal writes */
+	if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0)
+		clear_page(vaddr);
+	else
+		memset(vaddr, 0, size);
+
+	__arch_wb_cache_pmem(vaddr, size);
+}
+
 static inline bool arch_has_wmb_pmem(void)
 {
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From e2e05394e4a3420dab96f728df4531893494e15d Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 18 Aug 2015 13:55:41 -0600
Subject: pmem, dax: have direct_access use __pmem annotation

Update the annotation for the kaddr pointer returned by direct_access()
so that it is a __pmem pointer.  This is consistent with the PMEM driver
and with how this direct_access() pointer is used in the DAX code.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/sysdev/axonram.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index ee90db17b097..a2be2a66dab6 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -141,13 +141,14 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void **kaddr, unsigned long *pfn, long size)
+		       void __pmem **kaddr, unsigned long *pfn, long size)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
 	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
+	void *addr = (void *)(bank->ph_addr + offset);
 
-	*kaddr = (void *)(bank->ph_addr + offset);
-	*pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
+	*kaddr = (void __pmem *)addr;
+	*pfn = virt_to_phys(addr) >> PAGE_SHIFT;
 
 	return bank->size - offset;
 }
-- 
cgit v1.2.3


From 67a3e8fe90156d41cd480d3dfbb40f3bc007c262 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Thu, 27 Aug 2015 13:14:20 -0600
Subject: nd_blk: change aperture mapping from WC to WB

This should result in a pretty sizeable performance gain for reads.  For
rough comparison I did some simple read testing using PMEM to compare
reads of write combining (WC) mappings vs write-back (WB).  This was
done on a random lab machine.

PMEM reads from a write combining mapping:
	# dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
	100000+0 records in
	100000+0 records out
	409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s

PMEM reads from a write-back mapping:
	# dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
	1000000+0 records in
	1000000+0 records out
	4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s

To be able to safely support a write-back aperture I needed to add
support for the "read flush" _DSM flag, as outlined in the DSM spec:

http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

This flag tells the ND BLK driver that it needs to flush the cache lines
associated with the aperture after the aperture is moved but before any
new data is read.  This ensures that any stale cache lines from the
previous contents of the aperture will be discarded from the processor
cache, and the new data will be read properly from the DIMM.  We know
that the cache lines are clean and will be discarded without any
writeback because either a) the previous aperture operation was a read,
and we never modified the contents of the aperture, or b) the previous
aperture operation was a write and we must have written back the dirtied
contents of the aperture to the DIMM before the I/O was completed.

In order to add support for the "read flush" flag I needed to add a
generic routine to invalidate cache lines, mmio_flush_range().  This is
protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently
only supported on x86.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/Kconfig                  | 1 +
 arch/x86/include/asm/cacheflush.h | 2 ++
 arch/x86/include/asm/io.h         | 2 --
 arch/x86/include/asm/pmem.h       | 2 ++
 4 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b3a1a5d77d92..5d4980e6bc4f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,7 @@ config X86
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_PMEM_API
+	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 471418ac1ff9..e63aa38e85fb 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages);
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
+
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 extern const int rodata_test_data;
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d241fbd5c87b..83ec9b1d77cc 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -248,8 +248,6 @@ static inline void flush_write_buffers(void)
 #endif
 }
 
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index a3a0df6545ee..bb026c5adf8a 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -18,6 +18,8 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
+
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 /**
  * arch_memcpy_to_pmem - copy data to persistent memory
-- 
cgit v1.2.3


From cb389b9c0e00c30c9daf20287f7d91e2466edbb1 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Aug 2015 17:41:00 -0400
Subject: dax: drop size parameter to ->direct_access()

None of the implementations currently use it.  The common
bdev_direct_access() entry point handles all the size checks before
calling ->direct_access().

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/sysdev/axonram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index a2be2a66dab6..4419c84ac15a 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -141,7 +141,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void __pmem **kaddr, unsigned long *pfn, long size)
+		       void __pmem **kaddr, unsigned long *pfn)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
 	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
-- 
cgit v1.2.3


From 012dcef3f058385268630c0003e9b7f8dcafbeb4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 7 Aug 2015 17:41:01 -0400
Subject: mm: move __phys_to_pfn and __pfn_to_phys to
 asm/generic/memory_model.h

Three architectures already define these, and we'll need them genericly
soon.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/arm/include/asm/memory.h       | 6 ------
 arch/arm64/include/asm/memory.h     | 6 ------
 arch/unicore32/include/asm/memory.h | 6 ------
 3 files changed, 18 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index b7f6fb462ea0..98d58bb04ac5 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -118,12 +118,6 @@
 #define DTCM_OFFSET	UL(0xfffe8000)
 #endif
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define	__phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
-#define	__pfn_to_phys(pfn)	((phys_addr_t)(pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index f800d45ea226..d808bb688751 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -80,12 +80,6 @@
 #define __virt_to_phys(x)	(((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
 #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define	__phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
-#define	__pfn_to_phys(pfn)	((phys_addr_t)(pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */
diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h
index debafc40200a..3bb0a29fd2d7 100644
--- a/arch/unicore32/include/asm/memory.h
+++ b/arch/unicore32/include/asm/memory.h
@@ -60,12 +60,6 @@
 #define __phys_to_virt(x)	((x) - PHYS_OFFSET + PAGE_OFFSET)
 #endif
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define	__phys_to_pfn(paddr)	((paddr) >> PAGE_SHIFT)
-#define	__pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */
-- 
cgit v1.2.3


From 033fbae988fcb67e5077203512181890848b8e90 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 9 Aug 2015 15:29:06 -0400
Subject: mm: ZONE_DEVICE for "device memory"

While pmem is usable as a block device or via DAX mappings to userspace
there are several usage scenarios that can not target pmem due to its
lack of struct page coverage. In preparation for "hot plugging" pmem
into the vmemmap add ZONE_DEVICE as a new zone to tag these pages
separately from the ones that are subject to standard page allocations.
Importantly "device memory" can be removed at will by userspace
unbinding the driver of the device.

Having a separate zone prevents allocation and otherwise marks these
pages that are distinct from typical uniform memory.  Device memory has
different lifetime and performance characteristics than RAM.  However,
since we have run out of ZONES_SHIFT bits this functionality currently
depends on sacrificing ZONE_DMA.

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Jerome Glisse <j.glisse@gmail.com>
[hch: various simplifications in the arch interface]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/ia64/mm/init.c   | 4 ++--
 arch/powerpc/mm/mem.c | 4 ++--
 arch/s390/mm/init.c   | 2 +-
 arch/sh/mm/init.c     | 5 +++--
 arch/tile/mm/init.c   | 2 +-
 arch/x86/mm/init_32.c | 4 ++--
 arch/x86/mm/init_64.c | 4 ++--
 7 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 97e48b0eefc7..1841ef69183d 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -645,7 +645,7 @@ mem_init (void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	pg_data_t *pgdat;
 	struct zone *zone;
@@ -656,7 +656,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	pgdat = NODE_DATA(nid);
 
 	zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL);
+		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
 	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
 	if (ret)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0f11819d8f1d..6571cfb05668 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -113,7 +113,7 @@ int memory_add_physaddr_to_nid(u64 start)
 }
 #endif
 
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdata;
 	struct zone *zone;
@@ -128,7 +128,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	/* this should work for most non-highmem platforms */
 	zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, 0);
+		zone_for_memory(nid, start, size, 0, for_device);
 
 	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 76e873748b56..48ee78be88ba 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -168,7 +168,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
 	unsigned long start_pfn = PFN_DOWN(start);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 2790b6a64157..c1490096b863 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -485,7 +485,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	pg_data_t *pgdat;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -496,7 +496,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	/* We only have ZONE_NORMAL, so this is easy.. */
 	ret = __add_pages(nid, pgdat->node_zones +
-			zone_for_memory(nid, start, size, ZONE_NORMAL),
+			zone_for_memory(nid, start, size, ZONE_NORMAL,
+			for_device),
 			start_pfn, nr_pages);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 5bd252e3fdc5..d4e1fc41d06d 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -863,7 +863,7 @@ void __init mem_init(void)
  * memory to the highmem for now.
  */
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-int arch_add_memory(u64 start, u64 size)
+int arch_add_memory(u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdata = &contig_page_data;
 	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8340e45c891a..2a9237d20a70 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -822,11 +822,11 @@ void __init mem_init(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdata = NODE_DATA(nid);
 	struct zone *zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, ZONE_HIGHMEM);
+		zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3fba623e3ba5..30564e2752d3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -687,11 +687,11 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
  * Memory is added always to NORMAL zone. This means you will never get
  * additional DMA/DMA32 memory.
  */
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	struct zone *zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL);
+		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
-- 
cgit v1.2.3


From 96601adb745186ccbcf5b078d4756f13381ec2af Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 24 Aug 2015 18:29:38 -0400
Subject: x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB

Given that a write-back (WB) mapping plus non-temporal stores is
expected to be the most efficient way to access PMEM, update the
definition of ARCH_HAS_PMEM_API to imply arch support for
WB-mapped-PMEM.  This is needed as a pre-requisite for adding PMEM to
the direct map and mapping it with struct page.

The above clarification for X86_64 means that memcpy_to_pmem() is
permitted to use the non-temporal arch_memcpy_to_pmem() rather than
needlessly fall back to default_memcpy_to_pmem() when the pcommit
instruction is not available.  When arch_memcpy_to_pmem() is not
guaranteed to flush writes out of cache, i.e. on older X86_32
implementations where non-temporal stores may just dirty cache,
ARCH_HAS_PMEM_API is simply disabled.

The default fall back for persistent memory handling remains.  Namely,
map it with the WT (write-through) cache-type and hope for the best.

arch_has_pmem_api() is updated to only indicate whether the arch
provides the proper helpers to meet the minimum "writes are visible
outside the cache hierarchy after memcpy_to_pmem() + wmb_pmem()".  Code
that cares whether wmb_pmem() actually flushes writes to pmem must now
call arch_has_wmb_pmem() directly.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
[hch: set ARCH_HAS_PMEM_API=n on x86_32]
Reviewed-by: Christoph Hellwig <hch@lst.de>
[toshi: x86_32 compile fixes]
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/Kconfig            | 2 +-
 arch/x86/include/asm/pmem.h | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 03ab6122325a..ef4c6bbb3af1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -27,7 +27,7 @@ config X86
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
-	select ARCH_HAS_PMEM_API
+	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index bb026c5adf8a..d8ce3ec816ab 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -18,8 +18,6 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
-
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 /**
  * arch_memcpy_to_pmem - copy data to persistent memory
@@ -143,18 +141,13 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
 	__arch_wb_cache_pmem(vaddr, size);
 }
 
-static inline bool arch_has_wmb_pmem(void)
+static inline bool __arch_has_wmb_pmem(void)
 {
-#ifdef CONFIG_X86_64
 	/*
 	 * We require that wmb() be an 'sfence', that is only guaranteed on
 	 * 64-bit builds
 	 */
 	return static_cpu_has(X86_FEATURE_PCOMMIT);
-#else
-	return false;
-#endif
 }
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
-
 #endif /* __ASM_X86_PMEM_H__ */
-- 
cgit v1.2.3