From 0aed55af88345b5d673240f90e671d79662fb01e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:22:50 -0700 Subject: x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations The pmem driver has a need to transfer data with a persistent memory destination and be able to rely on the fact that the destination writes are not cached. It is sufficient for the writes to be flushed to a cpu-store-buffer (non-temporal / "movnt" in x86 terms), as we expect userspace to call fsync() to ensure data-writes have reached a power-fail-safe zone in the platform. The fsync() triggers a REQ_FUA or REQ_FLUSH to the pmem driver which will turn around and fence previous writes with an "sfence". Implement a __copy_from_user_inatomic_flushcache, memcpy_page_flushcache, and memcpy_flushcache, that guarantee that the destination buffer is not dirty in the cpu cache on completion. The new copy_from_iter_flushcache and sub-routines will be used to replace the "pmem api" (include/linux/pmem.h + arch/x86/include/asm/pmem.h). The availability of copy_from_iter_flushcache() and memcpy_flushcache() are gated by the CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE config symbol, and fallback to copy_from_iter_nocache() and plain memcpy() otherwise. This is meant to satisfy the concern from Linus that if a driver wants to do something beyond the normal nocache semantics it should be something private to that driver [1], and Al's concern that anything uaccess related belongs with the rest of the uaccess code [2]. The first consumer of this interface is a new 'copy_from_iter' dax operation so that pmem can inject cache maintenance operations without imposing this overhead on other dax-capable drivers. [1]: https://lists.01.org/pipermail/linux-nvdimm/2017-January/008364.html [2]: https://lists.01.org/pipermail/linux-nvdimm/2017-April/009942.html Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: "H. Peter Anvin" Cc: Al Viro Cc: Thomas Gleixner Cc: Matthew Wilcox Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/dax.h | 3 +++ include/linux/string.h | 6 ++++++ include/linux/uio.h | 15 +++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 5ec1f6c47716..bbe79ed90e2b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -16,6 +16,9 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); + /* copy_from_iter: dax-driver override for default copy_from_iter */ + size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); }; #if IS_ENABLED(CONFIG_DAX) diff --git a/include/linux/string.h b/include/linux/string.h index 537918f8a98e..7439d83eaa33 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src, return 0; } #endif +#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE +static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + memcpy(dst, src, cnt); +} +#endif void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); diff --git a/include/linux/uio.h b/include/linux/uio.h index f2d36a3d3005..55cd54a0e941 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/* + * Note, users like pmem that depend on the stricter semantics of + * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for + * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the + * destination is flushed from the cache on return. + */ +size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); +#else +static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes, + struct iov_iter *i) +{ + return copy_from_iter_nocache(addr, bytes, i); +} +#endif bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); -- cgit v1.2.3 From 7e026c8c0a4200da86bc51edeaad79dcdccf78ca Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:57:56 -0700 Subject: dm: add ->copy_from_iter() dax operation support Allow device-mapper to route copy_from_iter operations to the per-target implementation. In order for the device stacking to work we need a dax_dev and a pgoff relative to that device. This gives each layer of the stack the information it needs to look up the operation pointer for the next level. This conceptually allows for an array of mixed device drivers with varying copy_from_iter implementations. Reviewed-by: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- include/linux/dax.h | 2 ++ include/linux/device-mapper.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index bbe79ed90e2b..28e398f8c59e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -78,6 +78,8 @@ void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); /* * We use lowest available bit in exceptional entry for locking, one bit for diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index f4c639c0c362..11c8a0a92f9c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -132,6 +132,8 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -181,6 +183,7 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; + dm_dax_copy_from_iter_fn dax_copy_from_iter; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From c12c48ce869d72029d70666f615cbd8f67fc14e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 10:59:15 +0900 Subject: libnvdimm, label: add v1.2 interleave-set-cookie algorithm The interleave-set-cookie algorithm is extended to incorporate all the same components that are used to generate an nvdimm unique-id. For backwards compatibility we still maintain the old v1.1 definition. Reported-by: Nicholas Moulin Reported-by: Kaushik Kanetkar Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 6c807017128d..722cdf21429f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,7 +71,10 @@ struct nd_cmd_desc { }; struct nd_interleave_set { - u64 cookie; + /* v1.1 definition of the interleave-set-cookie algorithm */ + u64 cookie1; + /* v1.2 definition of the interleave-set-cookie algorithm */ + u64 cookie2; /* compatibility with initial buggy Linux implementation */ u64 altcookie; }; -- cgit v1.2.3 From f979b13c3cc51584882bffa32965f34e5afa3b9b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 12:12:07 +0900 Subject: libnvdimm, label: honor the lba size specified in v1.2 labels Previously we only honored the lba size for blk-aperture mode namespaces. For pmem namespaces the lba size was just assumed to be 512. With the new v1.2 label definition and compatibility with other operating environments, the ->lbasize property is now respected for pmem namespaces. Cc: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/nd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nd.h b/include/linux/nd.h index 194b8e002ea7..d8f5023b49ae 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -75,12 +75,14 @@ struct nd_namespace_io { /** * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory * @nsio: device and system physical address range to drive + * @lbasize: logical sector size for the namespace in block-device-mode * @alt_name: namespace name supplied in the dimm label * @uuid: namespace name supplied in the dimm label * @id: ida allocated id */ struct nd_namespace_pmem { struct nd_namespace_io nsio; + unsigned long lbasize; char *alt_name; u8 *uuid; int id; -- cgit v1.2.3 From faec6f8a1cd2c44e439de35ab3328c5cf7bf52d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jun 2017 11:10:51 -0700 Subject: libnvdimm, label: populate the type_guid property for v1.2 namespaces The type_guid refers to the "Address Range Type GUID" for the region backing a namespace as defined the ACPI NFIT (NVDIMM Firmware Interface Table). This 'type' identifier specifies an access mechanism for the given namespace. This capability replaces the confusing usage of the 'NSLABEL_FLAG_LOCAL' flag to indicate a block-aperture-mode namespace. Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 722cdf21429f..4b9f178c82e6 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -17,6 +17,7 @@ #include #include #include +#include enum { /* when a dimm supports both PMEM and BLK access a label is required */ @@ -77,6 +78,8 @@ struct nd_interleave_set { u64 cookie2; /* compatibility with initial buggy Linux implementation */ u64 altcookie; + + guid_t type_guid; }; struct nd_mapping_desc { -- cgit v1.2.3 From b3fde74ea195d2f9f49830a29f971a0aab4cd67a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 10:18:39 +0900 Subject: libnvdimm, label: add address abstraction identifiers Starting with v1.2 labels, 'address abstractions' can be hinted via an address abstraction id that implies an info-block format. The standard address abstraction in the specification is the v2 format of the Block-Translation-Table (BTT). Support for that is saved for a later patch, for now we add support for the Linux supported address abstractions BTT (v1), PFN, and DAX. The new 'holder_class' attribute for namespace devices is added for tooling to specify the 'abstraction_guid' to store in the namespace label. For v1.1 labels this field is undefined and any setting of 'holder_class' away from the default 'none' value will only have effect until the driver is unloaded. Setting 'holder_class' requires that whatever device tries to claim the namespace must be of the specified class. Cc: Vishal Verma Signed-off-by: Dan Williams --- include/linux/nd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/nd.h b/include/linux/nd.h index d8f5023b49ae..96069c543890 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -21,6 +21,14 @@ enum nvdimm_event { NVDIMM_REVALIDATE_POISON, }; +enum nvdimm_claim_class { + NVDIMM_CCLASS_NONE, + NVDIMM_CCLASS_BTT, + NVDIMM_CCLASS_PFN, + NVDIMM_CCLASS_DAX, + NVDIMM_CCLASS_UNKNOWN, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; @@ -41,12 +49,14 @@ static inline struct nd_device_driver *to_nd_device_driver( * @force_raw: ignore other personalities for the namespace (e.g. btt) * @dev: device model node * @claim: when set a another personality has taken ownership of the namespace + * @claim_class: restrict claim type to a given class * @rw_bytes: access the raw namespace capacity with byte-aligned transfers */ struct nd_namespace_common { int force_raw; struct device dev; struct device *claim; + enum nvdimm_claim_class claim_class; int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, void *buf, size_t size, int rw, unsigned long flags); }; -- cgit v1.2.3 From fec53774fd043038e57ac737d90e8d58975d6e92 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 21:56:49 -0700 Subject: filesystem-dax: convert to dax_copy_from_iter() Now that all possible providers of the dax_operations copy_from_iter method are implemented, switch filesytem-dax to call the driver rather than copy_to_iter_pmem. Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/pmem.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 71ecf3d46aac..9d542a5600e4 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,13 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - BUG(); - return 0; -} - static inline void arch_clear_pmem(void *addr, size_t size) { BUG(); @@ -79,23 +72,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) memcpy(dst, src, n); } -/** - * copy_from_iter_pmem - copy data from an iterator to PMEM - * @addr: PMEM destination address - * @bytes: number of bytes to copy - * @i: iterator with source data - * - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - if (arch_has_pmem_api()) - return arch_copy_from_iter_pmem(addr, bytes, i); - return copy_from_iter_nocache(addr, bytes, i); -} - /** * clear_pmem - zero a PMEM memory range * @addr: virtual start address -- cgit v1.2.3 From 3c1cebff23cdca01c421411e953a9e239f2b9ef9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:58:19 -0700 Subject: dax, pmem: introduce an optional 'flush' dax_operation Filesystem-DAX flushes caches whenever it writes to the address returned through dax_direct_access() and when writing back dirty radix entries. That flushing is only required in the pmem case, so add a dax operation to allow pmem to take this extra action, but skip it for other dax capable devices that do not provide a flush routine. An example for this differentiation might be a volatile ram disk where there is no expectation of persistence. In fact the pmem driver itself might front such an address range specified by the NFIT. So, this "no flush" property might be something passed down by the bus / libnvdimm. Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/dax.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 28e398f8c59e..407dd3ff6e54 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -19,6 +19,8 @@ struct dax_operations { /* copy_from_iter: dax-driver override for default copy_from_iter */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); + /* flush: optional driver-specific cache management after writes */ + void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; #if IS_ENABLED(CONFIG_DAX) -- cgit v1.2.3 From abebfbe2f7315dd3ec9a0c69596a76e32beb5749 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 13:02:52 -0700 Subject: dm: add ->flush() dax operation support Allow device-mapper to route flush operations to the per-target implementation. In order for the device stacking to work we need a dax_dev and a pgoff relative to that device. This gives each layer of the stack the information it needs to look up the operation pointer for the next level. This conceptually allows for an array of mixed device drivers with varying flush implementations. Reviewed-by: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- include/linux/dax.h | 2 ++ include/linux/device-mapper.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 407dd3ff6e54..1f6b6072af64 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -82,6 +82,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t size); /* * We use lowest available bit in exceptional entry for locking, one bit for diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 11c8a0a92f9c..67bfe8ddcb32 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -134,6 +134,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, + size_t size); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -184,6 +186,7 @@ struct target_type { dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; dm_dax_copy_from_iter_fn dax_copy_from_iter; + dm_dax_flush_fn dax_flush; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From 81f558701ae8d5677635118751b1b4043094c7e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 13:12:20 -0700 Subject: x86, dax: replace clear_pmem() with open coded memset + dax_ops->flush The clear_pmem() helper simply combines a memset() plus a cache flush. Now that the flush routine is optionally provided by the dax device driver we can avoid unnecessary cache management on dax devices fronting volatile memory. With clear_pmem() gone we can follow on with a patch to make pmem cache management completely defined within the pmem driver. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/pmem.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 9d542a5600e4..772bd02a5b52 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline void arch_clear_pmem(void *addr, size_t size) -{ - BUG(); -} - static inline void arch_wb_cache_pmem(void *addr, size_t size) { BUG(); @@ -72,22 +67,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) memcpy(dst, src, n); } -/** - * clear_pmem - zero a PMEM memory range - * @addr: virtual start address - * @size: number of bytes to zero - * - * Write zeros into the memory range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline void clear_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_clear_pmem(addr, size); - else - memset(addr, 0, size); -} - /** * invalidate_pmem - flush a pmem range from the cache hierarchy * @addr: virtual start address -- cgit v1.2.3 From 4e4f00a9b51a1c52ebdd728a1caeb3b9fe48c39d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 22:40:44 -0700 Subject: x86, dax, libnvdimm: remove wb_cache_pmem() indirection With all handling of the CONFIG_ARCH_HAS_PMEM_API case being moved to libnvdimm and the pmem driver directly we do not need to provide global wrappers and fallbacks in the CONFIG_ARCH_HAS_PMEM_API=n case. The pmem driver will simply not link to arch_wb_cache_pmem() in that case. Same as before, pmem flushing is only defined for x86_64, via clean_cache_range(), but it is straightforward to add other archs in the future. arch_wb_cache_pmem() is an exported function since the pmem module needs to find it, but it is privately declared in drivers/nvdimm/pmem.h because there are no consumers outside of the pmem driver. Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Oliver O'Halloran Cc: Matthew Wilcox Cc: Ross Zwisler Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/pmem.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 772bd02a5b52..33ae761f010a 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline void arch_wb_cache_pmem(void *addr, size_t size) -{ - BUG(); -} - static inline void arch_invalidate_pmem(void *addr, size_t size) { BUG(); @@ -80,18 +75,4 @@ static inline void invalidate_pmem(void *addr, size_t size) if (arch_has_pmem_api()) arch_invalidate_pmem(addr, size); } - -/** - * wb_cache_pmem - write back processor cache for PMEM memory range - * @addr: virtual start address - * @size: number of bytes to write back - * - * Write back the processor cache range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline void wb_cache_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_wb_cache_pmem(addr, size); -} #endif /* __PMEM_H__ */ -- cgit v1.2.3 From f2b612578e163b49661ece2fe01dfafb0e78f545 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 23:00:34 -0700 Subject: x86, libnvdimm, pmem: move arch_invalidate_pmem() to libnvdimm Kill this globally defined wrapper and move to libnvdimm so that we can ultimately remove include/linux/pmem.h and asm/pmem.h. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/pmem.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 33ae761f010a..559c00848583 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -30,11 +30,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) { BUG(); } - -static inline void arch_invalidate_pmem(void *addr, size_t size) -{ - BUG(); -} #endif static inline bool arch_has_pmem_api(void) @@ -61,18 +56,4 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) else memcpy(dst, src, n); } - -/** - * invalidate_pmem - flush a pmem range from the cache hierarchy - * @addr: virtual start address - * @size: bytes to invalidate (internally aligned to cache line size) - * - * For platforms that support clearing poison this flushes any poisoned - * ranges out of the cache - */ -static inline void invalidate_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_invalidate_pmem(addr, size); -} #endif /* __PMEM_H__ */ -- cgit v1.2.3 From ca6a4657e5420dec727256717e905ebc3c751352 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 13 Jan 2017 20:36:58 -0800 Subject: x86, libnvdimm, pmem: remove global pmem api Now that all callers of the pmem api have been converted to dax helpers that call back to the pmem driver, we can remove include/linux/pmem.h and asm/pmem.h. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: Oliver O'Halloran Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + include/linux/pmem.h | 59 ----------------------------------------------- 2 files changed, 1 insertion(+), 59 deletions(-) delete mode 100644 include/linux/pmem.h (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 6c807017128d..b2f659bd661d 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -159,6 +159,7 @@ void *nd_region_provider_data(struct nd_region *nd_region); void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); +unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); diff --git a/include/linux/pmem.h b/include/linux/pmem.h deleted file mode 100644 index 559c00848583..000000000000 --- a/include/linux/pmem.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright(c) 2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#ifndef __PMEM_H__ -#define __PMEM_H__ - -#include -#include - -#ifdef CONFIG_ARCH_HAS_PMEM_API -#define ARCH_MEMREMAP_PMEM MEMREMAP_WB -#include -#else -#define ARCH_MEMREMAP_PMEM MEMREMAP_WT -/* - * These are simply here to enable compilation, all call sites gate - * calling these symbols with arch_has_pmem_api() and redirect to the - * implementation in asm/pmem.h. - */ -static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - BUG(); -} -#endif - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); -} - -/** - * memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Perform a memory copy that results in the destination of the copy - * being effectively evicted from, or never written to, the processor - * cache hierarchy after the copy completes. After memcpy_to_pmem() - * data may still reside in cpu or platform buffers, so this operation - * must be followed by a blkdev_issue_flush() on the pmem block device. - */ -static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - if (arch_has_pmem_api()) - arch_memcpy_to_pmem(dst, src, n); - else - memcpy(dst, src, n); -} -#endif /* __PMEM_H__ */ -- cgit v1.2.3 From 5d61e43b3975c0582003329d9de9d5e85abf5d33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 27 Jun 2017 13:06:22 -0700 Subject: dax: remove default copy_from_iter fallback Require all dax-drivers to register a ->copy_from_iter() operation so that it is clear which dax_operations are optional and which must be implemented for filesystem-dax to operate. Cc: Gerald Schaefer Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 1f6b6072af64..73fca1bebaf3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -16,7 +16,7 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); - /* copy_from_iter: dax-driver override for default copy_from_iter */ + /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); /* flush: optional driver-specific cache management after writes */ -- cgit v1.2.3 From 6e0c90d691cd5d90569f5918ab03eb76c81f9c6e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 26 Jun 2017 21:28:41 -0700 Subject: libnvdimm, pmem, dax: export a cache control attribute The dax_flush() operation can be turned into a nop on platforms where firmware arranges for cpu caches to be flushed on a power-fail event. The ACPI 6.2 specification defines a mechanism for the platform to indicate this capability so the kernel can select the proper default. However, for other platforms, the administrator must toggle this setting manually. Given this flush setting is a dax-specific mechanism we advertise it through a 'dax' attribute group hanging off a host device. For example, a 'pmem0' block-device gets a 'dax' sysfs-subdirectory with a 'write_cache' attribute to control response to dax cache flush requests. This is similar to the 'queue/write_cache' attribute that appears under block devices. Cc: Jan Kara Cc: Jeff Moyer Cc: Matthew Wilcox Cc: Ross Zwisler Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/dax.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 73fca1bebaf3..8f39db7439c3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -23,6 +23,8 @@ struct dax_operations { void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; +extern struct attribute_group dax_attribute_group; + #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); void put_dax(struct dax_device *dax_dev); @@ -84,6 +86,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t size); +void dax_write_cache(struct dax_device *dax_dev, bool wc); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From 0b277961f4484fb3f142caaa1dd1748cb0b2cbee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 9 Jun 2017 09:46:50 -0700 Subject: libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region The pmem driver attaches to both persistent and volatile memory ranges advertised by the ACPI NFIT. When the region is volatile it is redundant to spend cycles flushing caches at fsync(). Check if the hosting region is volatile and do not set dax_write_cache() if it is. Cc: Jan Kara Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index b2f659bd661d..a8ee1d0afd70 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -165,4 +165,5 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); +int nvdimm_has_cache(struct nd_region *nd_region); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 14e494542636b7a685c5bf27e695e3bb9ec3fe7d Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 28 Jun 2017 14:25:00 -0600 Subject: libnvdimm, btt: BTT updates for UEFI 2.7 format The UEFI 2.7 specification defines an updated BTT metadata format, bumping the revision to 2.0. Add support for the new format, while retaining compatibility for the old 1.1 format. Cc: Toshi Kani Cc: Linda Knippers Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/nd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nd.h b/include/linux/nd.h index 96069c543890..5dc6b695437d 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -24,6 +24,7 @@ enum nvdimm_event { enum nvdimm_claim_class { NVDIMM_CCLASS_NONE, NVDIMM_CCLASS_BTT, + NVDIMM_CCLASS_BTT2, NVDIMM_CCLASS_PFN, NVDIMM_CCLASS_DAX, NVDIMM_CCLASS_UNKNOWN, -- cgit v1.2.3 From 37d74841b9d42b105cba053e70e9db0e395949da Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:41:24 -0700 Subject: acpi, nfit: Enable DSM pass thru for root functions. Set ND_CMD_CALL in the cmd_mask to enable calling root functions via the pass thru mechanism. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7ad3863cb88b..e23c37fedade 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -179,6 +179,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd) [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", [ND_CMD_CLEAR_ERROR] = "clear_error", + [ND_CMD_CALL] = "cmd_call", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) -- cgit v1.2.3 From 7db5bb33add5afe6c64e00516b0c928bfc937466 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:53:24 -0700 Subject: libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru. Add a bus level dsm_mask to nvdimm_bus_descriptor to allow the passthru calling mechanism to specify a different mask from the cmd_mask. Populate bus_dsm_mask and use it to filter dsm calls that user can make through the pass thru interface. Signed-off-by: Jerry Hoemann [djbw: use command number constants instead of a magic mask value] Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 4b9f178c82e6..6aee1a6e4e63 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -55,6 +55,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; + unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; -- cgit v1.2.3 From 759d6a9641d7f52f9311aae4f2d90058adad3ac2 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:41:29 -0700 Subject: libnvdimm: New ACPI 6.2 DSM functions ACPI 6.2 added new NVDIMM root DSM functions. Define their data structures. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e23c37fedade..e15768fb4b99 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -105,7 +105,8 @@ struct nd_cmd_ars_cap { __u32 status; __u32 max_ars_out; __u32 clear_err_unit; - __u32 reserved; + __u16 flags; + __u16 reserved; } __packed; struct nd_cmd_ars_start { @@ -144,6 +145,43 @@ struct nd_cmd_clear_error { __u64 cleared; } __packed; +struct nd_cmd_trans_spa { + __u64 spa; + __u32 status; + __u8 flags; + __u8 _reserved[3]; + __u64 trans_length; + __u32 num_nvdimms; + struct nd_nvdimm_device { + __u32 nfit_device_handle; + __u32 _reserved; + __u64 dpa; + } __packed devices[0]; + +} __packed; + +struct nd_cmd_ars_err_inj { + __u64 err_inj_spa_range_base; + __u64 err_inj_spa_range_length; + __u8 err_inj_options; + __u32 status; +} __packed; + +struct nd_cmd_ars_err_inj_clr { + __u64 err_inj_clr_spa_range_base; + __u64 err_inj_clr_spa_range_length; + __u32 status; +} __packed; + +struct nd_cmd_ars_err_inj_stat { + __u32 status; + __u32 inj_err_rec_count; + struct nd_error_stat_query_record { + __u64 err_inj_stat_spa_range_base; + __u64 err_inj_stat_spa_range_length; + } __packed record[0]; +} __packed; + enum { ND_CMD_IMPLEMENTED = 0, -- cgit v1.2.3 From 807900395efebf9276178eb6157959f2e81fe013 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 29 Jun 2017 20:41:30 -0600 Subject: acpi/nfit: Issue Start ARS to retrieve existing records ACPI 6.2 defines in section 9.20.7.2 that the OSPM may call a Start ARS with Flags Bit [1] set upon receiving the 0x81 notification. Upon receiving the notification, the OSPM may decide to issue a Start ARS with Flags Bit [1] set to prepare for the retrieval of existing records and issue the Query ARS Status function to retrieve the records. Add support to call a Start ARS from acpi_nfit_uc_error_notify() with ND_ARS_RETURN_PREV_DATA set when HW_ERROR_SCRUB_ON is not set. Link: http://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf Signed-off-by: Toshi Kani Cc: Dan Williams Cc: Rafael J. Wysocki Cc: Vishal Verma Cc: Linda Knippers Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e15768fb4b99..6d3c54264d8e 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -207,6 +207,7 @@ enum { enum { ND_ARS_VOLATILE = 1, ND_ARS_PERSISTENT = 2, + ND_ARS_RETURN_PREV_DATA = 1 << 1, ND_CONFIG_LOCKED = 1, }; -- cgit v1.2.3