summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-04-13 08:39:51 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-04-13 08:39:51 -0700
commit fdcbb1bc06508eb7ad961b3876b16382ae678ef8 (patch)
tree a67b4c80a35f58fc8f0020b52d8f0ab11e884fc3
parent 028ef9c96e96197026887c0f092424679298aae8 (diff)
parent 809b997a5ce945ab470f70c187048fe4f5df20bf (diff)
Merge branch 'nocache-cleanup'
This series cleans up some of the special user copy functions naming and semantics. In particular, get rid of the (very traditional) double underscore names and behavior: the whole "optimize away the range check" model has been largely excised from the other user accessors because it's so subtle and can be unsafe, but also because it's just not a relevant optimization any more.

To do that, a couple of drivers that misused the "user" copies as kernel copies in order to get non-temporal stores had to be fixed up, but that kind of code should never have been allowed anyway.

The x86-only "nocache" version was also renamed to more accurately reflect what it actually does.

This was all done because I looked at this code due to a report by Jann Horn, and I just couldn't stand the inconsistent naming, the horrible semantics, and the random misuse of these functions. This code should probably be cleaned up further, but it's at least slightly closer to normal semantics.

I had a more intrusive series that went even further in trying to normalize the semantics, but that ended up hitting so many other inconsistencies between different architectures in this area (eg 'size_t' vs 'unsigned long' vs 'int' as size arguments, and various iovec check differences that Vasily Gorbik pointed out) that I ended up with this more limited version that fixed the worst of the issues.

Reported-by: Jann Horn <jannh@google.com>
Tested-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/all/CAHk-=wgg1QVWNWG-UCFo1hx0zqrPnB3qhPzUTrWNft+MtXQXig@mail.gmail.com/

* nocache-cleanup:
  x86-64/arm64/powerpc: clean up and rename __copy_from_user_flushcache
  x86: rename and clean up __copy_from_user_inatomic_nocache()
  x86-64: rename misleadingly named '__copy_user_nocache()' function
-rw-r--r-- arch/arm64/include/asm/uaccess.h 2
-rw-r--r-- arch/powerpc/include/asm/uaccess.h 3
-rw-r--r-- arch/powerpc/lib/pmem.c 11
-rw-r--r-- arch/x86/include/asm/uaccess.h 2
-rw-r--r-- arch/x86/include/asm/uaccess_32.h 8
-rw-r--r-- arch/x86/include/asm/uaccess_64.h 16
-rw-r--r-- arch/x86/lib/copy_user_uncached_64.S 6
-rw-r--r-- arch/x86/lib/usercopy_32.c 9
-rw-r--r-- arch/x86/lib/usercopy_64.c 12
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 2
-rw-r--r-- drivers/gpu/drm/qxl/qxl_ioctl.c 2
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c 8
-rw-r--r-- drivers/ntb/ntb_transport.c 7
-rw-r--r-- include/linux/uaccess.h 11
-rw-r--r-- lib/iov_iter.c 4
-rw-r--r-- tools/objtool/check.c 2
16 files changed, 53 insertions, 52 deletions
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 9810106a3f66..eafc83d255d8 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -478,7 +478,7 @@ extern __must_check long strnlen_user(const char __user *str, long n);
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n);
-static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+static inline size_t copy_from_user_flushcache(void *dst, const void __user *src, size_t size)
{
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, __uaccess_mask_ptr(src), size);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 17e63244e885..e98c628e3899 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -434,8 +434,7 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n)
}
#endif
-extern long __copy_from_user_flushcache(void *dst, const void __user *src,
- unsigned size);
+extern size_t copy_from_user_flushcache(void *dst, const void __user *src, size_t size);
static __must_check __always_inline bool __user_access_begin(const void __user *ptr, size_t len,
unsigned long dir)
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 4e724c4c01ad..0f0f2d851ac6 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -66,15 +66,16 @@ EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
/*
* CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
*/
-long __copy_from_user_flushcache(void *dest, const void __user *src,
- unsigned size)
+size_t copy_from_user_flushcache(void *dest, const void __user *src,
+ size_t size)
{
- unsigned long copied, start = (unsigned long) dest;
+ unsigned long not_copied, start = (unsigned long) dest;
- copied = __copy_from_user(dest, src, size);
+ src = mask_user_address(src);
+ not_copied = __copy_from_user(dest, src, size);
clean_pmem_range(start, start + size);
- return copied;
+ return not_copied;
}
void memcpy_flushcache(void *dest, const void *src, size_t size)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 367297b188c3..3a0dd3c2b233 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -507,7 +507,7 @@ extern struct movsl_mask {
} ____cacheline_aligned_in_smp movsl_mask;
#endif
-#define ARCH_HAS_NOCACHE_UACCESS 1
+#define ARCH_HAS_NONTEMPORAL_UACCESS 1
/*
* The "unsafe" user accesses aren't really "unsafe", but the naming
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 40379a1adbb8..fff19e73ccb3 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -26,13 +26,7 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
return __copy_user_ll(to, (__force const void *)from, n);
}
-static __always_inline unsigned long
-__copy_from_user_inatomic_nocache(void *to, const void __user *from,
- unsigned long n)
-{
- return __copy_from_user_ll_nocache_nozero(to, from, n);
-}
-
+unsigned long __must_check copy_from_user_inatomic_nontemporal(void *, const void __user *, unsigned long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 915124011c27..20de34cc9aa6 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -147,26 +147,28 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
return copy_user_generic((__force void *)dst, src, size);
}
-extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size);
-extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
+#define copy_to_nontemporal copy_to_nontemporal
+extern size_t copy_to_nontemporal(void *dst, const void *src, size_t size);
+extern size_t copy_user_flushcache(void *dst, const void __user *src, size_t size);
static inline int
-__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
+copy_from_user_inatomic_nontemporal(void *dst, const void __user *src,
unsigned size)
{
long ret;
kasan_check_write(dst, size);
+ src = mask_user_address(src);
stac();
- ret = __copy_user_nocache(dst, src, size);
+ ret = copy_to_nontemporal(dst, (__force const void *)src, size);
clac();
return ret;
}
-static inline int
-__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+static inline size_t
+copy_from_user_flushcache(void *dst, const void __user *src, size_t size)
{
kasan_check_write(dst, size);
- return __copy_user_flushcache(dst, src, size);
+ return copy_user_flushcache(dst, src, size);
}
/*
diff --git a/arch/x86/lib/copy_user_uncached_64.S b/arch/x86/lib/copy_user_uncached_64.S
index 18350b343c2a..8ed0ce3ad227 100644
--- a/arch/x86/lib/copy_user_uncached_64.S
+++ b/arch/x86/lib/copy_user_uncached_64.S
@@ -27,7 +27,7 @@
* Output:
* rax uncopied bytes or 0 if successful.
*/
-SYM_FUNC_START(__copy_user_nocache)
+SYM_FUNC_START(copy_to_nontemporal)
ANNOTATE_NOENDBR
/* If destination is not 7-byte aligned, we'll have to align it */
testb $7,%dil
@@ -240,5 +240,5 @@ _ASM_EXTABLE_UA(95b, .Ldone)
_ASM_EXTABLE_UA(52b, .Ldone0)
_ASM_EXTABLE_UA(53b, .Ldone0)
-SYM_FUNC_END(__copy_user_nocache)
-EXPORT_SYMBOL(__copy_user_nocache)
+SYM_FUNC_END(copy_to_nontemporal)
+EXPORT_SYMBOL(copy_to_nontemporal)
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index f6f436f1d573..ac27e39fc993 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -322,10 +322,11 @@ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n)
}
EXPORT_SYMBOL(__copy_user_ll);
-unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+unsigned long copy_from_user_inatomic_nontemporal(void *to, const void __user *from,
unsigned long n)
{
- __uaccess_begin_nospec();
+ if (!user_access_begin(from, n))
+ return n;
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_intel_nocache(to, from, n);
@@ -334,7 +335,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
#else
__copy_user(to, from, n);
#endif
- __uaccess_end();
+ user_access_end();
return n;
}
-EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
+EXPORT_SYMBOL(copy_from_user_inatomic_nontemporal);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 654280aaa3e9..c47d8cd0e243 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -43,17 +43,17 @@ void arch_wb_cache_pmem(void *addr, size_t size)
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
-long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
+size_t copy_user_flushcache(void *dst, const void __user *src, size_t size)
{
unsigned long flushed, dest = (unsigned long) dst;
- long rc;
+ unsigned long rc;
- stac();
- rc = __copy_user_nocache(dst, src, size);
- clac();
+ src = masked_user_access_begin(src);
+ rc = copy_to_nontemporal(dst, (__force const void *)src, size);
+ user_access_end();
/*
- * __copy_user_nocache() uses non-temporal stores for the bulk
+ * copy_to_nontemporal() uses non-temporal stores for the bulk
* of the transfer, but we need to manually flush if the
* transfer is unaligned. A cached memory copy is used when
* destination or size is not naturally aligned. That is:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 160733619a4a..3cfa1344b1b2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -520,7 +520,7 @@ ggtt_write(struct io_mapping *mapping,
/* We can use the cpu mem copy function because this is X86. */
vaddr = io_mapping_map_atomic_wc(mapping, base);
- unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
+ unwritten = copy_from_user_inatomic_nontemporal((void __force *)vaddr + offset,
user_data, length);
io_mapping_unmap_atomic(vaddr);
if (unwritten) {
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 4ee2b5acf2e0..591b026ceff9 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -183,7 +183,7 @@ static int qxl_process_single_command(struct qxl_device *qdev,
/* TODO copy slow path code from i915 */
fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_MASK));
- unwritten = __copy_from_user_inatomic_nocache
+ unwritten = copy_from_user_inatomic_nontemporal
(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_MASK),
u64_to_user_ptr(cmd->command), cmd->command_size);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index c1199ea5d41f..3c7ee7ddc5dd 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -92,12 +92,10 @@ static int rvt_wss_llc_size(void)
static void cacheless_memcpy(void *dst, void *src, size_t n)
{
/*
- * Use the only available X64 cacheless copy. Add a __user cast
- * to quiet sparse. The src agument is already in the kernel so
- * there are no security issues. The extra fault recovery machinery
- * is not invoked.
+ * Use the only available X64 cacheless copy.
+ * The extra fault recovery machinery is not invoked.
*/
- __copy_user_nocache(dst, (void __user *)src, n);
+ copy_to_nontemporal(dst, src, n);
}
void rvt_wss_exit(struct rvt_dev_info *rdi)
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 78e02fe6caba..2e77b699be2a 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1779,12 +1779,13 @@ static void ntb_tx_copy_callback(void *data,
static void ntb_memcpy_tx_on_stack(struct ntb_queue_entry *entry, void __iomem *offset)
{
-#ifdef ARCH_HAS_NOCACHE_UACCESS
+#ifdef copy_to_nontemporal
/*
* Using non-temporal mov to improve performance on non-cached
- * writes, even though we aren't actually copying from user space.
+ * writes. This only works if __iomem is strictly memory-like,
+ * but that is the case on x86-64
*/
- __copy_from_user_inatomic_nocache(offset, entry->buf, entry->len);
+ copy_to_nontemporal(offset, entry->buf, entry->len);
#else
memcpy_toio(offset, entry->buf, entry->len);
#endif
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 4fe63169d5a2..56328601218c 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -331,16 +331,21 @@ static inline size_t probe_subpage_writeable(char __user *uaddr, size_t size)
#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
-#ifndef ARCH_HAS_NOCACHE_UACCESS
+#ifndef ARCH_HAS_NONTEMPORAL_UACCESS
static inline __must_check unsigned long
-__copy_from_user_inatomic_nocache(void *to, const void __user *from,
+copy_from_user_inatomic_nontemporal(void *to, const void __user *from,
unsigned long n)
{
+ if (can_do_masked_user_access())
+ from = mask_user_address(from);
+ else
+ if (!access_ok(from, n))
+ return n;
return __copy_from_user_inatomic(to, from, n);
}
-#endif /* ARCH_HAS_NOCACHE_UACCESS */
+#endif /* ARCH_HAS_NONTEMPORAL_UACCESS */
extern __must_check int check_zeroed_user(const void __user *from, size_t size);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0a63c7fba313..243662af1af7 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -277,7 +277,7 @@ static __always_inline
size_t copy_from_user_iter_nocache(void __user *iter_from, size_t progress,
size_t len, void *to, void *priv2)
{
- return __copy_from_user_inatomic_nocache(to + progress, iter_from, len);
+ return copy_from_user_inatomic_nontemporal(to + progress, iter_from, len);
}
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
@@ -296,7 +296,7 @@ static __always_inline
size_t copy_from_user_iter_flushcache(void __user *iter_from, size_t progress,
size_t len, void *to, void *priv2)
{
- return __copy_from_user_flushcache(to + progress, iter_from, len);
+ return copy_from_user_flushcache(to + progress, iter_from, len);
}
static __always_inline
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b6765e876507..731fd595ac45 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1301,7 +1301,7 @@ static const char *uaccess_safe_builtin[] = {
"copy_mc_enhanced_fast_string",
"rep_stos_alternative",
"rep_movs_alternative",
- "__copy_user_nocache",
+ "copy_to_nontemporal",
NULL
};