From 9f79b78ef74436c7507bac6bfb7b8b989263bccb Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 21 May 2016 21:59:07 -0700
Subject: Convert filldir[64]() from __put_user() to unsafe_put_user()

We really should avoid the "__{get,put}_user()" functions entirely,
because they can easily be mis-used and the original intent of being
used for simple direct user accesses no longer holds in a post-SMAP/PAN
world.

Manually optimizing away the user access range check makes no sense any
more, when the range check is generally much cheaper than the "enable
user accesses" code that the __{get,put}_user() functions still need.

So instead of __put_user(), use the unsafe_put_user() interface with
user_access_{begin,end}() that really does generate better code these
days, and which is generally a nicer interface.  Under some loads, the
multiple user writes that filldir() does are actually quite noticeable.

This also makes the dirent name copy use unsafe_put_user() with a couple
of macros.  We do not want to make function calls with SMAP/PAN
disabled, and the code this generates is quite good when the
architecture uses "asm goto" for unsafe_put_user() like x86 does.

Note that this doesn't bother with the legacy cases.  Nobody should use
them anyway, so performance doesn't really matter there.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/readdir.c | 128 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 93 insertions(+), 35 deletions(-)

(limited to 'fs/readdir.c')

diff --git a/fs/readdir.c b/fs/readdir.c
index 2f6a4534e0df..579c8ea894ae 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -20,9 +20,63 @@
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/compat.h>
-
 #include <linux/uaccess.h>
 
+#include <asm/unaligned.h>
+
+/*
+ * Note the "unsafe_put_user() semantics: we goto a
+ * label for errors.
+ *
+ * Also note how we use a "while()" loop here, even though
+ * only the biggest size needs to loop. The compiler (well,
+ * at least gcc) is smart enough to turn the smaller sizes
+ * into just if-statements, and this way we don't need to
+ * care whether 'u64' or 'u32' is the biggest size.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) 		\
+	while (len >= sizeof(type)) {				\
+		unsafe_put_user(get_unaligned((type *)src),	\
+			(type __user *)dst, label);		\
+		dst += sizeof(type);				\
+		src += sizeof(type);				\
+		len -= sizeof(type);				\
+	}
+
+/*
+ * We avoid doing 64-bit copies on 32-bit architectures. They
+ * might be better, but the component names are mostly small,
+ * and the 64-bit cases can end up being much more complex and
+ * put much more register pressure on the code, so it's likely
+ * not worth the pain of unaligned accesses etc.
+ *
+ * So limit the copies to "unsigned long" size. I did verify
+ * that at least the x86-32 case is ok without this limiting,
+ * but I worry about random other legacy 32-bit cases that
+ * might not do as well.
+ */
+#define unsafe_copy_type(dst, src, len, type, label) do {	\
+	if (sizeof(type) <= sizeof(unsigned long))		\
+		unsafe_copy_loop(dst, src, len, type, label);	\
+} while (0)
+
+/*
+ * Copy the dirent name to user space, and NUL-terminate
+ * it. This should not be a function call, since we're doing
+ * the copy inside a "user_access_begin/end()" section.
+ */
+#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
+	char __user *dst = (_dst);				\
+	const char *src = (_src);				\
+	size_t len = (_len);					\
+	unsafe_copy_type(dst, src, len, u64, label);	 	\
+	unsafe_copy_type(dst, src, len, u32, label);		\
+	unsafe_copy_type(dst, src, len, u16, label);		\
+	unsafe_copy_type(dst, src, len, u8,  label);		\
+	unsafe_put_user(0, dst, label);				\
+} while (0)
+
+
 int iterate_dir(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
@@ -182,28 +236,31 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 		return -EOVERFLOW;
 	}
 	dirent = buf->previous;
-	if (dirent) {
-		if (signal_pending(current))
-			return -EINTR;
-		if (__put_user(offset, &dirent->d_off))
-			goto efault;
-	}
-	dirent = buf->current_dir;
-	if (__put_user(d_ino, &dirent->d_ino))
-		goto efault;
-	if (__put_user(reclen, &dirent->d_reclen))
-		goto efault;
-	if (copy_to_user(dirent->d_name, name, namlen))
-		goto efault;
-	if (__put_user(0, dirent->d_name + namlen))
-		goto efault;
-	if (__put_user(d_type, (char __user *) dirent + reclen - 1))
+	if (dirent && signal_pending(current))
+		return -EINTR;
+
+	/*
+	 * Note! This range-checks 'previous' (which may be NULL).
+	 * The real range was checked in getdents
+	 */
+	if (!user_access_begin(dirent, sizeof(*dirent)))
 		goto efault;
+	if (dirent)
+		unsafe_put_user(offset, &dirent->d_off, efault_end);
+	dirent = buf->current_dir;
+	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
+	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
+	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
+	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+	user_access_end();
+
 	buf->previous = dirent;
 	dirent = (void __user *)dirent + reclen;
 	buf->current_dir = dirent;
 	buf->count -= reclen;
 	return 0;
+efault_end:
+	user_access_end();
 efault:
 	buf->error = -EFAULT;
 	return -EFAULT;
@@ -263,30 +320,31 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	if (reclen > buf->count)
 		return -EINVAL;
 	dirent = buf->previous;
-	if (dirent) {
-		if (signal_pending(current))
-			return -EINTR;
-		if (__put_user(offset, &dirent->d_off))
-			goto efault;
-	}
-	dirent = buf->current_dir;
-	if (__put_user(ino, &dirent->d_ino))
-		goto efault;
-	if (__put_user(0, &dirent->d_off))
-		goto efault;
-	if (__put_user(reclen, &dirent->d_reclen))
-		goto efault;
-	if (__put_user(d_type, &dirent->d_type))
-		goto efault;
-	if (copy_to_user(dirent->d_name, name, namlen))
-		goto efault;
-	if (__put_user(0, dirent->d_name + namlen))
+	if (dirent && signal_pending(current))
+		return -EINTR;
+
+	/*
+	 * Note! This range-checks 'previous' (which may be NULL).
+	 * The real range was checked in getdents
+	 */
+	if (!user_access_begin(dirent, sizeof(*dirent)))
 		goto efault;
+	if (dirent)
+		unsafe_put_user(offset, &dirent->d_off, efault_end);
+	dirent = buf->current_dir;
+	unsafe_put_user(ino, &dirent->d_ino, efault_end);
+	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
+	unsafe_put_user(d_type, &dirent->d_type, efault_end);
+	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+	user_access_end();
+
 	buf->previous = dirent;
 	dirent = (void __user *)dirent + reclen;
 	buf->current_dir = dirent;
 	buf->count -= reclen;
 	return 0;
+efault_end:
+	user_access_end();
 efault:
 	buf->error = -EFAULT;
 	return -EFAULT;
-- 
cgit v1.2.3


From 8a23eb804ca4f2be909e372cf5a9e7b30ae476cd Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 5 Oct 2019 11:32:52 -0700
Subject: Make filldir[64]() verify the directory entry filename is valid

This has been discussed several times, and now filesystem people are
talking about doing it individually at the filesystem layer, so head
that off at the pass and just do it in getdents{64}().

This is partially based on a patch by Jann Horn, but checks for NUL
bytes as well, and somewhat simplified.

There's also commentary about how it might be better if invalid names
due to filesystem corruption don't cause an immediate failure, but only
an error at the end of the readdir(), so that people can still see the
filenames that are ok.

There's also been discussion about just how much POSIX strictly speaking
requires this since it's about filesystem corruption.  It's really more
"protect user space from bad behavior" as pointed out by Jann.  But
since Eric Biederman looked up the POSIX wording, here it is for context:

 "From readdir:

   The readdir() function shall return a pointer to a structure
   representing the directory entry at the current position in the
   directory stream specified by the argument dirp, and position the
   directory stream at the next entry. It shall return a null pointer
   upon reaching the end of the directory stream. The structure dirent
   defined in the <dirent.h> header describes a directory entry.

  From definitions:

   3.129 Directory Entry (or Link)

   An object that associates a filename with a file. Several directory
   entries can associate names with the same file.

  ...

   3.169 Filename

   A name consisting of 1 to {NAME_MAX} bytes used to name a file. The
   characters composing the name may be selected from the set of all
   character values excluding the slash character and the null byte. The
   filenames dot and dot-dot have special meaning. A filename is
   sometimes referred to as a 'pathname component'."

Note that I didn't bother adding the checks to any legacy interfaces
that nobody uses.

Also note that if this ends up being noticeable as a performance
regression, we can fix that to do a much more optimized model that
checks for both NUL and '/' at the same time one word at a time.

We haven't really tended to optimize 'memchr()', and it only checks for
one pattern at a time anyway, and we really _should_ check for NUL too
(but see the comment about "soft errors" in the code about why it
currently only checks for '/')

See the CONFIG_DCACHE_WORD_ACCESS case of hash_name() for how the name
lookup code looks for pathname terminating characters in parallel.

Link: https://lore.kernel.org/lkml/20190118161440.220134-2-jannh@google.com/
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jann Horn <jannh@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/readdir.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'fs/readdir.c')

diff --git a/fs/readdir.c b/fs/readdir.c
index 579c8ea894ae..19bea591c3f1 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -118,6 +118,40 @@ out:
 }
 EXPORT_SYMBOL(iterate_dir);
 
+/*
+ * POSIX says that a dirent name cannot contain NULL or a '/'.
+ *
+ * It's not 100% clear what we should really do in this case.
+ * The filesystem is clearly corrupted, but returning a hard
+ * error means that you now don't see any of the other names
+ * either, so that isn't a perfect alternative.
+ *
+ * And if you return an error, what error do you use? Several
+ * filesystems seem to have decided on EUCLEAN being the error
+ * code for EFSCORRUPTED, and that may be the error to use. Or
+ * just EIO, which is perhaps more obvious to users.
+ *
+ * In order to see the other file names in the directory, the
+ * caller might want to make this a "soft" error: skip the
+ * entry, and return the error at the end instead.
+ *
+ * Note that this should likely do a "memchr(name, 0, len)"
+ * check too, since that would be filesystem corruption as
+ * well. However, that case can't actually confuse user space,
+ * which has to do a strlen() on the name anyway to find the
+ * filename length, and the above "soft error" worry means
+ * that it's probably better left alone until we have that
+ * issue clarified.
+ */
+static int verify_dirent_name(const char *name, int len)
+{
+	if (WARN_ON_ONCE(!len))
+		return -EIO;
+	if (WARN_ON_ONCE(memchr(name, '/', len)))
+		return -EIO;
+	return 0;
+}
+
 /*
  * Traditional linux readdir() handling..
  *
@@ -227,6 +261,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 	int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
 		sizeof(long));
 
+	buf->error = verify_dirent_name(name, namlen);
+	if (unlikely(buf->error))
+		return buf->error;
 	buf->error = -EINVAL;	/* only used if we fail.. */
 	if (reclen > buf->count)
 		return -EINVAL;
@@ -316,6 +353,9 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
 		sizeof(u64));
 
+	buf->error = verify_dirent_name(name, namlen);
+	if (unlikely(buf->error))
+		return buf->error;
 	buf->error = -EINVAL;	/* only used if we fail.. */
 	if (reclen > buf->count)
 		return -EINVAL;
-- 
cgit v1.2.3


From c512c69187197fe08026cb5bbe7b9709f4f89b73 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 7 Oct 2019 12:56:48 -0700
Subject: uaccess: implement a proper unsafe_copy_to_user() and switch filldir
 over to it

In commit 9f79b78ef744 ("Convert filldir[64]() from __put_user() to
unsafe_put_user()") I made filldir() use unsafe_put_user(), which
improves code generation on x86 enormously.

But because we didn't have a "unsafe_copy_to_user()", the dirent name
copy was also done by hand with unsafe_put_user() in a loop, and it
turns out that a lot of other architectures didn't like that, because
unlike x86, they have various alignment issues.

Most non-x86 architectures trap and fix it up, and some (like xtensa)
will just fail unaligned put_user() accesses unconditionally.  Which
makes that "copy using put_user() in a loop" not work for them at all.

I could make that code do explicit alignment etc, but the architectures
that don't like unaligned accesses also don't really use the fancy
"user_access_begin/end()" model, so they might just use the regular old
__copy_to_user() interface.

So this commit takes that looping implementation, turns it into the x86
version of "unsafe_copy_to_user()", and makes other architectures
implement the unsafe copy version as __copy_to_user() (the same way they
do for the other unsafe_xyz() accessor functions).

Note that it only does this for the copying _to_ user space, and we
still don't have a unsafe version of copy_from_user().

That's partly because we have no current users of it, but also partly
because the copy_from_user() case is slightly different and cannot
efficiently be implemented in terms of a unsafe_get_user() loop (because
gcc can't do asm goto with outputs).

It would be trivial to do this using "rep movsb", which would work
really nicely on newer x86 cores, but really badly on some older ones.

Al Viro is looking at cleaning up all our user copy routines to make
this all a non-issue, but for now we have this simple-but-stupid version
for x86 that works fine for the dirent name copy case because those
names are short strings and we simply don't need anything fancier.

Fixes: 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Reported-and-tested-by: Tony Luck <tony.luck@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/readdir.c | 44 ++------------------------------------------
 1 file changed, 2 insertions(+), 42 deletions(-)

(limited to 'fs/readdir.c')

diff --git a/fs/readdir.c b/fs/readdir.c
index 19bea591c3f1..6e2623e57b2e 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -27,53 +27,13 @@
 /*
  * Note the "unsafe_put_user() semantics: we goto a
  * label for errors.
- *
- * Also note how we use a "while()" loop here, even though
- * only the biggest size needs to loop. The compiler (well,
- * at least gcc) is smart enough to turn the smaller sizes
- * into just if-statements, and this way we don't need to
- * care whether 'u64' or 'u32' is the biggest size.
- */
-#define unsafe_copy_loop(dst, src, len, type, label) 		\
-	while (len >= sizeof(type)) {				\
-		unsafe_put_user(get_unaligned((type *)src),	\
-			(type __user *)dst, label);		\
-		dst += sizeof(type);				\
-		src += sizeof(type);				\
-		len -= sizeof(type);				\
-	}
-
-/*
- * We avoid doing 64-bit copies on 32-bit architectures. They
- * might be better, but the component names are mostly small,
- * and the 64-bit cases can end up being much more complex and
- * put much more register pressure on the code, so it's likely
- * not worth the pain of unaligned accesses etc.
- *
- * So limit the copies to "unsigned long" size. I did verify
- * that at least the x86-32 case is ok without this limiting,
- * but I worry about random other legacy 32-bit cases that
- * might not do as well.
- */
-#define unsafe_copy_type(dst, src, len, type, label) do {	\
-	if (sizeof(type) <= sizeof(unsigned long))		\
-		unsafe_copy_loop(dst, src, len, type, label);	\
-} while (0)
-
-/*
- * Copy the dirent name to user space, and NUL-terminate
- * it. This should not be a function call, since we're doing
- * the copy inside a "user_access_begin/end()" section.
  */
 #define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
 	char __user *dst = (_dst);				\
 	const char *src = (_src);				\
 	size_t len = (_len);					\
-	unsafe_copy_type(dst, src, len, u64, label);	 	\
-	unsafe_copy_type(dst, src, len, u32, label);		\
-	unsafe_copy_type(dst, src, len, u16, label);		\
-	unsafe_copy_type(dst, src, len, u8,  label);		\
-	unsafe_put_user(0, dst, label);				\
+	unsafe_put_user(0, dst+len, label);			\
+	unsafe_copy_to_user(dst, src, len, label);		\
 } while (0)
 
 
-- 
cgit v1.2.3


From b9959c7a347d6adbb558fba7e36e9fef3cba3b07 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 18 Oct 2019 18:41:16 -0400
Subject: filldir[64]: remove WARN_ON_ONCE() for bad directory entries

This was always meant to be a temporary thing, just for testing and to
see if it actually ever triggered.

The only thing that reported it was syzbot doing disk image fuzzing, and
then that warning is expected.  So let's just remove it before -rc4,
because the extra sanity testing should probably go to -stable, but we
don't want the warning to do so.

Reported-by: syzbot+3031f712c7ad5dd4d926@syzkaller.appspotmail.com
Fixes: 8a23eb804ca4 ("Make filldir[64]() verify the directory entry filename is valid")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/readdir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/readdir.c')

diff --git a/fs/readdir.c b/fs/readdir.c
index 6e2623e57b2e..d26d5ea4de7b 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -105,9 +105,9 @@ EXPORT_SYMBOL(iterate_dir);
  */
 static int verify_dirent_name(const char *name, int len)
 {
-	if (WARN_ON_ONCE(!len))
+	if (!len)
 		return -EIO;
-	if (WARN_ON_ONCE(memchr(name, '/', len)))
+	if (memchr(name, '/', len))
 		return -EIO;
 	return 0;
 }
-- 
cgit v1.2.3