From 64a989dbd144e0622371396461b11335459692d2 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@hammerspace.com>
Date: Thu, 27 Nov 2025 11:02:05 -0500
Subject: VFS/knfsd: Teach dentry_create() to use atomic_open()

While knfsd offers combined exclusive create and open results to clients,
on some filesystems those results may not be atomic.  This behavior can be
observed.  For example, an open O_CREAT with mode 0 will succeed in creating
the file but unexpectedly return -EACCES from vfs_open().

Additionally, reducing the number of remote RPC calls required for O_CREAT
on network filesystems provides a performance benefit in the open path.

Teach knfsd's helper dentry_create() to use atomic_open() for filesystems
that support it.  The previously const @path is passed up to atomic_open()
and may be modified depending on whether an existing entry was found or if
the atomic_open() returned an error and consumed the passed-in dentry.

Signed-off-by: Benjamin Coddington <bcodding@hammerspace.com>
Link: https://patch.msgid.link/8e449bfb64ab055abb9fd82641a171531415a88c.1764259052.git.bcodding@hammerspace.com
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d..1cb3385ee852 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2457,7 +2457,7 @@ struct file *dentry_open(const struct path *path, int flags,
 			 const struct cred *creds);
 struct file *dentry_open_nonotify(const struct path *path, int flags,
 				  const struct cred *cred);
-struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+struct file *dentry_create(struct path *path, int flags, umode_t mode,
 			   const struct cred *cred);
 const struct path *backing_file_user_path(const struct file *f);
 
-- 
cgit v1.2.3


From 887e97745ec336c2f49b6c0af3c4cc00a5df3211 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Wed, 3 Dec 2025 10:48:37 +0100
Subject: fs: track the inode having file locks with a flag in ->i_opflags

Opening and closing an inode dirties the ->i_readcount field.

Depending on the alignment of the inode, it may happen to false-share,
to varying extents, with other fields loaded for both operations.

This notably concerns the ->i_flctx field.

Since most inodes don't have the field populated, this bit can be managed
with a flag in ->i_opflags instead which bypasses the problem.

Here are results I obtained while opening a file read-only in a loop
with 24 cores doing the work on Sapphire Rapids. Utilizing the flag as
opposed to reading ->i_flctx field was toggled at runtime as the benchmark
was running, to make sure both results come from the same alignment.

before: 3233740
after:  3373346 (+4%)

before: 3284313
after:  3518711 (+7%)

before: 3505545
after:  4092806 (+16%)

Or to put it differently, this varies wildly depending on how (un)lucky
you get.

The primary bottleneck before and after is the avoidable lockref trip in
do_dentry_open().

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://patch.msgid.link/20251203094837.290654-2-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d..094b0adcb035 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -631,6 +631,7 @@ is_uncached_acl(struct posix_acl *acl)
 #define IOP_MGTIME		0x0020
 #define IOP_CACHED_LINK		0x0040
 #define IOP_FASTPERM_MAY_EXEC	0x0080
+#define IOP_FLCTX		0x0100
 
 /*
  * Inode state bits.  Protected by inode->i_lock
-- 
cgit v1.2.3


From 51a146e0595c638c58097a1660ff6b6e7d3b72f3 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Fri, 12 Dec 2025 11:44:03 -0600
Subject: fs: Remove internal old mount API code

Now that the last in-tree filesystem has been converted to the new mount
API, remove all legacy mount API code designed to handle un-converted
filesystems, and remove associated documentation as well.

(The code to handle the legacy mount(2) syscall from userspace is still
in place, of course.)

Tested with an allmodconfig build on x86_64, and a sanity check of an
old mount(2) syscall mount.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Link: https://patch.msgid.link/20251212174403.2882183-1-sandeen@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d..9949d253e5aa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2274,8 +2274,6 @@ struct file_system_type {
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	int (*init_fs_context)(struct fs_context *);
 	const struct fs_parameter_spec *parameters;
-	struct dentry *(*mount) (struct file_system_type *, int,
-		       const char *, void *);
 	void (*kill_sb) (struct super_block *);
 	struct module *owner;
 	struct file_system_type * next;
-- 
cgit v1.2.3


From f7386f545e49e5e6229a14d92b39340d155b0b3f Mon Sep 17 00:00:00 2001
From: Joel Granados <joel.granados@kernel.org>
Date: Mon, 3 Nov 2025 22:29:08 +0100
Subject: sysctl: Remove unused ctl_table forward declarations

Remove superfluous forward declarations of ctl_table from header files
where they are no longer needed. These declarations were left behind
after sysctl code refactoring and cleanup.

Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Joel Granados <joel.granados@kernel.org>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d..77f6302fdced 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3487,7 +3487,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
 				 size_t len, loff_t *ppos);
 
-struct ctl_table;
 int __init list_bdev_fs_names(char *buf, size_t size);
 
 #define __FMODE_EXEC		((__force int) FMODE_EXEC)
-- 
cgit v1.2.3


From 51e49111c00bee76ca403adf7cd617b71a9a0da4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 8 Jan 2026 12:13:19 -0500
Subject: fs: remove simple_nosetlease()

Setting ->setlease() to a NULL pointer now has the same effect as
setting it to simple_nosetlease(). Remove all of the setlease
file_operations that are set to simple_nosetlease, and the function
itself.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20260108-setlease-6-20-v1-24-ea4dec9b67fa@kernel.org
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5c9cf28c4dc..e46e8aad9339 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3217,7 +3217,6 @@ extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
 struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
 					   const struct inode *context_inode);
-extern int simple_nosetlease(struct file *, int, struct file_lease **, void **);
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
-- 
cgit v1.2.3


From c644bce62b9c6b441143a03c910f986109c47001 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 8 Jan 2026 08:45:22 +0100
Subject: readdir: require opt-in for d_type flags

Commit c31f91c6af96 ("fuse: don't allow signals to interrupt getdents
copying") introduced the use of high bits in d_type as flags. However,
overlayfs was not adapted to handle this change.

In ovl_cache_entry_new(), the code checks if d_type == DT_CHR to
determine if an entry might be a whiteout. When fuse is used as the
lower layer and sets high bits in d_type, this comparison fails,
causing whiteout files to not be recognized properly and resulting in
incorrect overlayfs behavior.

Fix this by requiring callers of iterate_dir() to opt-in for getting
flag bits in d_type outside of S_DT_MASK.

Fixes: c31f91c6af96 ("fuse: don't allow signals to interrupt getdents copying")
Link: https://lore.kernel.org/all/20260107034551.439-1-luochunsheng@ustc.edu/
Link: https://github.com/containerd/stargz-snapshotter/issues/2214
Reported-by: Chunsheng Luo <luochunsheng@ustc.edu>
Reviewed-by: Chunsheng Luo <luochunsheng@ustc.edu>
Tested-by: Chunsheng Luo <luochunsheng@ustc.edu>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://patch.msgid.link/20260108074522.3400998-1-amir73il@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5c9cf28c4dc..a01621fa636a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1855,6 +1855,8 @@ struct dir_context {
 	 * INT_MAX  unlimited
 	 */
 	int count;
+	/* @actor supports these flags in d_type high bits */
+	unsigned int dt_flags_mask;
 };
 
 /* If OR-ed with d_type, pending signals are not checked */
@@ -3524,7 +3526,9 @@ static inline bool dir_emit(struct dir_context *ctx,
 			    const char *name, int namelen,
 			    u64 ino, unsigned type)
 {
-	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type);
+	unsigned int dt_mask = S_DT_MASK | ctx->dt_flags_mask;
+
+	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type & dt_mask);
 }
 static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
 {
-- 
cgit v1.2.3


From 20b781834ea0037b63c657e15b5aa4cfb4dd9b8b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 8 Jan 2026 15:19:01 +0100
Subject: fs: remove inode_update_time

The only external user is gone now, open code it in the two VFS
callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260108141934.2052404-2-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d..294e4c0b5aa8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2246,7 +2246,6 @@ enum file_time_flags {
 
 extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
-int inode_update_time(struct inode *inode, int flags);
 
 static inline void file_accessed(struct file *file)
 {
-- 
cgit v1.2.3


From dc9629faef0a3d3cd35aff22806376700275a8b6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 8 Jan 2026 15:19:02 +0100
Subject: fs: allow error returns from generic_update_time

Now that no caller looks at the updated flags, switch generic_update_time
to the same calling convention as the ->update_time method and return 0
or a negative errno.

This prepares for adding non-blocking timestamp updates that could return
-EAGAIN.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260108141934.2052404-3-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 294e4c0b5aa8..0983df0d0705 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2399,7 +2399,7 @@ extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 void iput_not_last(struct inode *);
 int inode_update_timestamps(struct inode *inode, int flags);
-int generic_update_time(struct inode *, int);
+int generic_update_time(struct inode *inode, int flags);
 
 /* /sys/fs */
 extern struct kobject *fs_kobj;
-- 
cgit v1.2.3


From 761475268fa8e322fe6b80bcf557dc65517df71e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 8 Jan 2026 15:19:05 +0100
Subject: fs: refactor ->update_time handling

Pass the type of update (atime vs c/mtime plus version) as an enum
instead of a set of flags that caused all kinds of confusion.
Because inode_update_timestamps now can't return a modified version
of those flags, return the I_DIRTY_* flags needed to persist the
update, which is what the main caller in generic_update_time wants
anyway, and which is suitable for the other callers that only want
to know if an update happened.

The whole update_time path keeps the flags argument, which will be used
to support non-blocking updates soon even if it is unused, and (the
slightly renamed) inode_update_time also gains the possibility to return
a negative errno to support this.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260108141934.2052404-6-hch@lst.de
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0983df0d0705..77985b4ed6ff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1717,6 +1717,13 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode,
 
 struct timespec64 simple_inode_init_ts(struct inode *inode);
 
+static inline int inode_time_dirty_flag(struct inode *inode)
+{
+	if (inode->i_sb->s_flags & SB_LAZYTIME)
+		return I_DIRTY_TIME;
+	return I_DIRTY_SYNC;
+}
+
 /*
  * Snapshotting support.
  */
@@ -1983,6 +1990,11 @@ int wrap_directory_iterator(struct file *, struct dir_context *,
 	static int shared_##x(struct file *file , struct dir_context *ctx) \
 	{ return wrap_directory_iterator(file, ctx, x); }
 
+enum fs_update_time {
+	FS_UPD_ATIME,
+	FS_UPD_CMTIME,
+};
+
 struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
@@ -2010,7 +2022,8 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
-	int (*update_time)(struct inode *, int);
+	int (*update_time)(struct inode *inode, enum fs_update_time type,
+			   unsigned int flags);
 	int (*atomic_open)(struct inode *, struct dentry *,
 			   struct file *, unsigned open_flag,
 			   umode_t create_mode);
@@ -2237,13 +2250,6 @@ static inline void inode_dec_link_count(struct inode *inode)
 	mark_inode_dirty(inode);
 }
 
-enum file_time_flags {
-	S_ATIME = 1,
-	S_MTIME = 2,
-	S_CTIME = 4,
-	S_VERSION = 8,
-};
-
 extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
 
@@ -2398,8 +2404,10 @@ static inline void super_set_sysfs_name_generic(struct super_block *sb, const ch
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 void iput_not_last(struct inode *);
-int inode_update_timestamps(struct inode *inode, int flags);
-int generic_update_time(struct inode *inode, int flags);
+int inode_update_time(struct inode *inode, enum fs_update_time type,
+		unsigned int flags);
+int generic_update_time(struct inode *inode, enum fs_update_time type,
+		unsigned int flags);
 
 /* /sys/fs */
 extern struct kobject *fs_kobj;
-- 
cgit v1.2.3


From 5cf06ea56ee67209d4e9a0b381641fb062ecd2c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 8 Jan 2026 15:19:07 +0100
Subject: fs: add a ->sync_lazytime method

Allow the file system to explicitly implement lazytime syncing instead
of piggybacking on generic inode dirtying.  This allows simplifying
the XFS implementation and prepares for non-blocking lazytime timestamp
updates.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260108141934.2052404-8-hch@lst.de
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 77985b4ed6ff..9cce8b9a29ac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2024,6 +2024,7 @@ struct inode_operations {
 		      u64 len);
 	int (*update_time)(struct inode *inode, enum fs_update_time type,
 			   unsigned int flags);
+	void (*sync_lazytime)(struct inode *inode);
 	int (*atomic_open)(struct inode *, struct dentry *,
 			   struct file *, unsigned open_flag,
 			   umode_t create_mode);
-- 
cgit v1.2.3


From 24df85ffb9712cd6060588f6e08defcda5986efe Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 31 Oct 2025 13:16:03 -0400
Subject: allow to use CLASS() for struct filename *

Not all users match that model, but most of them do.  By the end of
the series we'll be left with very few irregular ones...

Added:
CLASS(filename, name)(user_path) =>
	getname(user_path)
CLASS(filename_kernel, name)(string) =>
	getname_kernel(string)
CLASS(filename_flags, name)(user_path, flags) =>
	getname_flags(user_path, flags)
CLASS(filename_uflags, name)(user_path, flags) =>
	getname_uflags(user_path, flags)
CLASS(filename_maybe_null, name)(user_path, flags) =>
	getname_maybe_null(user_path, flags)
all with putname() as destructor.

"flags" in filename_flags is in LOOKUP_... space, only LOOKUP_EMPTY matters.
"flags" in filename_uflags and filename_maybe_null is in AT_...... space,
and only AT_EMPTY_PATH matters.

filename_flags conventions might be worth reconsidering later (it might or
might not be better off with boolean instead)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5c9cf28c4dc..d49b969ab432 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2517,6 +2517,12 @@ static inline struct filename *refname(struct filename *name)
 	return name;
 }
 
+DEFINE_CLASS(filename, struct filename *, putname(_T), getname(p), const char __user *p)
+EXTEND_CLASS(filename, _kernel, getname_kernel(p), const char *p)
+EXTEND_CLASS(filename, _flags, getname_flags(p, f), const char __user *p, unsigned int f)
+EXTEND_CLASS(filename, _uflags, getname_uflags(p, f), const char __user *p, unsigned int f)
+EXTEND_CLASS(filename, _maybe_null, getname_maybe_null(p, f), const char __user *p, unsigned int f)
+
 extern int finish_open(struct file *file, struct dentry *dentry,
 			int (*open)(struct inode *, struct file *));
 extern int finish_no_open(struct file *file, struct dentry *dentry);
-- 
cgit v1.2.3


From 41670a5900a8866b8cab52ab5936b5e9ef06fe91 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 1 Nov 2025 01:54:52 -0400
Subject: get rid of audit_reusename()

Originally we tried to avoid multiple insertions into audit names array
during retry loop by a cute hack - memorize the userland pointer and
if there already is a match, just grab an extra reference to it.

Cute as it had been, it had problems - two identical pointers had
audit aux entries merged, two identical strings did not.  Having
different behaviour for syscalls that differ only by addresses of
otherwise identical string arguments is obviously wrong - if nothing
else, compiler can decide to merge identical string literals.

Besides, this hack does nothing for non-audited processes - they get
a fresh copy for retry.  It's not time-critical, but having behaviour
subtly differ that way is bogus.

These days we have very few places that import filename more than once
(9 functions total) and it's easy to massage them so we get rid of all
re-imports.  With that done, we don't need audit_reusename() anymore.
There's no need to memorize userland pointer either.

Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index d49b969ab432..abe9c95c4874 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2411,7 +2411,6 @@ extern struct kobject *fs_kobj;
 struct audit_names;
 struct filename {
 	const char		*name;	/* pointer to actual string */
-	const __user char	*uptr;	/* original userland pointer */
 	atomic_t		refcnt;
 	struct audit_names	*aname;
 	const char		iname[];
-- 
cgit v1.2.3


From c3a3577cdb351e74d6ff6bc328c3bee18ce69298 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 19 Nov 2025 19:19:24 -0500
Subject: struct filename: use names_cachep only for getname() and friends

        Instances of struct filename come from names_cachep (via
__getname()).  That is done by getname_flags() and getname_kernel()
and these two are the main callers of __getname().  However, there are
other callers that simply want to allocate PATH_MAX bytes for uses that
have nothing to do with struct filename.

	We want saner allocation rules for long pathnames, so that struct
filename would *always* come from names_cachep, with the out-of-line
pathname getting kmalloc'ed.  For that we need to be able to change the
size of objects allocated by getname_flags()/getname_kernel().

	That requires the rest of __getname() users to stop using
names_cachep; we could explicitly switch all of those to kmalloc(),
but that would cause quite a bit of noise.  So the plan is to switch
getname_...() to new helpers and turn __getname() into a wrapper for
kmalloc().  Remaining __getname() users could be converted to explicit
kmalloc() at leisure, hopefully along with figuring out what size
they really want - PATH_MAX is an overkill for some of them, used out
of laziness ("we have a convenient helper that does 4K allocations and
that's large enough, let's use it").

	As a side benefit, names_cachep is no longer used outside
of fs/namei.c, so we can move it there and be done with that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index abe9c95c4874..997d515bab32 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2539,10 +2539,8 @@ static inline int finish_open_simple(struct file *file, int error)
 extern void __init vfs_caches_init_early(void);
 extern void __init vfs_caches_init(void);
 
-extern struct kmem_cache *names_cachep;
-
-#define __getname()		kmem_cache_alloc(names_cachep, GFP_KERNEL)
-#define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))
+#define __getname()		kmalloc(PATH_MAX, GFP_KERNEL)
+#define __putname(name)		kfree(name)
 
 void emergency_thaw_all(void);
 extern int sync_filesystem(struct super_block *);
-- 
cgit v1.2.3


From 8c888b31903cc2acfbf054c23d702caf68857810 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 19 Nov 2025 19:45:04 -0500
Subject: struct filename: saner handling of long names

Always allocate struct filename from names_cachep, long name or short;
short names would be embedded into struct filename.  Longer ones do
not cannibalize the original struct filename - put them into kmalloc'ed
buffers (PATH_MAX-sized for import from userland, strlen() + 1 - for
ones originating kernel-side, where we know the length beforehand).

Cutoff length for short names is chosen so that struct filename would be
192 bytes long - that's both a multiple of 64 and large enough to cover
the majority of real-world uses.

Simplifies the logic in getname()/putname() and friends.

[fixed an embarrassing braino in EMBEDDED_NAME_MAX, first reported by
Dan Carpenter]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 997d515bab32..f0f1e8034539 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2409,13 +2409,19 @@ extern struct kobject *fs_kobj;
 
 /* fs/open.c */
 struct audit_names;
-struct filename {
+
+struct __filename_head {
 	const char		*name;	/* pointer to actual string */
 	atomic_t		refcnt;
 	struct audit_names	*aname;
-	const char		iname[];
+};
+#define EMBEDDED_NAME_MAX	(192 - sizeof(struct __filename_head))
+struct filename {
+	struct __filename_head;
+	const char		iname[EMBEDDED_NAME_MAX];
 };
 static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
+static_assert(sizeof(struct filename) % 64 == 0);
 
 static inline struct mnt_idmap *file_mnt_idmap(const struct file *file)
 {
-- 
cgit v1.2.3


From 9fa3ec84587c5eca7580eafc27eee332bc3a5a0e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 1 Nov 2025 20:29:06 -0400
Subject: allow incomplete imports of filenames

There are two filename-related problems in io_uring and its
interplay with audit.

Filenames are imported when request is submitted and used when
it is processed.  Unfortunately, the latter may very well
happen in a different thread.  In that case the reference to
filename is put into the wrong audit_context - that of submitting
thread, not the processing one.  Audit logic is called by
the latter, and it really wants to be able to find the names
in the audit_context of the current (== processing) thread.

Another related problem is the headache with refcounts -
normally all references to given struct filename are visible
only to one thread (the one that uses that struct filename).
io_uring violates that - an extra reference is stashed in
audit_context of submitter.  It gets dropped when submitter
returns to userland, which can happen simultaneously with
processing thread deciding to drop the reference it got.

We paper over that by making refcount atomic, but that means
pointless headache for everyone.

Solution: the notion of partially imported filenames.  Namely,
already copied from userland, but *not* exposed to audit yet.

io_uring can create that in submitter thread, and complete the
import (obtaining the usual reference to struct filename) in
processing thread.

Object: struct delayed_filename.

Primitives for working with it:

delayed_getname(&delayed_filename, user_string) - copies the name from
userland, returning 0 and stashing the address of (still incomplete)
struct filename in delayed_filename on success and returning -E... on
error.

delayed_getname_uflags(&delayed_filename, user_string, atflags) -
similar, in the same relation to delayed_getname() as getname_uflags()
is to getname()

complete_getname(&delayed_filename) - completes the import of filename
stashed in delayed_filename and returns struct filename to caller,
emptying delayed_filename.

CLASS(filename_complete_delayed, name)(&delayed_filename) - variant of
CLASS(filename) with complete_getname() for constructor.

dismiss_delayed_filename(&delayed_filename) - destructor; drops whatever
might be stashed in delayed_filename, emptying it.

putname_to_delayed(&delayed_filename, name) - if name is shared, stashes
its copy into delayed_filename and drops the reference to name, otherwise
stashes the name itself in there.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f0f1e8034539..f1612a7dffd0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2516,6 +2516,17 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f
 extern void putname(struct filename *name);
 DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T))
 
+struct delayed_filename {
+	struct filename *__incomplete_filename;	// don't touch
+};
+#define INIT_DELAYED_FILENAME(ptr) \
+	((void)(*(ptr) = (struct delayed_filename){}))
+int delayed_getname(struct delayed_filename *, const char __user *);
+int delayed_getname_uflags(struct delayed_filename *v, const char __user *, int);
+void dismiss_delayed_filename(struct delayed_filename *);
+int putname_to_delayed(struct delayed_filename *, struct filename *);
+struct filename *complete_getname(struct delayed_filename *);
+
 static inline struct filename *refname(struct filename *name)
 {
 	atomic_inc(&name->refcnt);
@@ -2527,6 +2538,7 @@ EXTEND_CLASS(filename, _kernel, getname_kernel(p), const char *p)
 EXTEND_CLASS(filename, _flags, getname_flags(p, f), const char __user *p, unsigned int f)
 EXTEND_CLASS(filename, _uflags, getname_uflags(p, f), const char __user *p, unsigned int f)
 EXTEND_CLASS(filename, _maybe_null, getname_maybe_null(p, f), const char __user *p, unsigned int f)
+EXTEND_CLASS(filename, _complete_delayed, complete_getname(p), struct delayed_filename *p)
 
 extern int finish_open(struct file *file, struct dentry *dentry,
 			int (*open)(struct inode *, struct file *));
-- 
cgit v1.2.3


From 741c97fecb6a4160014a76759e9b8c0880fc44f1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 2 Nov 2025 01:01:47 -0400
Subject: struct filename ->refcnt doesn't need to be atomic

... or visible outside of audit, really.  Note that references
held in delayed_filename always have refcount 1, and from the
moment of complete_getname() or equivalent point in getname...()
there won't be any references to struct filename instance left
in places visible to other threads.

Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f1612a7dffd0..6a26ee347517 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2412,7 +2412,7 @@ struct audit_names;
 
 struct __filename_head {
 	const char		*name;	/* pointer to actual string */
-	atomic_t		refcnt;
+	int			refcnt;
 	struct audit_names	*aname;
 };
 #define EMBEDDED_NAME_MAX	(192 - sizeof(struct __filename_head))
@@ -2527,12 +2527,6 @@ void dismiss_delayed_filename(struct delayed_filename *);
 int putname_to_delayed(struct delayed_filename *, struct filename *);
 struct filename *complete_getname(struct delayed_filename *);
 
-static inline struct filename *refname(struct filename *name)
-{
-	atomic_inc(&name->refcnt);
-	return name;
-}
-
 DEFINE_CLASS(filename, struct filename *, putname(_T), getname(p), const char __user *p)
 EXTEND_CLASS(filename, _kernel, getname_kernel(p), const char *p)
 EXTEND_CLASS(filename, _flags, getname_flags(p, f), const char __user *p, unsigned int f)
-- 
cgit v1.2.3


From 173e937552432db9406f04eb7905541b774ac7cd Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 13 Jan 2026 12:39:50 +0000
Subject: fs: export may_delete() as may_delete_dentry()

For many years btrfs has been using a copy of may_delete() in
fs/btrfs/ioctl.c:btrfs_may_delete(). Every time may_delete() is updated we
need to update the btrfs copy, and this is a maintenance burden. Currently
there are minor differences between both because the btrfs side lacks
updates done in may_delete().

Export may_delete() so that btrfs can use it and with the less generic
name may_delete_dentry(). While at it change the calls in vfs_rmdir() to
pass a boolean literal instead of 1 and 0 as the last argument since the
argument has a bool type.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Link: https://patch.msgid.link/e09128fd53f01b19d0a58f0e7d24739f79f47f6d.1768307858.git.fdmanana@suse.com
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d..06632783a76c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2657,6 +2657,9 @@ static inline int path_permission(const struct path *path, int mask)
 int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
 		   struct inode *inode);
 
+int may_delete_dentry(struct mnt_idmap *idmap, struct inode *dir,
+		      struct dentry *victim, bool isdir);
+
 static inline bool execute_ok(struct inode *inode)
 {
 	return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
-- 
cgit v1.2.3


From 26aab3a485d500cb89ef7340797982bd066f63a5 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 13 Jan 2026 12:39:51 +0000
Subject: fs: export may_create() as may_create_dentry()

For many years btrfs has been using a copy of may_create() in
fs/btrfs/ioctl.c:btrfs_may_create(). Every time may_create() is updated we
need to update the btrfs copy, and this is a maintenance burden. Currently
there are minor differences between both because the btrfs side lacks
updates done in may_create().

Export may_create() so that btrfs can use it and with the less generic
name may_create_dentry().

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Link: https://patch.msgid.link/ce5174bca079f4cdcbb8dd145f0924feb1f227cd.1768307858.git.fdmanana@suse.com
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 06632783a76c..2d28eff6eb6a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2659,6 +2659,8 @@ int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
 
 int may_delete_dentry(struct mnt_idmap *idmap, struct inode *dir,
 		      struct dentry *victim, bool isdir);
+int may_create_dentry(struct mnt_idmap *idmap,
+		      struct inode *dir, struct dentry *child);
 
 static inline bool execute_ok(struct inode *inode)
 {
-- 
cgit v1.2.3


From 55fb177d3a0346106974749374ae2191ba250825 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 28 Jan 2026 14:24:05 +0100
Subject: fs: add helpers name_is_dot{,dot,_dotdot}

Rename the helper is_dot_dotdot() into the name_ namespace
and add complementary helpers to check for dot and dotdot
names individually.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://patch.msgid.link/20260128132406.23768-3-amir73il@gmail.com
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 094b0adcb035..95bb9a15e109 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2844,12 +2844,22 @@ u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
 
 extern char *file_path(struct file *, char *, int);
 
+static inline bool name_is_dot(const char *name, size_t len)
+{
+	return unlikely(len == 1 && name[0] == '.');
+}
+
+static inline bool name_is_dotdot(const char *name, size_t len)
+{
+	return unlikely(len == 2 && name[0] == '.' && name[1] == '.');
+}
+
 /**
- * is_dot_dotdot - returns true only if @name is "." or ".."
+ * name_is_dot_dotdot - returns true only if @name is "." or ".."
  * @name: file name to check
  * @len: length of file name, in bytes
  */
-static inline bool is_dot_dotdot(const char *name, size_t len)
+static inline bool name_is_dot_dotdot(const char *name, size_t len)
 {
 	return len && unlikely(name[0] == '.') &&
 		(len == 1 || (len == 2 && name[1] == '.'));
-- 
cgit v1.2.3