From 15623c860c93aac71d22e7bedb7661ff2d3418de Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Mon, 29 Sep 2025 11:02:05 +0200 Subject: nsfs: handle inode number mismatches gracefully in file handles Replace VFS_WARN_ON_ONCE() with graceful error handling when file handles contain inode numbers that don't match the actual namespace inode. This prevents userspace from triggering kernel warnings by providing malformed file handles to open_by_handle_at(). The issue occurs when userspace provides a file handle with valid namespace type and ID that successfully locates a namespace, but specifies an incorrect inode number. Previously, this would trigger VFS_WARN_ON_ONCE() when comparing the real inode number against the provided value. Since file handle data is user-controllable, inode number mismatches should be treated as invalid input rather than kernel consistency errors. Handle this case by returning NULL to indicate the file handle is invalid, rather than warning about what is essentially user input validation. Reported-by: syzbot+9eefe09bedd093f156c2@syzkaller.appspotmail.com Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Deepanshu Kartikey Signed-off-by: Christian Brauner --- fs/nsfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nsfs.c b/fs/nsfs.c index 648dc59bef7f..79b026a36fb6 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -490,7 +490,9 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh, VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id); VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type); - VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum); + + if (ns->inum != fid->ns_inum) + return NULL; if (!__ns_ref_get(ns)) return NULL; -- cgit v1.2.3 From deafd21efdd106f9744e2339e0c70c0f4ba565c3 Mon Sep 17 00:00:00 2001 From: Zhou Yuhang Date: Wed, 24 Sep 2025 20:21:39 +0800 Subject: fs: update comment in init_file() The f_count member in struct file has been replaced by f_ref, so update f_count to f_ref in the comment. Signed-off-by: Zhou Yuhang Signed-off-by: Christian Brauner --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index b223d873e48b..cd4a3db4659a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -192,7 +192,7 @@ static int init_file(struct file *f, int flags, const struct cred *cred) f->f_sb_err = 0; /* - * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While + * We're SLAB_TYPESAFE_BY_RCU so initialize f_ref last. While * fget-rcu pattern users need to be able to handle spurious * refcount bumps we should reinitialize the reused file first. */ -- cgit v1.2.3 From 154d1e7ad9e5ce4b2aaefd3862b3dba545ad978d Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 30 Sep 2025 13:42:57 +0800 Subject: dax: skip read lock assertion for read-only filesystems The commit 168316db3583("dax: assert that i_rwsem is held exclusive for writes") added lock assertions to ensure proper locking in DAX operations. However, these assertions trigger false-positive lockdep warnings since read lock is unnecessary on read-only filesystems(e.g., erofs). This patch skips the read lock assertion for read-only filesystems, eliminating the spurious warnings while maintaining the integrity checks for writable filesystems. Fixes: 168316db3583 ("dax: assert that i_rwsem is held exclusive for writes") Signed-off-by: Yuezhang Mo Reviewed-by: Friendy Su Reviewed-by: Daniel Palmer Reviewed-by: Gao Xiang Signed-off-by: Christian Brauner --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 89f071ba7b10..516f995a988c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1725,7 +1725,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) { lockdep_assert_held_write(&iomi.inode->i_rwsem); iomi.flags |= IOMAP_WRITE; - } else { + } else if (!sb_rdonly(iomi.inode->i_sb)) { lockdep_assert_held(&iomi.inode->i_rwsem); } -- cgit v1.2.3 From 56094ad3eaa21e6621396cc33811d8f72847a834 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Oct 2025 17:55:07 +0200 Subject: vfs: Don't leak disconnected dentries on umount When user calls open_by_handle_at() on some inode that is not cached, we will create disconnected dentry for it. If such dentry is a directory, exportfs_decode_fh_raw() will then try to connect this dentry to the dentry tree through reconnect_path(). It may happen for various reasons (such as corrupted fs or race with rename) that the call to lookup_one_unlocked() in reconnect_one() will fail to find the dentry we are trying to reconnect and instead create a new dentry under the parent. Now this dentry will not be marked as disconnected although the parent still may well be disconnected (at least in case this inconsistency happened because the fs is corrupted and .. doesn't point to the real parent directory). This creates inconsistency in disconnected flags but AFAICS it was mostly harmless. At least until commit f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") which removed adding of most disconnected dentries to sb->s_anon list. Thus after this commit cleanup of disconnected dentries implicitely relies on the fact that dput() will immediately reclaim such dentries. However when some leaf dentry isn't marked as disconnected, as in the scenario described above, the reclaim doesn't happen and the dentries are "leaked". Memory reclaim can eventually reclaim them but otherwise they stay in memory and if umount comes first, we hit infamous "Busy inodes after unmount" bug. Make sure all dentries created under a disconnected parent are marked as disconnected as well. Reported-by: syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com Fixes: f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Christian Brauner --- fs/dcache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index a067fa0a965a..035cccbc9276 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2557,6 +2557,8 @@ struct dentry *d_alloc_parallel(struct dentry *parent, spin_lock(&parent->d_lock); new->d_parent = dget_dlock(parent); hlist_add_head(&new->d_sib, &parent->d_children); + if (parent->d_flags & DCACHE_DISCONNECTED) + new->d_flags |= DCACHE_DISCONNECTED; spin_unlock(&parent->d_lock); retry: -- cgit v1.2.3 From a779e27f24aeb679969ddd1fdd7f636e22ddbc1e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 7 Oct 2025 11:32:42 +0200 Subject: coredump: fix core_pattern input validation In be1e0283021e ("coredump: don't pointlessly check and spew warnings") we tried to fix input validation so it only happens during a write to core_pattern. This would avoid needlessly logging a lot of warnings during a read operation. However the logic accidently got inverted in this commit. Fix it so the input validation only happens on write and is skipped on read. Fixes: be1e0283021e ("coredump: don't pointlessly check and spew warnings") Fixes: 16195d2c7dd2 ("coredump: validate socket name as it is written") Reviewed-by: Jan Kara Reported-by: Yu Watanabe Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- fs/exec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index b5fc06a092a4..5c1c381ee380 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1468,7 +1468,7 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write, ssize_t retval; char old_core_pattern[CORENAME_MAX_SIZE]; - if (write) + if (!write) return proc_dostring(table, write, buffer, lenp, ppos); retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE); diff --git a/fs/exec.c b/fs/exec.c index 6b70c6726d31..4298e7e08d5d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2048,7 +2048,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (!error && !write) + if (!error && write) validate_coredump_safety(); return error; } -- cgit v1.2.3 From 4dd5b5ac089bb6ea719b7ffb748707ac9cbce4e4 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 8 Oct 2025 14:44:17 +0200 Subject: Revert "fs: make vfs_fileattr_[get|set] return -EOPNOTSUPP" This reverts commit 474b155adf3927d2c944423045757b54aa1ca4de. This patch caused regression in ioctl_setflags(). Underlying filesystems use EOPNOTSUPP to indicate that flag is not supported. This error is also gets converted in ioctl_setflags(). Therefore, for unsupported flags error changed from EOPNOSUPP to ENOIOCTLCMD. Link: https://lore.kernel.org/linux-xfs/a622643f-1585-40b0-9441-cf7ece176e83@kernel.org/ Signed-off-by: Andrey Albershteyn Signed-off-by: Christian Brauner --- fs/file_attr.c | 12 ++---------- fs/fuse/ioctl.c | 4 ---- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/inode.c | 5 ++++- 4 files changed, 7 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/file_attr.c b/fs/file_attr.c index 12424d4945d0..460b2dd21a85 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -84,7 +84,7 @@ int vfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) int error; if (!inode->i_op->fileattr_get) - return -EOPNOTSUPP; + return -ENOIOCTLCMD; error = security_inode_file_getattr(dentry, fa); if (error) @@ -270,7 +270,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, int err; if (!inode->i_op->fileattr_set) - return -EOPNOTSUPP; + return -ENOIOCTLCMD; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; @@ -312,8 +312,6 @@ int ioctl_getflags(struct file *file, unsigned int __user *argp) int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; if (!err) err = put_user(fa.flags, argp); return err; @@ -335,8 +333,6 @@ int ioctl_setflags(struct file *file, unsigned int __user *argp) fileattr_fill_flags(&fa, flags); err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; } } return err; @@ -349,8 +345,6 @@ int ioctl_fsgetxattr(struct file *file, void __user *argp) int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; if (!err) err = copy_fsxattr_to_user(&fa, argp); @@ -371,8 +365,6 @@ int ioctl_fssetxattr(struct file *file, void __user *argp) if (!err) { err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; } } return err; diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 57032eadca6c..fdc175e93f74 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -536,8 +536,6 @@ int fuse_fileattr_get(struct dentry *dentry, struct file_kattr *fa) cleanup: fuse_priv_ioctl_cleanup(inode, ff); - if (err == -ENOTTY) - err = -EOPNOTSUPP; return err; } @@ -574,7 +572,5 @@ int fuse_fileattr_set(struct mnt_idmap *idmap, cleanup: fuse_priv_ioctl_cleanup(inode, ff); - if (err == -ENOTTY) - err = -EOPNOTSUPP; return err; } diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index aac7e34f56c1..604a82acd164 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -178,7 +178,7 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old, err = ovl_real_fileattr_get(old, &oldfa); if (err) { /* Ntfs-3g returns -EINVAL for "no fileattr support" */ - if (err == -EOPNOTSUPP || err == -EINVAL) + if (err == -ENOTTY || err == -EINVAL) return 0; pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", old->dentry, err); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index aaa4cf579561..e11f310ce092 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -720,7 +720,10 @@ int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa) if (err) return err; - return vfs_fileattr_get(realpath->dentry, fa); + err = vfs_fileattr_get(realpath->dentry, fa); + if (err == -ENOIOCTLCMD) + err = -ENOTTY; + return err; } int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa) -- cgit v1.2.3 From d90ad28e8aa482e397150e22f3762173d918a724 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 8 Oct 2025 14:44:18 +0200 Subject: fs: return EOPNOTSUPP from file_setattr/file_getattr syscalls These syscalls call to vfs_fileattr_get/set functions which return ENOIOCTLCMD if filesystem doesn't support setting file attribute on an inode. For syscalls EOPNOTSUPP would be more appropriate return error. Signed-off-by: Andrey Albershteyn Reviewed-by: Jan Kara Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- fs/file_attr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/file_attr.c b/fs/file_attr.c index 460b2dd21a85..1dcec88c0680 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -416,6 +416,8 @@ SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename, } error = vfs_fileattr_get(filepath.dentry, &fa); + if (error == -ENOIOCTLCMD || error == -ENOTTY) + error = -EOPNOTSUPP; if (error) return error; @@ -483,6 +485,8 @@ SYSCALL_DEFINE5(file_setattr, int, dfd, const char __user *, filename, if (!error) { error = vfs_fileattr_set(mnt_idmap(filepath.mnt), filepath.dentry, &fa); + if (error == -ENOIOCTLCMD || error == -ENOTTY) + error = -EOPNOTSUPP; mnt_drop_write(filepath.mnt); } -- cgit v1.2.3 From 7933a585d70ee496fa341b50b8b0a95b131867ff Mon Sep 17 00:00:00 2001 From: Seong-Gwang Heo Date: Thu, 9 Oct 2025 13:41:48 +0800 Subject: ovl: remove redundant IOCB_DIO_CALLER_COMP clearing The backing_file_write_iter() function, which is called immediately after this code, already contains identical logic to clear the IOCB_DIO_CALLER_COMP flag along with the same explanatory comment. There is no need to duplicate this operation in the overlayfs code. Signed-off-by: Seong-Gwang Heo Fixes: a6293b3e285c ("fs: factor out backing_file_{read,write}_iter() helpers") Acked-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/overlayfs/file.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index fc52c796061d..7ab2c9daffd0 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -369,11 +369,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); - /* - * Overlayfs doesn't support deferred completions, don't copy - * this property in case it is set by the issuer. - */ - ifl &= ~IOCB_DIO_CALLER_COMP; ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); out_unlock: -- cgit v1.2.3