summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-09 15:13:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-09 15:13:05 -0800
commit9e355113f02be17db573d579515dee63621b7c8b (patch)
tree9d6d6dea7ccae97b5439a91d30eed16b5821cfb9 /include
parent3304b3fedddfb1357c7f9e25526b5a7899ee1f13 (diff)
parent6cbfdf89470ef3c2110f376a507d135e7a7a7378 (diff)
Merge tag 'vfs-7.0-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner: "This contains a mix of VFS cleanups, performance improvements, API fixes, documentation, and a deprecation notice. Scalability and performance: - Rework pid allocation to only take pidmap_lock once instead of twice during alloc_pid(), improving thread creation/teardown throughput by 10-16% depending on false-sharing luck. Pad the namespace refcount to reduce false-sharing - Track file lock presence via a flag in ->i_opflags instead of reading ->i_flctx, avoiding false-sharing with ->i_readcount on open/close hot paths. Measured 4-16% improvement on 24-core open-in-a-loop benchmarks - Use a consume fence in locks_inode_context() to match the store-release/load-consume idiom, eliminating a hardware fence on some architectures - Annotate cdev_lock with __cacheline_aligned_in_smp to prevent false-sharing - Remove a redundant DCACHE_MANAGED_DENTRY check in __follow_mount_rcu() that never fires since the caller already verifies it, eliminating a 100% mispredicted branch - Fix a 100% mispredicted likely() in devcgroup_inode_permission() that became wrong after a prior code reorder Bug fixes and correctness: - Make insert_inode_locked() wait for inode destruction instead of skipping, fixing a corner case where two matching inodes could exist in the hash - Move f_mode initialization before file_ref_init() in alloc_file() to respect the SLAB_TYPESAFE_BY_RCU ordering contract - Add a WARN_ON_ONCE guard in try_to_free_buffers() for folios with no buffers attached, preventing a null pointer dereference when AS_RELEASE_ALWAYS is set but no release_folio op exists - Fix select restart_block to store end_time as timespec64, avoiding truncation of tv_sec on 32-bit architectures - Make dump_inode() use get_kernel_nofault() to safely access inode and superblock fields, matching the dump_mapping() pattern API modernization: - Make posix_acl_to_xattr() allocate the buffer internally since every single caller was doing it anyway. Reduces boilerplate and unnecessary error checking across ~15 filesystems - Replace deprecated simple_strtoul() with kstrtoul() for the ihash_entries, dhash_entries, mhash_entries, and mphash_entries boot parameters, adding proper error handling - Convert chardev code to use guard(mutex) and __free(kfree) cleanup patterns - Replace min_t() with min() or umin() in VFS code to avoid silently truncating unsigned long to unsigned int - Gate LOOKUP_RCU assertions behind CONFIG_DEBUG_VFS since callers already check the flag Deprecation: - Begin deprecating legacy BSD process accounting (acct(2)). The interface has numerous footguns and better alternatives exist (eBPF) Documentation: - Fix and complete kernel-doc for struct export_operations, removing duplicated documentation between ReST and source - Fix kernel-doc warnings for __start_dirop() and ilookup5_nowait() Testing: - Add a kunit test for initramfs cpio handling of entries with filesize > PATH_MAX Misc: - Add missing <linux/init_task.h> include in fs_struct.c" * tag 'vfs-7.0-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (28 commits) posix_acl: make posix_acl_to_xattr() alloc the buffer fs: make insert_inode_locked() wait for inode destruction initramfs_test: kunit test for cpio.filesize > PATH_MAX fs: improve dump_inode() to safely access inode fields fs: add <linux/init_task.h> for 'init_fs' docs: exportfs: Use source code struct documentation fs: move initializing f_mode before file_ref_init() exportfs: Complete kernel-doc for struct export_operations exportfs: Mark struct export_operations functions at kernel-doc exportfs: Fix kernel-doc output for get_name() acct(2): begin the deprecation of legacy BSD process accounting device_cgroup: remove branch hint after code refactor VFS: fix __start_dirop() kernel-doc warnings fs: Describe @isnew parameter in ilookup5_nowait() fs/namei: Remove redundant DCACHE_MANAGED_DENTRY check in __follow_mount_rcu fs: only assert on LOOKUP_RCU when built with CONFIG_DEBUG_VFS select: store end_time as timespec64 in restart block chardev: Switch to guard(mutex) and __free(kfree) namespace: Replace simple_strtoul with kstrtoul to parse boot params dcache: Replace simple_strtoul with kstrtoul in set_dhash_entries ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/device_cgroup.h2
-rw-r--r--include/linux/exportfs.h33
-rw-r--r--include/linux/filelock.h18
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/ns/ns_common_types.h4
-rw-r--r--include/linux/posix_acl_xattr.h5
-rw-r--r--include/linux/restart_block.h4
7 files changed, 47 insertions, 20 deletions
diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index 0864773a57e8..822085bc2d20 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -21,7 +21,7 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask)
if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)))
return 0;
- if (likely(!inode->i_rdev))
+ if (!inode->i_rdev)
return 0;
if (S_ISBLK(inode->i_mode))
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index f0cf2714ec52..262e24d83313 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -201,9 +201,9 @@ struct handle_to_path_ctx {
* @commit_metadata: commit metadata changes to stable storage
*
* See Documentation/filesystems/nfs/exporting.rst for details on how to use
- * this interface correctly.
+ * this interface correctly and the definition of the flags.
*
- * encode_fh:
+ * @encode_fh:
* @encode_fh should store in the file handle fragment @fh (using at most
* @max_len bytes) information that can be used by @decode_fh to recover the
* file referred to by the &struct dentry @de. If @flag has CONNECTABLE bit
@@ -215,7 +215,7 @@ struct handle_to_path_ctx {
* greater than @max_len*4 bytes). On error @max_len contains the minimum
* size(in 4 byte unit) needed to encode the file handle.
*
- * fh_to_dentry:
+ * @fh_to_dentry:
* @fh_to_dentry is given a &struct super_block (@sb) and a file handle
* fragment (@fh, @fh_len). It should return a &struct dentry which refers
* to the same file that the file handle fragment refers to. If it cannot,
@@ -227,31 +227,44 @@ struct handle_to_path_ctx {
* created with d_alloc_root. The caller can then find any other extant
* dentries by following the d_alias links.
*
- * fh_to_parent:
+ * @fh_to_parent:
* Same as @fh_to_dentry, except that it returns a pointer to the parent
* dentry if it was encoded into the filehandle fragment by @encode_fh.
*
- * get_name:
+ * @get_name:
* @get_name should find a name for the given @child in the given @parent
* directory. The name should be stored in the @name (with the
- * understanding that it is already pointing to a %NAME_MAX+1 sized
+ * understanding that it is already pointing to a %NAME_MAX + 1 sized
* buffer. get_name() should return %0 on success, a negative error code
* or error. @get_name will be called without @parent->i_rwsem held.
*
- * get_parent:
+ * @get_parent:
* @get_parent should find the parent directory for the given @child which
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
- * permission:
+ * @permission:
* Allow filesystems to specify a custom permission function.
*
- * open:
+ * @open:
* Allow filesystems to specify a custom open function.
*
- * commit_metadata:
+ * @commit_metadata:
* @commit_metadata should commit metadata changes to stable storage.
*
+ * @get_uuid:
+ * Get a filesystem unique signature exposed to clients.
+ *
+ * @map_blocks:
+ * Map and, if necessary, allocate blocks for a layout.
+ *
+ * @commit_blocks:
+ * Commit blocks in a layout once the client is done with them.
+ *
+ * @flags:
+ * Allows the filesystem to communicate to nfsd that it may want to do things
+ * differently when dealing with it.
+ *
* Locking rules:
* get_parent is called with child->d_inode->i_rwsem down
* get_name is not (which is possibly inconsistent)
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 2f5e5588ee07..d2c9740e26a8 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -242,7 +242,14 @@ bool locks_owner_has_blockers(struct file_lock_context *flctx,
static inline struct file_lock_context *
locks_inode_context(const struct inode *inode)
{
- return smp_load_acquire(&inode->i_flctx);
+ /*
+ * Paired with smp_store_release in locks_get_lock_context().
+ *
+ * Ensures ->i_flctx will be visible if we spotted the flag.
+ */
+ if (likely(!(smp_load_acquire(&inode->i_opflags) & IOP_FLCTX)))
+ return NULL;
+ return READ_ONCE(inode->i_flctx);
}
#else /* !CONFIG_FILE_LOCKING */
@@ -469,7 +476,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
* could end up racing with tasks trying to set a new lease on this
* file.
*/
- flctx = READ_ONCE(inode->i_flctx);
+ flctx = locks_inode_context(inode);
if (!flctx)
return 0;
smp_mb();
@@ -488,7 +495,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags)
* could end up racing with tasks trying to set a new lease on this
* file.
*/
- flctx = READ_ONCE(inode->i_flctx);
+ flctx = locks_inode_context(inode);
if (!flctx)
return 0;
smp_mb();
@@ -533,8 +540,11 @@ static inline int break_deleg_wait(struct delegated_inode *di)
static inline int break_layout(struct inode *inode, bool wait)
{
+ struct file_lock_context *flctx;
+
smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) {
+ flctx = locks_inode_context(inode);
+ if (flctx && !list_empty_careful(&flctx->flc_lease)) {
unsigned int flags = LEASE_BREAK_LAYOUT;
if (!wait)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca31bc9308a3..73911f961c7e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -631,6 +631,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_MGTIME 0x0020
#define IOP_CACHED_LINK 0x0040
#define IOP_FASTPERM_MAY_EXEC 0x0080
+#define IOP_FLCTX 0x0100
/*
* Inode state bits. Protected by inode->i_lock
diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h
index b332b019b29c..0014fbc1c626 100644
--- a/include/linux/ns/ns_common_types.h
+++ b/include/linux/ns/ns_common_types.h
@@ -108,11 +108,13 @@ extern const struct proc_ns_operations utsns_operations;
* @ns_tree: namespace tree nodes and active reference count
*/
struct ns_common {
+ struct {
+ refcount_t __ns_ref; /* do not use directly */
+ } ____cacheline_aligned_in_smp;
u32 ns_type;
struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
- refcount_t __ns_ref; /* do not use directly */
union {
struct ns_tree;
struct rcu_head ns_rcu;
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index e86f3b731da2..9e1892525eac 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -44,8 +44,9 @@ posix_acl_from_xattr(struct user_namespace *user_ns, const void *value,
}
#endif
-int posix_acl_to_xattr(struct user_namespace *user_ns,
- const struct posix_acl *acl, void *buffer, size_t size);
+extern void *posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
+ size_t *sizep, gfp_t gfp);
+
static inline const char *posix_acl_xattr_name(int type)
{
switch (type) {
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 67d2bf579942..9b262109726d 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -6,6 +6,7 @@
#define __LINUX_RESTART_BLOCK_H
#include <linux/compiler.h>
+#include <linux/time64.h>
#include <linux/types.h>
struct __kernel_timespec;
@@ -50,8 +51,7 @@ struct restart_block {
struct pollfd __user *ufds;
int nfds;
int has_timeout;
- unsigned long tv_sec;
- unsigned long tv_nsec;
+ struct timespec64 end_time;
} poll;
};
};