diff options
| author | Miklos Szeredi <mszeredi@redhat.com> | 2026-06-05 15:53:16 +0200 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2026-06-06 15:21:40 +0200 |
| commit | 6a07814ff643b5c8e1353d8c6229f52fde205cde (patch) | |
| tree | f263947e743b386c49a12a3240788f7a01cffc8e | |
| parent | 254f49634ee16a731174d2ae34bc50bd5f45e731 (diff) | |
kernfs: fix xattr race condition with multiple superblocks
Multiple superblocks with different namespaces can share the same
kernfs_node when kernfs_test_super() finds a matching root but
different namespace. This means multiple inodes from different
superblocks can reference the same kernfs_node->iattr->xattrs
structure.
The VFS layer only holds per-inode locks during xattr operations,
which is insufficient to serialize concurrent xattr modifications on
the shared kernfs_node. This can lead to race conditions in
simple_xattr_set() where the lookup->replace/remove sequence is not
atomic with respect to operations from other superblocks.
Fix this by protecting xattr operations with the existing hashed
kernfs_locks->open_file_mutex[] array, which is already used to
protect per-node open file data. The hashed mutex array provides
scalable per-node serialization (scaled by CPU count, up to 1024 locks
on 32+ CPU systems) with zero memory overhead.
Changes:
- Rename open_file_mutex[] to node_mutex[] to reflect dual purpose
- Add kernfs_node_lock_ptr() and kernfs_node_lock() helpers
- Protect simple_xattr_set() calls in kernfs_xattr_set() and
kernfs_vfs_user_xattr_set() with the hashed mutex
- Update file.c to use new helpers via compatibility wrappers
- Update documentation to explain the extended lock usage
Fixes: b32c4a213698 ("xattr: add rhashtable-based simple_xattr infrastructure")
Reported-by: Sashiko <sashiko-bot@kernel.org>
Closes: https://sashiko.dev/#/patchset/20260601162454.2116375-1-mszeredi%40redhat.com
Assisted-by: Claude:claude-sonnet-4-5
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://patch.msgid.link/20260605135322.2632068-2-mszeredi@redhat.com
Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
| -rw-r--r-- | fs/kernfs/file.c | 13 | ||||
| -rw-r--r-- | fs/kernfs/inode.c | 12 | ||||
| -rw-r--r-- | fs/kernfs/kernfs-internal.h | 20 | ||||
| -rw-r--r-- | fs/kernfs/mount.c | 2 | ||||
| -rw-r--r-- | include/linux/kernfs.h | 11 |
5 files changed, 44 insertions, 14 deletions
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 1163aa769738..8e0e90c93372 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -40,22 +40,15 @@ struct kernfs_open_node { static DEFINE_SPINLOCK(kernfs_notify_lock); static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; +/* Compatibility wrappers - use the common hashed node lock */ static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) { - int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); - - return &kernfs_locks->open_file_mutex[idx]; + return kernfs_node_lock_ptr(kn); } static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) { - struct mutex *lock; - - lock = kernfs_open_file_mutex_ptr(kn); - - mutex_lock(lock); - - return lock; + return kernfs_node_lock(kn); } /** diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 38b28aa7cd02..e676737d9531 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -320,6 +320,15 @@ int kernfs_xattr_set(struct kernfs_node *kn, const char *name, if (!attrs) return -ENOMEM; + /* + * Protect xattr modifications with the hashed per-node mutex. + * Multiple superblocks (with different namespaces) can share the same + * kernfs_node, so inode locking alone is insufficient. The hashed mutex + * ensures serialization of concurrent xattr operations on the same node, + * including the lazy allocation of the xattrs structure itself. + */ + CLASS(kernfs_node_lock, lock)(kn); + xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags); if (IS_ERR_OR_NULL(xattrs)) return PTR_ERR(xattrs); @@ -372,6 +381,9 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, if (!attrs) return -ENOMEM; + /* See comment in kernfs_xattr_set() about locking. */ + CLASS(kernfs_node_lock, lock)(kn); + xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags); if (IS_ERR_OR_NULL(xattrs)) return PTR_ERR(xattrs); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 8d8912f50b05..1dc6663553d1 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -211,4 +211,24 @@ extern const struct inode_operations kernfs_symlink_iops; * kernfs locks */ extern struct kernfs_global_locks *kernfs_locks; + +/* Hashed mutex helpers - protect per-node data structures */ +static inline struct mutex *kernfs_node_lock_ptr(struct kernfs_node *kn) +{ + int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); + + return &kernfs_locks->node_mutex[idx]; +} + +static inline struct mutex *kernfs_node_lock(struct kernfs_node *kn) +{ + struct mutex *lock = kernfs_node_lock_ptr(kn); + + mutex_lock(lock); + return lock; +} + +DEFINE_CLASS(kernfs_node_lock, struct mutex *, + mutex_unlock(_T), kernfs_node_lock(kn), struct kernfs_node *kn) + #endif /* __KERNFS_INTERNAL_H */ diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 6e3217b6e481..f183a96778b9 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -446,7 +446,7 @@ static void __init kernfs_mutex_init(void) int count; for (count = 0; count < NR_KERNFS_LOCKS; count++) - mutex_init(&kernfs_locks->open_file_mutex[count]); + mutex_init(&kernfs_locks->node_mutex[count]); } static void __init kernfs_lock_init(void) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index e21b2f7f4159..351a5101c862 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -76,20 +76,25 @@ struct kernfs_iattrs; * kernfs_open_file. * * kernfs_open_files are chained at kernfs_open_node->files, which is - * protected by kernfs_global_locks.open_file_mutex[i]. + * protected by kernfs_global_locks.node_mutex[i]. * * To reduce possible contention in sysfs access, arising due to single - * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node + * locks, use an array of locks (e.g. node_mutex) and use kernfs_node * object address as hash keys to get the index of these locks. * * Hashed mutexes are safe to use here because operations using these don't * rely on global exclusion. * + * The hashed mutex array protects per-node data: the kernfs_open_node for + * open file management, and kernfs_node xattr operations (necessary because + * multiple superblocks with different namespaces can share the same + * kernfs_node, making per-inode locking insufficient). + * * In future we intend to replace other global locks with hashed ones as well. * kernfs_global_locks acts as a holder for all such hash tables. */ struct kernfs_global_locks { - struct mutex open_file_mutex[NR_KERNFS_LOCKS]; + struct mutex node_mutex[NR_KERNFS_LOCKS]; }; enum kernfs_node_type { |
