summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-02-12 10:52:46 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-02-12 10:52:46 -0800
commit a8ed22870f5304a6ac64f694572cafc12801a9cf (patch)
tree f59d9ae7e020899f4c48c7e78372577094e1c141 /fs
parent 997f9640c9238b991b6c8abf5420b37bbba5d867 (diff)
parent 74bd284537b3447c651588101c32a203e4fe1a32 (diff)
Merge tag 'fsnotify_for_v6.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify updates from Jan Kara:
 "A set of fixes to shutdown fsnotify subsystem before invalidating
  dcache thus addressing some nasty possible races"

* tag 'fsnotify_for_v6.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: Shutdown fsnotify before destroying sb's dcache
  fsnotify: Use connector list for destroying inode marks
  fsnotify: Track inode connectors for a superblock
Diffstat (limited to 'fs')
-rw-r--r-- fs/notify/fsnotify.c 69
-rw-r--r-- fs/notify/fsnotify.h 5
-rw-r--r-- fs/notify/mark.c 137
-rw-r--r-- fs/super.c 4
4 files changed, 140 insertions, 75 deletions
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 71bd44e5ab6d..9995de1710e5 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -33,65 +33,6 @@ void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
fsnotify_clear_marks_by_mntns(mntns);
}
-/**
- * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
- * @sb: superblock being unmounted.
- *
- * Called during unmount with no locks held, so needs to be safe against
- * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
- */
-static void fsnotify_unmount_inodes(struct super_block *sb)
-{
- struct inode *inode, *iput_inode = NULL;
-
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- /*
- * We cannot __iget() an inode in state I_FREEING,
- * I_WILL_FREE, or I_NEW which is fine because by that point
- * the inode cannot have any associated watches.
- */
- spin_lock(&inode->i_lock);
- if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- /*
- * If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with SB_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
- * unnecessarily violent and may in fact be illegal to do.
- * However, we should have been called /after/ evict_inodes
- * removed all zero refcount inodes, in any case. Test to
- * be sure.
- */
- if (!icount_read(inode)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
-
- iput(iput_inode);
-
- /* for each watch, send FS_UNMOUNT and then remove it */
- fsnotify_inode(inode, FS_UNMOUNT);
-
- fsnotify_inode_delete(inode);
-
- iput_inode = inode;
-
- cond_resched();
- spin_lock(&sb->s_inode_list_lock);
- }
- spin_unlock(&sb->s_inode_list_lock);
-
- iput(iput_inode);
-}
-
void fsnotify_sb_delete(struct super_block *sb)
{
struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
@@ -100,7 +41,7 @@ void fsnotify_sb_delete(struct super_block *sb)
if (!sbinfo)
return;
- fsnotify_unmount_inodes(sb);
+ fsnotify_unmount_inodes(sbinfo);
fsnotify_clear_marks_by_sb(sb);
/* Wait for outstanding object references from connectors */
wait_var_event(fsnotify_sb_watched_objects(sb),
@@ -112,7 +53,10 @@ void fsnotify_sb_delete(struct super_block *sb)
void fsnotify_sb_free(struct super_block *sb)
{
- kfree(sb->s_fsnotify_info);
+ if (sb->s_fsnotify_info) {
+ WARN_ON_ONCE(!list_empty(&sb->s_fsnotify_info->inode_conn_list));
+ kfree(sb->s_fsnotify_info);
+ }
}
/*
@@ -777,8 +721,7 @@ static __init int fsnotify_init(void)
if (ret)
panic("initializing fsnotify_mark_srcu");
- fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
- SLAB_PANIC);
+ fsnotify_init_connector_caches();
return 0;
}
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 5950c7a67f41..58c7bb25e571 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -77,6 +77,9 @@ extern struct srcu_struct fsnotify_mark_srcu;
extern int fsnotify_compare_groups(struct fsnotify_group *a,
struct fsnotify_group *b);
+/* Destroy all inode marks for given superblock */
+void fsnotify_unmount_inodes(struct fsnotify_sb_info *sbinfo);
+
/* Destroy all marks attached to an object via connector */
extern void fsnotify_destroy_marks(fsnotify_connp_t *connp);
/* run the list of all marks associated with inode and destroy them */
@@ -106,6 +109,6 @@ static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns)
*/
extern void fsnotify_set_children_dentry_flags(struct inode *inode);
-extern struct kmem_cache *fsnotify_mark_connector_cachep;
+void fsnotify_init_connector_caches(void);
#endif /* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 55a03bb05aa1..8e6997e9aebb 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -79,7 +79,8 @@
#define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */
struct srcu_struct fsnotify_mark_srcu;
-struct kmem_cache *fsnotify_mark_connector_cachep;
+static struct kmem_cache *fsnotify_mark_connector_cachep;
+static struct kmem_cache *fsnotify_inode_mark_connector_cachep;
static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
@@ -323,10 +324,12 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
while (conn) {
free = conn;
conn = conn->destroy_next;
- kmem_cache_free(fsnotify_mark_connector_cachep, free);
+ kfree(free);
}
}
+static void fsnotify_untrack_connector(struct fsnotify_mark_connector *conn);
+
static void *fsnotify_detach_connector_from_object(
struct fsnotify_mark_connector *conn,
unsigned int *type)
@@ -342,6 +345,7 @@ static void *fsnotify_detach_connector_from_object(
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
+ fsnotify_untrack_connector(conn);
/* Unpin inode when detaching from connector */
if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
@@ -644,6 +648,8 @@ static int fsnotify_attach_info_to_sb(struct super_block *sb)
if (!sbinfo)
return -ENOMEM;
+ INIT_LIST_HEAD(&sbinfo->inode_conn_list);
+ spin_lock_init(&sbinfo->list_lock);
/*
* cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
* will observe an initialized structure
@@ -655,20 +661,123 @@ static int fsnotify_attach_info_to_sb(struct super_block *sb)
return 0;
}
-static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
- void *obj, unsigned int obj_type)
+struct fsnotify_inode_mark_connector {
+ struct fsnotify_mark_connector common;
+ struct list_head conns_list;
+};
+
+static struct inode *fsnotify_get_living_inode(struct fsnotify_sb_info *sbinfo)
{
- struct fsnotify_mark_connector *conn;
+ struct fsnotify_inode_mark_connector *iconn;
+ struct inode *inode;
+
+ spin_lock(&sbinfo->list_lock);
+ /* Find the first non-evicting inode */
+ list_for_each_entry(iconn, &sbinfo->inode_conn_list, conns_list) {
+ /* All connectors on the list are still attached to an inode */
+ inode = iconn->common.obj;
+ /*
+ * For connectors without FSNOTIFY_CONN_FLAG_HAS_IREF
+ * (evictable marks) corresponding inode may well have 0
+ * refcount and can be undergoing eviction. OTOH list_lock
+ * protects us from the connector getting detached and inode
+ * freed. So we can poke around the inode safely.
+ */
+ spin_lock(&inode->i_lock);
+ if (likely(
+ !(inode_state_read(inode) & (I_FREEING | I_WILL_FREE)))) {
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&sbinfo->list_lock);
+ return inode;
+ }
+ spin_unlock(&inode->i_lock);
+ }
+ spin_unlock(&sbinfo->list_lock);
- conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
- if (!conn)
- return -ENOMEM;
+ return NULL;
+}
+
+/**
+ * fsnotify_unmount_inodes - an sb is unmounting. Handle any watched inodes.
+ * @sbinfo: fsnotify info for superblock being unmounted.
+ *
+ * Walk all inode connectors for the superblock and free all associated marks.
+ */
+void fsnotify_unmount_inodes(struct fsnotify_sb_info *sbinfo)
+{
+ struct inode *inode;
+
+ while ((inode = fsnotify_get_living_inode(sbinfo))) {
+ fsnotify_inode(inode, FS_UNMOUNT);
+ fsnotify_clear_marks_by_inode(inode);
+ iput(inode);
+ cond_resched();
+ }
+}
+
+static void fsnotify_init_connector(struct fsnotify_mark_connector *conn,
+ void *obj, unsigned int obj_type)
+{
spin_lock_init(&conn->lock);
INIT_HLIST_HEAD(&conn->list);
conn->flags = 0;
conn->prio = 0;
conn->type = obj_type;
conn->obj = obj;
+}
+
+static struct fsnotify_mark_connector *
+fsnotify_alloc_inode_connector(struct inode *inode)
+{
+ struct fsnotify_inode_mark_connector *iconn;
+ struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(inode->i_sb);
+
+ iconn = kmem_cache_alloc(fsnotify_inode_mark_connector_cachep,
+ GFP_KERNEL);
+ if (!iconn)
+ return NULL;
+
+ fsnotify_init_connector(&iconn->common, inode, FSNOTIFY_OBJ_TYPE_INODE);
+ spin_lock(&sbinfo->list_lock);
+ list_add(&iconn->conns_list, &sbinfo->inode_conn_list);
+ spin_unlock(&sbinfo->list_lock);
+
+ return &iconn->common;
+}
+
+static void fsnotify_untrack_connector(struct fsnotify_mark_connector *conn)
+{
+ struct fsnotify_inode_mark_connector *iconn;
+ struct fsnotify_sb_info *sbinfo;
+
+ if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
+ return;
+
+ iconn = container_of(conn, struct fsnotify_inode_mark_connector, common);
+ sbinfo = fsnotify_sb_info(fsnotify_conn_inode(conn)->i_sb);
+ spin_lock(&sbinfo->list_lock);
+ list_del(&iconn->conns_list);
+ spin_unlock(&sbinfo->list_lock);
+}
+
+static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+ void *obj, unsigned int obj_type)
+{
+ struct fsnotify_mark_connector *conn;
+
+ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
+ struct inode *inode = obj;
+
+ conn = fsnotify_alloc_inode_connector(inode);
+ } else {
+ conn = kmem_cache_alloc(fsnotify_mark_connector_cachep,
+ GFP_KERNEL);
+ if (conn)
+ fsnotify_init_connector(conn, obj, obj_type);
+ }
+ if (!conn)
+ return -ENOMEM;
/*
* cmpxchg() provides the barrier so that readers of *connp can see
@@ -676,7 +785,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
*/
if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */
- kmem_cache_free(fsnotify_mark_connector_cachep, conn);
+ fsnotify_untrack_connector(conn);
+ kfree(conn);
}
return 0;
}
@@ -1007,3 +1117,12 @@ void fsnotify_wait_marks_destroyed(void)
flush_delayed_work(&reaper_work);
}
EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
+
+__init void fsnotify_init_connector_caches(void)
+{
+ fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
+ SLAB_PANIC);
+ fsnotify_inode_mark_connector_cachep = KMEM_CACHE(
+ fsnotify_inode_mark_connector,
+ SLAB_PANIC);
+}
diff --git a/fs/super.c b/fs/super.c
index b13c1fd6a6f4..784b5297a7d7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -620,6 +620,7 @@ void generic_shutdown_super(struct super_block *sb)
const struct super_operations *sop = sb->s_op;
if (sb->s_root) {
+ fsnotify_sb_delete(sb);
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
sb->s_flags &= ~SB_ACTIVE;
@@ -632,9 +633,8 @@ void generic_shutdown_super(struct super_block *sb)
/*
* Clean up and evict any inodes that still have references due
- * to fsnotify or the security policy.
+ * to the security policy.
*/
- fsnotify_sb_delete(sb);
security_sb_delete(sb);
if (sb->s_dio_done_wq) {