From 71d734103edfa2b4c6657578a3082ee0e51d767e Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 8 Jul 2020 14:11:36 +0300 Subject: fsnotify: Rearrange fast path to minimise overhead when there is no watcher The fsnotify paths are trivial to hit even when there are no watchers and they are surprisingly expensive. For example, every successful vfs_write() hits fsnotify_modify which calls both fsnotify_parent and fsnotify unless FMODE_NONOTIFY is set which is an internal flag invisible to userspace. As it stands, fsnotify_parent is a guaranteed functional call even if there are no watchers and fsnotify() does a substantial amount of unnecessary work before it checks if there are any watchers. A perf profile showed that applying mnt->mnt_fsnotify_mask in fnotify() was almost half of the total samples taken in that function during a test. This patch rearranges the fast paths to reduce the amount of work done when there are no watchers. The test motivating this was "perf bench sched messaging --pipe". Despite the fact the pipes are anonymous, fsnotify is still called a lot and the overhead is noticeable even though it's completely pointless. It's likely the overhead is negligible for real IO so this is an extreme example. This is a comparison of hackbench using processes and pipes on a 1-socket machine with 8 CPU threads without fanotify watchers. 5.7.0 5.7.0 vanilla fastfsnotify-v1r1 Amean 1 0.4837 ( 0.00%) 0.4630 * 4.27%* Amean 3 1.5447 ( 0.00%) 1.4557 ( 5.76%) Amean 5 2.6037 ( 0.00%) 2.4363 ( 6.43%) Amean 7 3.5987 ( 0.00%) 3.4757 ( 3.42%) Amean 12 5.8267 ( 0.00%) 5.6983 ( 2.20%) Amean 18 8.4400 ( 0.00%) 8.1327 ( 3.64%) Amean 24 11.0187 ( 0.00%) 10.0290 * 8.98%* Amean 30 13.1013 ( 0.00%) 12.8510 ( 1.91%) Amean 32 13.9190 ( 0.00%) 13.2410 ( 4.87%) 5.7.0 5.7.0 vanilla fastfsnotify-v1r1 Duration User 157.05 152.79 Duration System 1279.98 1219.32 Duration Elapsed 182.81 174.52 This is showing that the latencies are improved by roughly 2-9%. The variability is not shown but some of these results are within the noise as this workload heavily overloads the machine. That said, the system CPU usage is reduced by quite a bit so it makes sense to avoid the overhead even if it is a bit tricky to detect at times. A perf profile of just 1 group of tasks showed that 5.14% of samples taken were in either fsnotify() or fsnotify_parent(). With the patch, 2.8% of samples were in fsnotify, mostly function entry and the initial check for watchers. The check for watchers is complicated enough that inlining it may be controversial. [Amir] Slightly simplify with mnt_or_sb_mask => marks_mask Link: https://lore.kernel.org/r/20200708111156.24659-1-amir73il@gmail.com Signed-off-by: Mel Gorman Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 10 ++++++++++ include/linux/fsnotify_backend.h | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 5ab28f6c7d26..508f6bb0b06b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -44,6 +44,16 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } +/* Notify this dentry's parent about a child's events. */ +static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, + const void *data, int data_type) +{ + if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) + return 0; + + return __fsnotify_parent(dentry, mask, data, data_type); +} + /* * Simple wrappers to consolidate calls fsnotify_parent()/fsnotify() when * an event is on a file/dentry. diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index f0c506405b54..1626fa7d10ff 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -379,7 +379,7 @@ struct fsnotify_mark { /* main fsnotify call to send events */ extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_type, const struct qstr *name, u32 cookie); -extern int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, +extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); @@ -541,7 +541,7 @@ static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, return 0; } -static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, +static inline int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { return 0; -- cgit v1.2.3 From c738fbabb0ff62d0f9a9572e56e65d05a1b34c6a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Jul 2020 14:11:37 +0300 Subject: fsnotify: fold fsnotify() call into fsnotify_parent() All (two) callers of fsnotify_parent() also call fsnotify() to notify the child inode. Move the second fsnotify() call into fsnotify_parent(). This will allow more flexibility in making decisions about which of the two event falvors should be sent. Using 'goto notify_child' in the inline helper seems a bit strange, but it mimics the code in __fsnotify_parent() for clarity and the goto pattern will become less strage after following patches are applied. Link: https://lore.kernel.org/r/20200708111156.24659-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 508f6bb0b06b..316c9b820517 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -48,44 +48,37 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { + struct inode *inode = d_inode(dentry); + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) - return 0; + goto notify_child; return __fsnotify_parent(dentry, mask, data, data_type); + +notify_child: + return fsnotify(inode, mask, data, data_type, NULL, 0); } /* - * Simple wrappers to consolidate calls fsnotify_parent()/fsnotify() when - * an event is on a file/dentry. + * Simple wrappers to consolidate calls to fsnotify_parent() when an event + * is on a file/dentry. */ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { - struct inode *inode = d_inode(dentry); - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify_parent(dentry, mask, inode, FSNOTIFY_EVENT_INODE); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_parent(dentry, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE); } static inline int fsnotify_file(struct file *file, __u32 mask) { const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - int ret; if (file->f_mode & FMODE_NONOTIFY) return 0; - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - ret = fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); - if (ret) - return ret; - - return fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } /* Simple call site for access decisions */ -- cgit v1.2.3 From cbcf47adc8aadbcaa741391ccfd96f764b50be7e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Jul 2020 14:11:38 +0300 Subject: fsnotify: return non const from fsnotify_data_inode() Return non const inode pointer from fsnotify_data_inode(). None of the fsnotify hooks pass const inode pointer as data and callers often need to cast to a non const pointer. Link: https://lore.kernel.org/r/20200708111156.24659-3-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1626fa7d10ff..97300f3b8ff0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -220,12 +220,11 @@ enum fsnotify_data_type { FSNOTIFY_EVENT_INODE, }; -static inline const struct inode *fsnotify_data_inode(const void *data, - int data_type) +static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_INODE: - return data; + return (struct inode *)data; case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); default: -- cgit v1.2.3 From b54cecf5e2293d15620f7b3f8d1bf486243d5643 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 7 Jun 2020 12:10:40 +0300 Subject: fsnotify: pass dir argument to handle_event() callback The 'inode' argument to handle_event(), sometimes referred to as 'to_tell' is somewhat obsolete. It is a remnant from the times when a group could only have an inode mark associated with an event. We now pass an iter_info array to the callback, with all marks associated with an event. Most backends ignore this argument, with two exceptions: 1. dnotify uses it for sanity check that event is on directory 2. fanotify uses it to report fid of directory on directory entry modification events Remove the 'inode' argument and add a 'dir' argument. The callback function signature is deliberately changed, because the meaning of the argument has changed and the arguments have been documented. The 'dir' argument is set to when 'file_name' is specified and it is referring to the directory that the 'file_name' entry belongs to. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 97300f3b8ff0..0de130cbf72d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -108,6 +108,17 @@ struct mem_cgroup; * these operations for each relevant group. * * handle_event - main call for a group to handle an fs event + * @group: group to notify + * @mask: event type and flags + * @data: object that event happened on + * @data_type: type of object for fanotify_data_XXX() accessors + * @dir: optional directory associated with event - + * if @file_name is not NULL, this is the directory that + * @file_name is relative to + * @file_name: optional file name associated with event + * @cookie: inotify rename cookie + * @iter_info: array of marks from this group that are interested in the event + * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group * MUST be holding a reference on each mark and that reference must be @@ -115,9 +126,8 @@ struct mem_cgroup; * userspace messages that marks have been removed. */ struct fsnotify_ops { - int (*handle_event)(struct fsnotify_group *group, - struct inode *inode, - u32 mask, const void *data, int data_type, + int (*handle_event)(struct fsnotify_group *group, u32 mask, + const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); void (*free_group_priv)(struct fsnotify_group *group); -- cgit v1.2.3 From 08b95c338e0c5a96e47f4ca314ea1e7580ecb5d7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Jul 2020 14:11:52 +0300 Subject: fanotify: remove event FAN_DIR_MODIFY It was never enabled in uapi and its functionality is about to be superseded by events FAN_CREATE, FAN_DELETE, FAN_MOVE with group flag FAN_REPORT_NAME. Keep a place holder variable name_event instead of removing the name recording code since it will be used by the new events. Link: https://lore.kernel.org/r/20200708111156.24659-17-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 6 ------ include/linux/fsnotify_backend.h | 4 +--- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 316c9b820517..9b2566d273a9 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -30,12 +30,6 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask, const struct qstr *name, u32 cookie) { fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); - /* - * Send another flavor of the event without child inode data and - * without the specific event type (e.g. FS_CREATE|FS_IS_DIR). - * The name is relative to the dir inode the event is reported to. - */ - fsnotify(dir, FS_DIR_MODIFY, dir, FSNOTIFY_EVENT_INODE, name, 0); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0de130cbf72d..94a4ff3d5bbe 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -47,7 +47,6 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ -#define FS_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* This inode cares about things that happen to its children. Always set for @@ -67,8 +66,7 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | \ - FS_DIR_MODIFY) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) -- cgit v1.2.3 From d809daf1b6add51eec001bf60b17885d697a299d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:12 +0300 Subject: fanotify: generalize test for FAN_REPORT_FID As preparation for new flags that report fids, define a bit set of flags for a group reporting fids, currently containing the only bit FAN_REPORT_FID. Link: https://lore.kernel.org/r/20200716084230.30611-5-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index b79fa9bb7359..bbbee11d2521 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -18,8 +18,10 @@ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) -#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ - FAN_REPORT_TID | FAN_REPORT_FID | \ +#define FANOTIFY_FID_BITS (FAN_REPORT_FID) + +#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \ + FAN_REPORT_TID | \ FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) -- cgit v1.2.3 From 6ba8d7107f27c1bde60a80bc5def027979af3e8e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:16 +0300 Subject: fsnotify: add object type "child" to object type iterator The object type iterator is used to collect all the marks of a specific group that have interest in an event. It is used by fanotify to get a single handle_event callback when an event has a match to either of inode/sb/mount marks of the group. The nature of fsnotify events is that they are associated with at most one sb at most one mount and at most one inode. When a parent and child are both watching, two events are sent to backend, one associated to parent inode and one associated to the child inode. This results in duplicate events in fanotify, which usually get merged before user reads them, but this is sub-optimal. It would be better if the same event is sent to backend with an object type iterator that has both the child inode and its parent, and let the backend decide if the event should be reported once (fanotify) or twice (inotify). Link: https://lore.kernel.org/r/20200716084230.30611-9-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 94a4ff3d5bbe..d22519001027 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -253,6 +253,7 @@ static inline const struct path *fsnotify_data_path(const void *data, enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, + FSNOTIFY_OBJ_TYPE_CHILD, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, @@ -260,6 +261,7 @@ enum fsnotify_obj_type { }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) +#define FSNOTIFY_OBJ_TYPE_CHILD_FL (1U << FSNOTIFY_OBJ_TYPE_CHILD) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) #define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) @@ -304,6 +306,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ } FSNOTIFY_ITER_FUNCS(inode, INODE) +FSNOTIFY_ITER_FUNCS(child, CHILD) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) -- cgit v1.2.3 From 82ace1efb3cb1d49a1681cc6e31156047d5ae1f2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Jul 2020 15:58:44 +0300 Subject: fsnotify: create helper fsnotify_inode() Simple helper to consolidate biolerplate code. Link: https://lore.kernel.org/r/20200722125849.17418-5-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 9b2566d273a9..f9db7c9b3ef1 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -38,6 +38,14 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } +static inline void fsnotify_inode(struct inode *inode, __u32 mask) +{ + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); +} + /* Notify this dentry's parent about a child's events. */ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) @@ -105,12 +113,7 @@ static inline int fsnotify_perm(struct file *file, int mask) */ static inline void fsnotify_link_count(struct inode *inode) { - __u32 mask = FS_ATTRIB; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_inode(inode, FS_ATTRIB); } /* @@ -125,7 +128,6 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; - __u32 mask = FS_MOVE_SELF; const struct qstr *new_name = &moved->d_name; if (old_dir == new_dir) @@ -134,7 +136,6 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; - mask |= FS_ISDIR; } fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie); @@ -144,7 +145,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, fsnotify_link_count(target); if (source) - fsnotify(source, mask, source, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_inode(source, FS_MOVE_SELF); audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE); } @@ -169,12 +170,7 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) */ static inline void fsnotify_inoderemove(struct inode *inode) { - __u32 mask = FS_DELETE_SELF; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_inode(inode, FS_DELETE_SELF); __fsnotify_inode_delete(inode); } -- cgit v1.2.3 From 40a100d3adc1ad7f0a34875468c499fcecd20ba4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Jul 2020 15:58:46 +0300 Subject: fsnotify: pass dir and inode arguments to fsnotify() The arguments of fsnotify() are overloaded and mean different things for different event types. Replace the to_tell argument with separate arguments @dir and @inode, because we may be sending to both dir and child. Using the @data argument to pass the child is not enough, because dirent events pass this argument (for audit), but we do not report to child. Document the new fsnotify() function argumenets. Link: https://lore.kernel.org/r/20200722125849.17418-7-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 9 +++++---- include/linux/fsnotify_backend.h | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index f9db7c9b3ef1..99922cac4fcd 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -23,13 +23,14 @@ * have changed (i.e. renamed over). * * Unlike fsnotify_parent(), the event will be reported regardless of the - * FS_EVENT_ON_CHILD mask on the parent inode. + * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only + * the child is interested and not the parent. */ static inline void fsnotify_name(struct inode *dir, __u32 mask, struct inode *child, const struct qstr *name, u32 cookie) { - fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); + fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, @@ -43,7 +44,7 @@ static inline void fsnotify_inode(struct inode *inode, __u32 mask) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify(mask, inode, FSNOTIFY_EVENT_INODE, NULL, NULL, inode, 0); } /* Notify this dentry's parent about a child's events. */ @@ -61,7 +62,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, return __fsnotify_parent(dentry, mask, data, data_type); notify_child: - return fsnotify(inode, mask, data, data_type, NULL, 0); + return fsnotify(mask, data, data_type, NULL, NULL, inode, 0); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d22519001027..152520635bd3 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -387,8 +387,9 @@ struct fsnotify_mark { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, - int data_type, const struct qstr *name, u32 cookie); +extern int fsnotify(__u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *name, + struct inode *inode, u32 cookie); extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type); extern void __fsnotify_inode_delete(struct inode *inode); @@ -545,8 +546,9 @@ static inline void fsnotify_init_event(struct fsnotify_event *event, #else -static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, - int data_type, const struct qstr *name, u32 cookie) +static inline int fsnotify(__u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *name, + struct inode *inode, u32 cookie) { return 0; } -- cgit v1.2.3 From 9b93f33105f5f9bd3d016ff870eb6000c9d89eff Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:23 +0300 Subject: fsnotify: send event with parent/name info to sb/mount/non-dir marks Similar to events "on child" to watching directory, send event with parent/name info if sb/mount/non-dir marks are interested in parent/name info. The FS_EVENT_ON_CHILD flag can be set on sb/mount/non-dir marks to specify interest in parent/name info for events on non-directory inodes. Events on "orphan" children (disconnected dentries) are sent without parent/name info. Events on directories are sent with parent/name info only if the parent directory is watching. After this change, even groups that do not subscribe to events on children could get an event with mark iterator type TYPE_CHILD and without mark iterator type TYPE_INODE if fanotify has marks on the same objects. dnotify and inotify event handlers can already cope with that situation. audit does not subscribe to events that are possible on child, so won't get to this situation. nfsd does not access the marks iterator from its event handler at the moment, so it is not affected. This is a bit too fragile, so we should prepare all groups to cope with mark type TYPE_CHILD preferably using a generic helper. Link: https://lore.kernel.org/r/20200716084230.30611-16-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 10 ++++++++-- include/linux/fsnotify_backend.h | 32 ++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 99922cac4fcd..6e63f7e10da0 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -53,10 +53,16 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, { struct inode *inode = d_inode(dentry); - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { mask |= FS_ISDIR; - if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) + /* sb/mount marks are not interested in name of directory */ + if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) + goto notify_child; + } + + /* disconnected dentry cannot notify parent */ + if (IS_ROOT(dentry)) goto notify_child; return __fsnotify_parent(dentry, mask, data, data_type); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 152520635bd3..32104cfc27a5 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -49,8 +49,11 @@ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ -/* This inode cares about things that happen to its children. Always set for - * dnotify and inotify. */ +/* + * Set on inode mark that cares about things that happen to its children. + * Always set for dnotify and inotify. + * Set on inode/sb/mount marks that care about parent/name info. + */ #define FS_EVENT_ON_CHILD 0x08000000 #define FS_DN_RENAME 0x10000000 /* file renamed */ @@ -72,14 +75,22 @@ FS_OPEN_EXEC_PERM) /* - * This is a list of all events that may get sent to a parent based on fs event - * happening to inodes inside that directory. + * This is a list of all events that may get sent to a parent that is watching + * with flag FS_EVENT_ON_CHILD based on fs event on a child of that directory. */ #define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ FS_OPEN | FS_OPEN_EXEC) +/* + * This is a list of all events that may get sent with the parent inode as the + * @to_tell argument of fsnotify(). + * It may include events that can be sent to an inode/sb/mount mark, but cannot + * be sent to a parent watching children. + */ +#define FS_EVENTS_POSS_TO_PARENT (FS_EVENTS_POSS_ON_CHILD) + /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ @@ -397,6 +408,19 @@ extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); extern u32 fsnotify_get_cookie(void); +static inline __u32 fsnotify_parent_needed_mask(__u32 mask) +{ + /* FS_EVENT_ON_CHILD is set on marks that want parent/name info */ + if (!(mask & FS_EVENT_ON_CHILD)) + return 0; + /* + * This object might be watched by a mark that cares about parent/name + * info, does it care about the specific set of events that can be + * reported with parent/name info? + */ + return mask & FS_EVENTS_POSS_TO_PARENT; +} + static inline int fsnotify_inode_watches_children(struct inode *inode) { /* FS_EVENT_ON_CHILD is set if the inode may care */ -- cgit v1.2.3 From 79cb299c7e181fad683bf6191edb8224b2412512 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:24 +0300 Subject: fsnotify: remove check that source dentry is positive Remove the unneeded check for positive source dentry in fsnotify_move(). fsnotify_move() hook is mostly called from vfs_rename() under lock_rename() and vfs_rename() starts with may_delete() test that verifies positive source dentry. The only other caller of fsnotify_move() - debugfs_rename() also verifies positive source. Link: https://lore.kernel.org/r/20200716084230.30611-17-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6e63f7e10da0..f8acddcf54fb 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -150,9 +150,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (target) fsnotify_link_count(target); - - if (source) - fsnotify_inode(source, FS_MOVE_SELF); + fsnotify_inode(source, FS_MOVE_SELF); audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE); } -- cgit v1.2.3 From 83b7a59896dd24015a34b7f00027f0ff3747972f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:26 +0300 Subject: fanotify: add basic support for FAN_REPORT_DIR_FID For now, the flag is mutually exclusive with FAN_REPORT_FID. Events include a single info record of type FAN_EVENT_INFO_TYPE_DFID with a directory file handle. For now, events are only reported for: - Directory modification events - Events on children of a watching directory - Events on directory objects Soon, we will add support for reporting the parent directory fid for events on non-directories with filesystem/mount mark and support for reporting both parent directory fid and child fid. Link: https://lore.kernel.org/r/20200716084230.30611-19-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index bbbee11d2521..4ddac97b2bf7 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -18,7 +18,7 @@ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) -#define FANOTIFY_FID_BITS (FAN_REPORT_FID) +#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DIR_FID) #define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \ FAN_REPORT_TID | \ -- cgit v1.2.3 From 929943b38daf817f2e6d303ea04401651fc3bc05 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 16 Jul 2020 11:42:28 +0300 Subject: fanotify: add support for FAN_REPORT_NAME Introduce a new fanotify_init() flag FAN_REPORT_NAME. It requires the flag FAN_REPORT_DIR_FID and there is a constant for setting both flags named FAN_REPORT_DFID_NAME. For a group with flag FAN_REPORT_NAME, the parent fid and name are reported for directory entry modification events (create/detete/move) and for events on non-directory objects. Events on directories themselves are reported with their own fid and "." as the name. The parent fid and name are reported with an info record of type FAN_EVENT_INFO_TYPE_DFID_NAME, similar to the way that parent fid is reported with into type FAN_EVENT_INFO_TYPE_DFID, but with an appended null terminated name string. Link: https://lore.kernel.org/r/20200716084230.30611-21-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 4ddac97b2bf7..3e9c56ee651f 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -18,7 +18,7 @@ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) -#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DIR_FID) +#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) #define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \ FAN_REPORT_TID | \ -- cgit v1.2.3 From b9a1b9772509cbc6f6aa8bcd0b019f6347a2b631 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Jul 2020 15:58:48 +0300 Subject: fsnotify: create method handle_inode_event() in fsnotify_operations The method handle_event() grew a lot of complexity due to the design of fanotify and merging of ignore masks. Most backends do not care about this complex functionality, so we can hide this complexity from them. Introduce a method handle_inode_event() that serves those backends and passes a single inode mark and less arguments. This change converts all backends except fanotify and inotify to use the simplified handle_inode_event() method. In pricipal, inotify could have also used the new method, but that would require passing more arguments on the simple helper (data, data_type, cookie), so we leave it with the handle_event() method. Link: https://lore.kernel.org/r/20200722125849.17418-9-amir73il@gmail.com Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 32104cfc27a5..f8529a3a2923 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -128,17 +128,30 @@ struct mem_cgroup; * @cookie: inotify rename cookie * @iter_info: array of marks from this group that are interested in the event * + * handle_inode_event - simple variant of handle_event() for groups that only + * have inode marks and don't have ignore mask + * @mark: mark to notify + * @mask: event type and flags + * @inode: inode that event happened on + * @dir: optional directory associated with event - + * if @file_name is not NULL, this is the directory that + * @file_name is relative to. + * @file_name: optional file name associated with event + * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group - * MUST be holding a reference on each mark and that reference must be - * dropped in this function. inotify uses this function to send - * userspace messages that marks have been removed. + * MUST be holding a reference on each mark and that reference must be + * dropped in this function. inotify uses this function to send + * userspace messages that marks have been removed. */ struct fsnotify_ops { int (*handle_event)(struct fsnotify_group *group, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); + int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, + struct inode *inode, struct inode *dir, + const struct qstr *file_name); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); -- cgit v1.2.3