From 9b6304c1d53745c300b86f202d0dcff395e2d2db Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2023 14:58:10 -0400 Subject: fs: add ctime accessors infrastructure struct timespec64 has unused bits in the tv_nsec field that can be used for other purposes. In future patches, we're going to change how the inode->i_ctime is accessed in certain inodes in order to make use of them. In order to do that safely though, we'll need to eradicate raw accesses of the inode->i_ctime field from the kernel. Add new accessor functions for the ctime that we use to replace them. Reviewed-by: Jan Kara Reviewed-by: Luis Chamberlain Signed-off-by: Jeff Layton Reviewed-by: Damien Le Moal Message-Id: <20230705185812.579118-2-jlayton@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs.h | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6867512907d6..d41bfcb26da0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1474,7 +1474,50 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -extern struct timespec64 current_time(struct inode *inode); +struct timespec64 current_time(struct inode *inode); +struct timespec64 inode_set_ctime_current(struct inode *inode); + +/** + * inode_get_ctime - fetch the current ctime from the inode + * @inode: inode from which to fetch ctime + * + * Grab the current ctime from the inode and return it. 
+ */ +static inline struct timespec64 inode_get_ctime(const struct inode *inode) +{ + return inode->i_ctime; +} + +/** + * inode_set_ctime_to_ts - set the ctime in the inode + * @inode: inode in which to set the ctime + * @ts: value to set in the ctime field + * + * Set the ctime in @inode to @ts + */ +static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->i_ctime = ts; + return ts; +} + +/** + * inode_set_ctime - set the ctime in the inode + * @inode: inode in which to set the ctime + * @sec: tv_sec value to set + * @nsec: tv_nsec value to set + * + * Set the ctime in @inode to { @sec, @nsec } + */ +static inline struct timespec64 inode_set_ctime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + + return inode_set_ctime_to_ts(inode, ts); +} /* * Snapshotting support. -- cgit v1.2.3 From 0c4767923ed6964d279309744cdb248890e95ec2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2023 14:58:11 -0400 Subject: fs: new helper: simple_rename_timestamp A rename potentially involves updating 4 different inode timestamps. Add a function that handles the details sanely, and convert the libfs.c callers to use it. 
Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230705185812.579118-3-jlayton@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d41bfcb26da0..42755cb7d55b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2977,6 +2977,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, -- cgit v1.2.3 From e359147f01606e17d74df96cfeb0027f540f5e97 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2023 15:01:49 -0400 Subject: linux: convert to ctime accessor functions In later patches, we're going to change how the inode's ctime field is used. Switch to using accessor functions instead of raw accesses of inode->i_ctime. 
Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230705190309.579783-82-jlayton@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs_stack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index 54210a42c30d..010d39d0dc1c 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -24,7 +24,7 @@ static inline void fsstack_copy_attr_times(struct inode *dest, { dest->i_atime = src->i_atime; dest->i_mtime = src->i_mtime; - dest->i_ctime = src->i_ctime; + inode_set_ctime_to_ts(dest, inode_get_ctime(src)); } #endif /* _LINUX_FS_STACK_H */ -- cgit v1.2.3 From 13bc24457850583a2e7203ded05b7209ab4bc5ef Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2023 14:58:12 -0400 Subject: fs: rename i_ctime field to __i_ctime Now that everything in-tree is converted to use the accessor functions, rename the i_ctime field in the inode to discourage direct access. Signed-off-by: Jeff Layton Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Message-Id: <20230705185812.579118-4-jlayton@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 42755cb7d55b..61f27011fd04 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -642,7 +642,7 @@ struct inode { loff_t i_size; struct timespec64 i_atime; struct timespec64 i_mtime; - struct timespec64 i_ctime; + struct timespec64 __i_ctime; /* use inode_*_ctime accessors! 
*/ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -1485,7 +1485,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->i_ctime; + return inode->__i_ctime; } /** @@ -1498,7 +1498,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode) static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->i_ctime = ts; + inode->__i_ctime = ts; return ts; } -- cgit v1.2.3 From 0d72b92883c651a11059d93335f33d65c6eb653b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:33 -0400 Subject: fs: pass the request_mask to generic_fillattr generic_fillattr just fills in the entire stat struct indiscriminately today, copying data from the inode. There is at least one attribute (STATX_CHANGE_COOKIE) that can have side effects when it is reported, and we're looking at adding more with the addition of multigrain timestamps. Add a request_mask argument to generic_fillattr and have most callers just pass in the value that is passed to getattr. Have other callers (e.g. ksmbd) just pass in STATX_BASIC_STATS. Also move the setting of STATX_CHANGE_COOKIE into generic_fillattr. 
Acked-by: Joseph Qi Reviewed-by: Xiubo Li Reviewed-by: "Paulo Alcantara (SUSE)" Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Message-Id: <20230807-mgctime-v7-2-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 61f27011fd04..85977cdeda94 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2917,7 +2917,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *); +void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); -- cgit v1.2.3 From 541d4c798a598854fcce7326d947cbcbd35701d6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:34 -0400 Subject: fs: drop the timespec64 arg from generic_update_time In future patches we're going to change how the ctime is updated to keep track of when it has been queried. The way that the update_time operation works (and a lot of its callers) make this difficult, since they grab a timestamp early and then pass it down to eventually be copied into the inode. All of the existing update_time callers pass in the result of current_time() in some fashion. Drop the "time" parameter from generic_update_time, and rework it to fetch its own timestamp. This change means that an update_time could fetch a different timestamp than was seen in inode_needs_update_time. 
update_time is only ever called with one of two flag combinations: Either S_ATIME is set, or S_MTIME|S_CTIME|S_VERSION are set. With this change we now treat the flags argument as an indicator that some value needed to be updated when last checked, rather than an indication to update specific timestamps. Rework the logic for updating the timestamps and put it in a new inode_update_timestamps helper that other update_time routines can use. S_ATIME is treated as we always have, but if any of the other three are set, then we attempt to update all three. Also, some callers of generic_update_time need to know what timestamps were actually updated. Change it to return an S_* flag mask to indicate that and rework the callers to expect it. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230807-mgctime-v7-3-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 85977cdeda94..bb3c2c4f871f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2343,7 +2343,8 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); -extern int generic_update_time(struct inode *, struct timespec64 *, int); +int inode_update_timestamps(struct inode *inode, int flags); +int generic_update_time(struct inode *, int); /* /sys/fs */ extern struct kobject *fs_kobj; -- cgit v1.2.3 From 913e99287b98fd051ac1976140a2764a8ef9dfbf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:39 -0400 Subject: fs: drop the timespec64 argument from update_time Now that all of the update_time operations are prepared for it, we can drop the timespec64 argument from the update_time operation. Do that and remove it from some associated functions like inode_update_time and inode_needs_update_time. 
Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230807-mgctime-v7-8-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index bb3c2c4f871f..a83313f90fe3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1887,7 +1887,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); - int (*update_time)(struct inode *, struct timespec64 *, int); + int (*update_time)(struct inode *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); @@ -2237,7 +2237,7 @@ enum file_time_flags { extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); -int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); +int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { -- cgit v1.2.3 From ffb6cf19e06334062744b7e3493f71e500964f8e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Aug 2023 15:38:40 -0400 Subject: fs: add infrastructure for multigrain timestamps The VFS always uses coarse-grained timestamps when updating the ctime and mtime after a change. This has the benefit of allowing filesystems to optimize away a lot of metadata updates, down to around 1 per jiffy, even when a file is under heavy writes. Unfortunately, this has always been an issue when we're exporting via NFSv3, which relies on timestamps to validate caches. A lot of changes can happen in a jiffy, so timestamps aren't sufficient to help the client decide to invalidate the cache. Even with NFSv4, a lot of exported filesystems don't properly support a change attribute and are subject to the same problems with timestamp granularity. 
Other applications have similar issues with timestamps (e.g. backup applications). If we were to always use fine-grained timestamps, that would improve the situation, but that becomes rather expensive, as the underlying filesystem would have to log a lot more metadata updates. What we need is a way to only use fine-grained timestamps when they are being actively queried. POSIX generally mandates that when the mtime changes, the ctime must also change. The kernel always stores normalized ctime values, so only the first 30 bits of the tv_nsec field are ever used. Use the 31st bit of the ctime tv_nsec field to indicate that something has queried the inode for the mtime or ctime. When this flag is set, on the next mtime or ctime update, the kernel will fetch a fine-grained timestamp instead of the usual coarse-grained one. Filesystems can opt into this behavior by setting the FS_MGTIME flag in the fstype. Filesystems that don't set this flag will continue to use coarse-grained timestamps. Later patches will convert individual filesystems to use the new infrastructure. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Message-Id: <20230807-mgctime-v7-9-d1dec143a704@kernel.org> Signed-off-by: Christian Brauner --- include/linux/fs.h | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a83313f90fe3..455835d0e963 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1474,18 +1474,47 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } +struct timespec64 current_mgtime(struct inode *inode); struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); +/* + * Multigrain timestamps + * + * Conditionally use fine-grained ctime and mtime timestamps when there + * are users actively observing them via getattr. 
The primary use-case + * for this is NFS clients that use the ctime to distinguish between + * different states of the file, and that are often fooled by multiple + * operations that occur in the same coarse-grained timer tick. + * + * The kernel always keeps normalized struct timespec64 values in the ctime, + * which means that only the first 30 bits of the value are used. Use the + * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value + * has been queried since it was last updated. + */ +#define I_CTIME_QUERIED (1L<<30) + /** * inode_get_ctime - fetch the current ctime from the inode * @inode: inode from which to fetch ctime * - * Grab the current ctime from the inode and return it. + * Grab the current ctime tv_nsec field from the inode, mask off the + * I_CTIME_QUERIED flag and return it. This is mostly intended for use by + * internal consumers of the ctime that aren't concerned with ensuring a + * fine-grained update on the next change (e.g. when preparing to store + * the value in the backing store for later retrieval). + * + * This is safe to call regardless of whether the underlying filesystem + * is using multigrain timestamps. */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->__i_ctime; + struct timespec64 ctime; + + ctime.tv_sec = inode->__i_ctime.tv_sec; + ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED; + + return ctime; } /** @@ -2259,6 +2288,7 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ +#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2282,6 +2312,17 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) +/** + * is_mgtime: is this inode using multigrain timestamps + * @inode: inode to test for multigrain timestamps + * + * Return true if the inode uses multigrain timestamps, false otherwise. + */ +static inline bool is_mgtime(const struct inode *inode) +{ + return inode->i_sb->s_type->fs_flags & FS_MGTIME; +} + extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2918,6 +2959,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); +void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); -- cgit v1.2.3