summaryrefslogtreecommitdiff
path: root/include/linux/fs.h
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2024-02-14 10:45:07 +0100
committerIngo Molnar <mingo@kernel.org>2024-02-14 10:45:07 +0100
commit03c11eb3b16dc0058589751dfd91f254be2be613 (patch)
treee5f2889212fec0bb0babdce9abd781ab487e246a /include/linux/fs.h
parentde8c6a352131f642b82474abe0cbb5dd26a7e081 (diff)
parent841c35169323cd833294798e58b9bf63fa4fa1de (diff)
Merge tag 'v6.8-rc4' into x86/percpu, to resolve conflicts and refresh the branch
Conflicts: arch/x86/include/asm/percpu.h arch/x86/include/asm/text-patching.h Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r--include/linux/fs.h300
1 files changed, 203 insertions, 97 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4aeb3fa11927..ed5966a70495 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -67,7 +67,7 @@ struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
-struct fscrypt_info;
+struct fscrypt_inode_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
@@ -434,7 +434,7 @@ struct address_space_operations {
bool (*is_partially_uptodate) (struct folio *, size_t from,
size_t count);
void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb);
- int (*error_remove_page)(struct address_space *, struct page *);
+ int (*error_remove_folio)(struct address_space *, struct folio *);
/* swapfile support */
int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
@@ -454,7 +454,7 @@ extern const struct address_space_operations empty_aops;
* It is also used to block modification of page cache contents through
* memory mappings.
* @gfp_mask: Memory allocation flags to use for allocating pages.
- * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
* @nr_thps: Number of THPs in the pagecache (non-shmem only).
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
@@ -463,9 +463,9 @@ extern const struct address_space_operations empty_aops;
* @a_ops: Methods.
* @flags: Error bits and flags (AS_*).
* @wb_err: The most recent error which has occurred.
- * @private_lock: For use by the owner of the address_space.
- * @private_list: For use by the owner of the address_space.
- * @private_data: For use by the owner of the address_space.
+ * @i_private_lock: For use by the owner of the address_space.
+ * @i_private_list: For use by the owner of the address_space.
+ * @i_private_data: For use by the owner of the address_space.
*/
struct address_space {
struct inode *host;
@@ -484,9 +484,9 @@ struct address_space {
unsigned long flags;
struct rw_semaphore i_mmap_rwsem;
errseq_t wb_err;
- spinlock_t private_lock;
- struct list_head private_list;
- void *private_data;
+ spinlock_t i_private_lock;
+ struct list_head i_private_list;
+ void * i_private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
/*
* On most architectures that alignment is already the case; but
@@ -557,7 +557,7 @@ static inline int mapping_mapped(struct address_space *mapping)
/*
* Might pages of this file have been modified in userspace?
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
+ * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
*
@@ -671,8 +671,8 @@ struct inode {
};
dev_t i_rdev;
loff_t i_size;
- struct timespec64 i_atime;
- struct timespec64 i_mtime;
+ struct timespec64 __i_atime;
+ struct timespec64 __i_mtime;
struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
@@ -738,7 +738,7 @@ struct inode {
#endif
#ifdef CONFIG_FS_ENCRYPTION
- struct fscrypt_info *i_crypt_info;
+ struct fscrypt_inode_info *i_crypt_info;
#endif
#ifdef CONFIG_FS_VERITY
@@ -991,8 +991,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
*/
struct file {
union {
+ /* fput() uses task work when closing and freeing file (default). */
+ struct callback_head f_task_work;
+ /* fput() must use workqueue (most kernel threads). */
struct llist_node f_llist;
- struct rcu_head f_rcuhead;
unsigned int f_iocb_flags;
};
@@ -1042,7 +1044,10 @@ static inline struct file *get_file(struct file *f)
atomic_long_inc(&f->f_count);
return f;
}
-#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
+
+struct file *get_file_rcu(struct file __rcu **f);
+struct file *get_file_active(struct file **f);
+
#define file_count(x) atomic_long_read(&(x)->f_count)
#define MAX_NON_LFS ((1UL<<31) - 1)
@@ -1119,7 +1124,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_NOATIME BIT(10) /* Do not update access times. */
#define SB_NODIRATIME BIT(11) /* Do not update directory access times */
#define SB_SILENT BIT(15)
-#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */
+#define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */
#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */
#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */
#define SB_I_VERSION BIT(23) /* Update inode I_version field */
@@ -1161,11 +1166,13 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
#define SB_I_UNTRUSTED_MOUNTER 0x00000040
+#define SB_I_EVM_UNSUPPORTED 0x00000080
#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
+#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
/* Possible states of 'frozen' field */
enum {
@@ -1181,7 +1188,8 @@ enum {
struct sb_writers {
unsigned short frozen; /* Is sb frozen? */
- unsigned short freeze_holders; /* Who froze fs? */
+ int freeze_kcount; /* How many kernel freeze requests? */
+ int freeze_ucount; /* How many userspace freeze requests? */
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
@@ -1206,7 +1214,7 @@ struct super_block {
#ifdef CONFIG_SECURITY
void *s_security;
#endif
- const struct xattr_handler **s_xattr;
+ const struct xattr_handler * const *s_xattr;
#ifdef CONFIG_FS_ENCRYPTION
const struct fscrypt_operations *s_cop;
struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */
@@ -1221,6 +1229,7 @@ struct super_block {
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev;
+ struct bdev_handle *s_bdev_handle;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
struct hlist_node s_instances;
@@ -1265,7 +1274,7 @@ struct super_block {
const struct dentry_operations *s_d_op; /* default d_op for dentries */
- struct shrinker s_shrink; /* per-sb shrinker handle */
+ struct shrinker *s_shrink; /* per-sb shrinker handle */
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
@@ -1508,56 +1517,84 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
kgid_has_mapping(fs_userns, kgid);
}
-struct timespec64 current_mgtime(struct inode *inode);
struct timespec64 current_time(struct inode *inode);
struct timespec64 inode_set_ctime_current(struct inode *inode);
-/*
- * Multigrain timestamps
- *
- * Conditionally use fine-grained ctime and mtime timestamps when there
- * are users actively observing them via getattr. The primary use-case
- * for this is NFS clients that use the ctime to distinguish between
- * different states of the file, and that are often fooled by multiple
- * operations that occur in the same coarse-grained timer tick.
- *
- * The kernel always keeps normalized struct timespec64 values in the ctime,
- * which means that only the first 30 bits of the value are used. Use the
- * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value
- * has been queried since it was last updated.
- */
-#define I_CTIME_QUERIED (1L<<30)
+static inline time64_t inode_get_atime_sec(const struct inode *inode)
+{
+ return inode->__i_atime.tv_sec;
+}
-/**
- * inode_get_ctime - fetch the current ctime from the inode
- * @inode: inode from which to fetch ctime
- *
- * Grab the current ctime tv_nsec field from the inode, mask off the
- * I_CTIME_QUERIED flag and return it. This is mostly intended for use by
- * internal consumers of the ctime that aren't concerned with ensuring a
- * fine-grained update on the next change (e.g. when preparing to store
- * the value in the backing store for later retrieval).
- *
- * This is safe to call regardless of whether the underlying filesystem
- * is using multigrain timestamps.
- */
-static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+static inline long inode_get_atime_nsec(const struct inode *inode)
{
- struct timespec64 ctime;
+ return inode->__i_atime.tv_nsec;
+}
- ctime.tv_sec = inode->__i_ctime.tv_sec;
- ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED;
+static inline struct timespec64 inode_get_atime(const struct inode *inode)
+{
+ return inode->__i_atime;
+}
- return ctime;
+static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+ inode->__i_atime = ts;
+ return ts;
+}
+
+static inline struct timespec64 inode_set_atime(struct inode *inode,
+ time64_t sec, long nsec)
+{
+ struct timespec64 ts = { .tv_sec = sec,
+ .tv_nsec = nsec };
+ return inode_set_atime_to_ts(inode, ts);
+}
+
+static inline time64_t inode_get_mtime_sec(const struct inode *inode)
+{
+ return inode->__i_mtime.tv_sec;
+}
+
+static inline long inode_get_mtime_nsec(const struct inode *inode)
+{
+ return inode->__i_mtime.tv_nsec;
+}
+
+static inline struct timespec64 inode_get_mtime(const struct inode *inode)
+{
+ return inode->__i_mtime;
+}
+
+static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+ inode->__i_mtime = ts;
+ return ts;
+}
+
+static inline struct timespec64 inode_set_mtime(struct inode *inode,
+ time64_t sec, long nsec)
+{
+ struct timespec64 ts = { .tv_sec = sec,
+ .tv_nsec = nsec };
+ return inode_set_mtime_to_ts(inode, ts);
+}
+
+static inline time64_t inode_get_ctime_sec(const struct inode *inode)
+{
+ return inode->__i_ctime.tv_sec;
+}
+
+static inline long inode_get_ctime_nsec(const struct inode *inode)
+{
+ return inode->__i_ctime.tv_nsec;
+}
+
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+ return inode->__i_ctime;
}
-/**
- * inode_set_ctime_to_ts - set the ctime in the inode
- * @inode: inode in which to set the ctime
- * @ts: value to set in the ctime field
- *
- * Set the ctime in @inode to @ts
- */
static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
struct timespec64 ts)
{
@@ -1582,6 +1619,8 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode,
return inode_set_ctime_to_ts(inode, ts);
}
+struct timespec64 simple_inode_init_ts(struct inode *inode);
+
/*
* Snapshotting support.
*/
@@ -1610,9 +1649,70 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
#define __sb_writers_release(sb, lev) \
percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+/**
+ * __sb_write_started - check if sb freeze level is held
+ * @sb: the super we write to
+ * @level: the freeze level
+ *
+ * * > 0 - sb freeze level is held
+ * * 0 - sb freeze level is not held
+ * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
+ */
+static inline int __sb_write_started(const struct super_block *sb, int level)
+{
+ return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
+}
+
+/**
+ * sb_write_started - check if SB_FREEZE_WRITE is held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
static inline bool sb_write_started(const struct super_block *sb)
{
- return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1);
+ return __sb_write_started(sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * sb_write_not_started - check if SB_FREEZE_WRITE is not held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
+static inline bool sb_write_not_started(const struct super_block *sb)
+{
+ return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0;
+}
+
+/**
+ * file_write_started - check if SB_FREEZE_WRITE is held
+ * @file: the file we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ * May be false positive with !S_ISREG, because file_start_write() has
+ * no effect on !S_ISREG.
+ */
+static inline bool file_write_started(const struct file *file)
+{
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return true;
+ return sb_write_started(file_inode(file)->i_sb);
+}
+
+/**
+ * file_write_not_started - check if SB_FREEZE_WRITE is not held
+ * @file: the file we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ * May be false positive with !S_ISREG, because file_start_write() has
+ * no effect on !S_ISREG.
+ */
+static inline bool file_write_not_started(const struct file *file)
+{
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return true;
+ return sb_write_not_started(file_inode(file)->i_sb);
}
/**
@@ -1994,9 +2094,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
-extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags);
int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *len, unsigned int remap_flags,
@@ -2016,9 +2113,24 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct file *dst_file, loff_t dst_pos,
loff_t len, unsigned int remap_flags);
+/**
+ * enum freeze_holder - holder of the freeze
+ * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
+ * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
+ * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
+ *
+ * Indicate who the owner of the freeze or thaw request is and whether
+ * the freeze needs to be exclusive or can nest.
+ * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the
+ * same holder aren't allowed. It is however allowed to hold a single
+ * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at
+ * the same time. This is relied upon by some filesystems during online
+ * repair or similar.
+ */
enum freeze_holder {
FREEZE_HOLDER_KERNEL = (1U << 0),
FREEZE_HOLDER_USERSPACE = (1U << 1),
+ FREEZE_MAY_NEST = (1U << 2),
};
struct super_operations {
@@ -2110,7 +2222,12 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
+
+#ifdef CONFIG_FS_POSIX_ACL
#define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL)
+#else
+#define IS_POSIXACL(inode) 0
+#endif
#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
@@ -2254,7 +2371,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
#define I_CREATING (1 << 15)
#define I_DONTCACHE (1 << 16)
#define I_SYNC_QUEUED (1 << 17)
-#define I_PINNING_FSCACHE_WB (1 << 18)
+#define I_PINNING_NETFS_WB (1 << 18)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2334,7 +2451,6 @@ struct file_system_type {
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
-#define FS_MGTIME 64 /* FS uses multigrain timestamps */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2358,17 +2474,6 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
-/**
- * is_mgtime: is this inode using multigrain timestamps
- * @inode: inode to test for multigrain timestamps
- *
- * Return true if the inode uses multigrain timestamps, false otherwise.
- */
-static inline bool is_mgtime(const struct inode *inode)
-{
- return inode->i_sb->s_type->fs_flags & FS_MGTIME;
-}
-
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2444,13 +2549,13 @@ struct audit_names;
struct filename {
const char *name; /* pointer to actual string */
const __user char *uptr; /* original userland pointer */
- int refcnt;
+ atomic_t refcnt;
struct audit_names *aname;
const char iname[];
};
static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
-static inline struct mnt_idmap *file_mnt_idmap(struct file *file)
+static inline struct mnt_idmap *file_mnt_idmap(const struct file *file)
{
return mnt_idmap(file->f_path.mnt);
}
@@ -2489,26 +2594,31 @@ struct file *dentry_open(const struct path *path, int flags,
const struct cred *creds);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
const struct cred *cred);
-struct file *backing_file_open(const struct path *path, int flags,
- const struct path *real_path,
- const struct cred *cred);
-struct path *backing_file_real_path(struct file *f);
+struct path *backing_file_user_path(struct file *f);
/*
- * file_real_path - get the path corresponding to f_inode
- *
- * When opening a backing file for a stackable filesystem (e.g.,
- * overlayfs) f_path may be on the stackable filesystem and f_inode on
- * the underlying filesystem. When the path associated with f_inode is
- * needed, this helper should be used instead of accessing f_path
- * directly.
-*/
-static inline const struct path *file_real_path(struct file *f)
+ * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
+ * stored in ->vm_file is a backing file whose f_inode is on the underlying
+ * filesystem. When the mapped file path and inode number are displayed to
+ * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the
+ * path and inode number to display to the user, which is the path of the fd
+ * that user has requested to map and the inode number that would be returned
+ * by fstat() on that same fd.
+ */
+/* Get the path to display in /proc/<pid>/maps */
+static inline const struct path *file_user_path(struct file *f)
{
if (unlikely(f->f_mode & FMODE_BACKING))
- return backing_file_real_path(f);
+ return backing_file_user_path(f);
return &f->f_path;
}
+/* Get the inode whose inode number to display in /proc/<pid>/maps */
+static inline const struct inode *file_user_inode(struct file *f)
+{
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ return d_inode(backing_file_user_path(f)->dentry);
+ return file_inode(f);
+}
static inline struct file *file_clone_open(struct file *file)
{
@@ -2963,8 +3073,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos,
size_t len, unsigned int flags);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
-extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
- loff_t *opos, size_t len, unsigned int flags);
extern void
@@ -3054,7 +3162,6 @@ extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
-void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
@@ -3094,7 +3201,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int);
extern struct file_system_type *get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_active_super(struct block_device *bdev);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
extern void iterate_supers(void (*)(struct super_block *, void *), void *);