From 4aa7c6346be395bdf776f82bbb2e3e2bc60bdd2b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: add i_op->dentry_open() Add a new inode operation i_op->dentry_open(). This is for stacked filesystems that want to return a struct file from a different filesystem. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a957d4366c24..5cf7f6759679 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1528,6 +1528,9 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); + + /* WARNING: probably going away soon, do not use! */ + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2040,6 +2043,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); +extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.2.3 From 1c118596a7682912106c80007102ce0184c77780 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: export do_splice_direct() to modules Export do_splice_direct() to modules. Needed by overlay filesystem. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5cf7f6759679..10ed65b2c31d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2456,6 +2456,9 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); +extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); + extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -- cgit v1.2.3 From bd5d08569cc379f8366663a61558a9ce17c2e460 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: export __inode_permission() to modules We need to be able to check inode permissions (but not filesystem implied permissions) for stackable filesystems. Expose this interface for overlayfs. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 10ed65b2c31d..5419df70a835 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2257,6 +2257,7 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); +extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); static inline bool execute_ok(struct inode *inode) -- cgit v1.2.3 From c771d683a62e5d36bc46036f5c07f4f5bb7dda61 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: introduce clone_private_mount() Overlayfs needs a private clone of the mount, so create a function for this and export to modules. Signed-off-by: Miklos Szeredi --- include/linux/mount.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 9262e4bf0cc3..c2c561dc0114 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -81,6 +81,9 @@ extern struct vfsmount *mntget(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); +struct path; +extern struct vfsmount *clone_private_mount(struct path *path); + struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, -- cgit v1.2.3 From cbdf35bcb833bfd00f0925d7a9a33a21f41ea582 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: export check_sticky() It's already duplicated in btrfs and about to be used in overlayfs too. Move the sticky bit check to an inline helper and call the out-of-line helper only in the unlikly case of the sticky bit being set. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5419df70a835..55cc0a319baa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2259,6 +2259,7 @@ extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); +extern int __check_sticky(struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) { @@ -2745,6 +2746,14 @@ static inline int is_sxid(umode_t mode) return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); } +static inline int check_sticky(struct inode *dir, struct inode *inode) +{ + if (!(dir->i_mode & S_ISVTX)) + return 0; + + return __check_sticky(dir, inode); +} + static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) -- cgit v1.2.3 From 787fb6bc9682ec7c05fb5d9561b57100fbc1cc41 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: add whiteout support Whiteout isn't actually a new file type, but is represented as a char device (Linus's idea) with 0/0 device number. This has several advantages compared to introducing a new whiteout file type: - no userspace API changes (e.g. trivial to make backups of upper layer filesystem, without losing whiteouts) - no fs image format changes (you can boot an old kernel/fsck without whiteout support and things won't break) - implementation is trivial Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 55cc0a319baa..69118b3cb917 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -222,6 +222,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) +/* + * Whiteout is represented by a char device. The following constants define the + * mode and device number to use. + */ +#define WHITEOUT_MODE 0 +#define WHITEOUT_DEV 0 + /* * This is the Inode Attributes structure, used for notify_change(). It * uses the above definitions as flags, to know which values have changed. @@ -1398,6 +1405,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); +extern int vfs_whiteout(struct inode *, struct dentry *); /* * VFS dentry helper functions. @@ -1628,6 +1636,9 @@ struct super_operations { #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) +#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ + (inode)->i_rdev == WHITEOUT_DEV) + /* * Inode state bits. Protected by inode->i_lock * -- cgit v1.2.3 From 69c433ed2ecd2d3264efd7afec4439524b319121 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:39 +0200 Subject: fs: limit filesystem stacking depth Add a simple read-only counter to super_block that indicates how deep this is in the stack of filesystems. Previously ecryptfs was the only stackable filesystem and it explicitly disallowed multiple layers of itself. Overlayfs, however, can be stacked recursively and also may be stacked on top of ecryptfs or vice versa. To limit the kernel stack usage we must limit the depth of the filesystem stack. Initially the limit is set to 2. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 69118b3cb917..4e41a4a331bb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -261,6 +261,12 @@ struct iattr { */ #include +/* + * Maximum number of layers of fs stack. Needs to be limited to + * prevent kernel stack overflow + */ +#define FILESYSTEM_MAX_STACK_DEPTH 2 + /** * enum positive_aop_returns - aop return codes with specific semantics * @@ -1273,6 +1279,11 @@ struct super_block { struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct rcu_head rcu; + + /* + * Indicates how deep in a filesystem stack this SB is + */ + int s_stack_depth; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.3