From 87dc800be2499128efb3a6f059d75dc8e1e6d503 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 16 Sep 2013 10:30:04 -0400 Subject: new helper: kfree_put_link() duplicated to hell and back... Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f40547ba191..d80c2437d624 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2502,6 +2502,7 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len, int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; +extern void kfree_put_link(struct dentry *, struct nameidata *, void *); extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); extern int vfs_getattr(struct path *, struct kstat *); -- cgit v1.2.3 From e84f9e57b90ca89664d733a7cef19aa7ccd832f3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Sep 2013 14:17:15 -0400 Subject: consolidate the reassignments of ->f_op in ->open() instances Signed-off-by: Al Viro --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d80c2437d624..b09e4e1d747a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1875,6 +1875,17 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *, (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) #define fops_put(fops) \ do { if (fops) module_put((fops)->owner); } while(0) +/* + * This one is to be used *ONLY* from ->open() instances. + * fops must be non-NULL, pinned down *and* module dependencies + * should be sufficient to pin the caller down as well. 
+ */ +#define replace_fops(f, fops) \ + do { \ + struct file *__file = (f); \ + fops_put(__file->f_op); \ + BUG_ON(!(__file->f_op = (fops))); \ + } while(0) extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); -- cgit v1.2.3 From b70a80e7a133a0c86f2fa078e7c144597c516415 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:44:54 +0200 Subject: vfs: introduce d_instantiate_no_diralias() ...which just returns -EBUSY if a directory alias would be created. This is to be used by fuse mkdir to make sure that a buggy or malicious userspace filesystem doesn't do anything nasty. Previously fuse used a private mutex for this purpose, which can now go away. Signed-off-by: Miklos Szeredi --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 59066e0b4ff1..716c3760ee39 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -224,6 +224,7 @@ static inline int dname_external(const struct dentry *dentry) extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); +extern int d_instantiate_no_diralias(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); -- cgit v1.2.3 From e2fec7c35582e7bb41cccc1761faa2af4dc17627 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Oct 2013 17:06:56 -0400 Subject: make freeing super_block rcu-delayed Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b09e4e1d747a..2ab8a67ee054 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1330,6 +1330,7 @@ struct super_block { */ struct 
list_lru s_dentry_lru ____cacheline_aligned_in_smp; struct list_lru s_inode_lru ____cacheline_aligned_in_smp; + struct rcu_head rcu; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.3 From 1adfcb03e31ba0d6be5fddf773da4357d0792cbb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Oct 2013 13:28:06 -0400 Subject: pid_namespace: make freeing struct pid_namespace rcu-delayed makes procfs ->permission() instances' safety in RCU mode independent from vfsmount_lock. Signed-off-by: Al Viro --- include/linux/pid_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index e2772666f004..7246ef3d4455 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -23,6 +23,7 @@ struct bsd_acct_struct; struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; + struct rcu_head rcu; int last_pid; unsigned int nr_hashed; struct task_struct *child_reaper; -- cgit v1.2.3 From 48a066e72d970a3e225a9c18690d570c736fc455 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Sep 2013 22:06:07 -0400 Subject: RCU'd vfsmounts * RCU-delayed freeing of vfsmounts * vfsmount_lock replaced with a seqlock (mount_lock) * sequence number from mount_lock is stored in nameidata->m_seq and used when we exit RCU mode * new vfsmount flag - MNT_SYNC_UMOUNT. Set by umount_tree() when its caller knows that vfsmount will have no surviving references. * synchronize_rcu() done between unlocking namespace_sem in namespace_unlock() and doing pending mntput(). * new helper: legitimize_mnt(mnt, seq). Checks the mount_lock sequence number against seq, then grabs reference to mnt. Then it rechecks mount_lock again to close the race and either returns success or drops the reference it has acquired. 
The subtle point is that in case of MNT_SYNC_UMOUNT we can simply decrement the refcount and sod off - aforementioned synchronize_rcu() makes sure that final mntput() won't come until we leave RCU mode. We need that, since we don't want to end up with some lazy pathwalk racing with umount() and stealing the final mntput() from it - caller of umount() may expect it to return only once the fs is shut down and we don't want to break that. In other cases (i.e. with MNT_SYNC_UMOUNT absent) we have to do full-blown mntput() in case of mount_lock sequence number mismatch happening just as we'd grabbed the reference, but in those cases we won't be stealing the final mntput() from anything that would care. * mntput_no_expire() doesn't lock anything on the fast path now. Incidentally, SMP and UP cases are handled the same way - no ifdefs there. * normal pathname resolution does *not* do any writes to mount_lock. It does, of course, bump the refcounts of vfsmount and dentry in the very end, but that's it. 
Signed-off-by: Al Viro --- include/linux/mount.h | 2 ++ include/linux/namei.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 38cd98f112a0..371d346fa270 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -49,6 +49,8 @@ struct mnt_namespace; #define MNT_LOCK_READONLY 0x400000 #define MNT_LOCKED 0x800000 +#define MNT_DOOMED 0x1000000 +#define MNT_SYNC_UMOUNT 0x2000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ diff --git a/include/linux/namei.h b/include/linux/namei.h index 8e47bc7a1665..492de72560fa 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -16,7 +16,7 @@ struct nameidata { struct path root; struct inode *inode; /* path.dentry.d_inode */ unsigned int flags; - unsigned seq; + unsigned seq, m_seq; int last_type; unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; -- cgit v1.2.3 From eee5cc2702929fd41cce28058dc6d6717f723f87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Oct 2013 11:06:42 -0400 Subject: get rid of s_files and files_lock The only thing we need it for is alt-sysrq-r (emergency remount r/o) and these days we can do just as well without going through the list of files. Signed-off-by: Al Viro --- include/linux/fs.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ab8a67ee054..2b0f4e974480 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -764,12 +764,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) #define FILE_MNT_WRITE_RELEASED 2 struct file { - /* - * fu_list becomes invalid after file_free is called and queued via - * fu_rcuhead for RCU freeing - */ union { - struct list_head fu_list; struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; @@ -783,9 +778,6 @@ struct file { * Must not be taken from IRQ context. 
*/ spinlock_t f_lock; -#ifdef CONFIG_SMP - int f_sb_list_cpu; -#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1264,11 +1256,6 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ -#ifdef CONFIG_SMP - struct list_head __percpu *s_files; -#else - struct list_head s_files; -#endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; -- cgit v1.2.3 From 0f6ed63b170778b9c93fb0ae4017f110c9ee6416 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Oct 2013 14:19:39 -0400 Subject: no need to keep brlock macros anymore... Signed-off-by: Al Viro --- include/linux/lglock.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0d24e932db0b..96549abe8842 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -25,16 +25,6 @@ #include #include -/* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) lg_lock_init(name, #name) -#define br_read_lock(name) lg_local_lock(name) -#define br_read_unlock(name) lg_local_unlock(name) -#define br_write_lock(name) lg_global_lock(name) -#define br_write_unlock(name) lg_global_unlock(name) - -#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) -#define DEFINE_STATIC_BRLOCK(name) DEFINE_STATIC_LGLOCK(name) - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map #else -- cgit v1.2.3 From ecc8c7725e6c21528329b34acae2a1d64b3af89b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Oct 2013 15:32:35 -0400 Subject: new helper: dump_emit() dump_write() analog, takes core_dump_params instead of file, keeps track of the amount written in cprm->written and checks for cprm->limit. Start using it in binfmt_elf.c... 
Signed-off-by: Al Viro --- include/linux/binfmts.h | 1 + include/linux/coredump.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index e8112ae50531..8aa507e7a41a 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -61,6 +61,7 @@ struct coredump_params { struct file *file; unsigned long limit; unsigned long mm_flags; + loff_t written; }; /* diff --git a/include/linux/coredump.h b/include/linux/coredump.h index a98f1ca60407..2959376a9ad5 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -10,8 +10,10 @@ * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. */ +struct coredump_params; extern int dump_write(struct file *file, const void *addr, int nr); extern int dump_seek(struct file *file, loff_t off); +extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); #ifdef CONFIG_COREDUMP extern void do_coredump(siginfo_t *siginfo); #else -- cgit v1.2.3 From 506f21c556c747bb07b893f146220ec45cda381b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Oct 2013 17:22:57 -0400 Subject: switch elf_core_write_extra_phdrs() to dump_emit() Signed-off-by: Al Viro --- include/linux/elfcore.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index cdd3d13efce7..1b92a8c40624 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -6,6 +6,8 @@ #include #include +struct coredump_params; + static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs) { #ifdef ELF_CORE_COPY_REGS @@ -63,8 +65,7 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse */ extern Elf_Half elf_core_extra_phdrs(void); extern int -elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, - unsigned long limit); 
+elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); extern size_t elf_core_extra_data_size(void); -- cgit v1.2.3 From aa3e7eaf0a0f06edd2b733e84e7e8ffe108e8786 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Oct 2013 17:50:15 -0400 Subject: switch elf_core_write_extra_data() to dump_emit() Signed-off-by: Al Viro --- include/linux/elfcore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 1b92a8c40624..698d51a0eea3 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -67,7 +67,7 @@ extern Elf_Half elf_core_extra_phdrs(void); extern int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int -elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); +elf_core_write_extra_data(struct coredump_params *cprm); extern size_t elf_core_extra_data_size(void); #endif /* _LINUX_ELFCORE_H */ -- cgit v1.2.3 From cdc3d5627d5f7c4e6b6372b9fb39cba0fe6a9b2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Oct 2013 22:24:29 -0400 Subject: switch elf_coredump_extra_notes_write() to dump_emit() Signed-off-by: Al Viro --- include/linux/elf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/elf.h b/include/linux/elf.h index 40a3c0e01b2b..67a5fa7830c4 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -39,13 +39,13 @@ extern Elf64_Dyn _DYNAMIC []; /* Optional callbacks to write extra ELF notes. 
*/ struct file; +struct coredump_params; #ifndef ARCH_HAVE_EXTRA_ELF_NOTES static inline int elf_coredump_extra_notes_size(void) { return 0; } -static inline int elf_coredump_extra_notes_write(struct file *file, - loff_t *foffset) { return 0; } +static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; } #else extern int elf_coredump_extra_notes_size(void); -extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset); +extern int elf_coredump_extra_notes_write(struct coredump_params *cprm); #endif #endif /* _LINUX_ELF_H */ -- cgit v1.2.3 From 9b56d54380adb5fef71f687109bbd6f8413d694f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Oct 2013 09:26:08 -0400 Subject: dump_skip(): dump_seek() replacement taking coredump_params Signed-off-by: Al Viro --- include/linux/coredump.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 2959376a9ad5..07a0af93f230 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -11,8 +11,7 @@ * functions to write out all the necessary info. */ struct coredump_params; -extern int dump_write(struct file *file, const void *addr, int nr); -extern int dump_seek(struct file *file, loff_t off); +extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); #ifdef CONFIG_COREDUMP extern void do_coredump(siginfo_t *siginfo); -- cgit v1.2.3 From 22a8cb8248ba5d340307ba72432253b1dbdb5cf7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Oct 2013 11:05:01 -0400 Subject: new helper: dump_align() dump_skip to given alignment... 
Signed-off-by: Al Viro --- include/linux/coredump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 07a0af93f230..d8eb880be82a 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -13,6 +13,7 @@ struct coredump_params; extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); +extern int dump_align(struct coredump_params *cprm, int align); #ifdef CONFIG_COREDUMP extern void do_coredump(siginfo_t *siginfo); #else -- cgit v1.2.3 From 6987843ff7e836ea65b554905aec34d2fad05c94 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Oct 2013 22:35:11 -0400 Subject: take anon inode allocation to libfs.c Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2b0f4e974480..e190326ac212 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2562,6 +2562,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern int simple_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); +extern struct inode *alloc_anon_inode(struct super_block *); extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); -- cgit v1.2.3 From 078d8e624c1837aa8ad65e58054a4a40d7ac46d2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 9 Oct 2013 10:26:28 -0400 Subject: ... and kill anon_inode_getfile_private() it's a seriously misguided API, now fortunately without users. 
Signed-off-by: Al Viro --- include/linux/anon_inodes.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index cf573c22b81e..8013a45242fe 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -13,9 +13,6 @@ struct file_operations; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); -struct file *anon_inode_getfile_private(const char *name, - const struct file_operations *fops, - void *priv, int flags); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); -- cgit v1.2.3 From ce3959604878c1c693979ec552069dc8bdb5ccde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 13 Oct 2013 17:23:53 -0400 Subject: constify copy_siginfo_to_user{,32}() Signed-off-by: Al Viro --- include/asm-generic/siginfo.h | 2 +- include/linux/compat.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index b685d3bd32e2..3d1a3af5cf59 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -32,6 +32,6 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) #endif -extern int copy_siginfo_to_user(struct siginfo __user *to, struct siginfo *from); +extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from); #endif diff --git a/include/linux/compat.h b/include/linux/compat.h index 345da00a86e0..78cdf51ff5ba 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -362,7 +362,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, unsigned long bitmap_size); int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); -int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t 
*from); +int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, -- cgit v1.2.3 From ec57941e031685de434916e5398d0ca1d44cd374 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 13 Oct 2013 17:57:29 -0400 Subject: constify do_coredump() argument Signed-off-by: Al Viro --- include/linux/binfmts.h | 2 +- include/linux/coredump.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8aa507e7a41a..790d3305a5a7 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -56,7 +56,7 @@ struct linux_binprm { /* Function parameter for binfmt->coredump */ struct coredump_params { - siginfo_t *siginfo; + const siginfo_t *siginfo; struct pt_regs *regs; struct file *file; unsigned long limit; diff --git a/include/linux/coredump.h b/include/linux/coredump.h index d8eb880be82a..d016a121a8c4 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -15,9 +15,9 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); #ifdef CONFIG_COREDUMP -extern void do_coredump(siginfo_t *siginfo); +extern void do_coredump(const siginfo_t *siginfo); #else -static inline void do_coredump(siginfo_t *siginfo) {} +static inline void do_coredump(const siginfo_t *siginfo) {} #endif #endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3 From b18825a7c8e37a7cf6abb97a12a6ad71af160de7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Sep 2013 19:22:53 +0100 Subject: VFS: Put a small type field into struct dentry::d_flags Put a type field into struct dentry::d_flags to indicate if the dentry is one of the following types that relate 
particularly to pathwalk: Miss (negative dentry) Directory "Automount" directory (defective - no i_op->lookup()) Symlink Other (regular, socket, fifo, device) The type field is set to one of the first five types on a dentry by calls to __d_instantiate() and d_obtain_alias() from information in the inode (if one is given). The type is cleared by dentry_unlink_inode() when it reconstitutes an existing dentry as a negative dentry. Accessors provided are: d_set_type(dentry, type) d_is_directory(dentry) d_is_autodir(dentry) d_is_symlink(dentry) d_is_file(dentry) d_is_negative(dentry) d_is_positive(dentry) A bunch of checks in pathname resolution switched to those. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 103 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 716c3760ee39..57e87e749a48 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -169,13 +169,13 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH 0x0001 -#define DCACHE_OP_COMPARE 0x0002 -#define DCACHE_OP_REVALIDATE 0x0004 -#define DCACHE_OP_DELETE 0x0008 -#define DCACHE_OP_PRUNE 0x0010 +#define DCACHE_OP_HASH 0x00000001 +#define DCACHE_OP_COMPARE 0x00000002 +#define DCACHE_OP_REVALIDATE 0x00000004 +#define DCACHE_OP_DELETE 0x00000008 +#define DCACHE_OP_PRUNE 0x00000010 -#define DCACHE_DISCONNECTED 0x0020 +#define DCACHE_DISCONNECTED 0x00000020 /* This dentry is possibly not currently connected to the dcache tree, in * which case its parent will either be itself, or will have this flag as * well. nfsd will not use a dentry with this bit set, but will first @@ -186,30 +186,38 @@ struct dentry_operations { * dentry into place and return that dentry rather than the passed one, * typically using d_splice_alias. */ -#define DCACHE_REFERENCED 0x0040 /* Recently used, don't discard. 
*/ -#define DCACHE_RCUACCESS 0x0080 /* Entry has ever been RCU-visible */ +#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */ +#define DCACHE_RCUACCESS 0x00000080 /* Entry has ever been RCU-visible */ -#define DCACHE_CANT_MOUNT 0x0100 -#define DCACHE_GENOCIDE 0x0200 -#define DCACHE_SHRINK_LIST 0x0400 +#define DCACHE_CANT_MOUNT 0x00000100 +#define DCACHE_GENOCIDE 0x00000200 +#define DCACHE_SHRINK_LIST 0x00000400 -#define DCACHE_OP_WEAK_REVALIDATE 0x0800 +#define DCACHE_OP_WEAK_REVALIDATE 0x00000800 -#define DCACHE_NFSFS_RENAMED 0x1000 +#define DCACHE_NFSFS_RENAMED 0x00001000 /* this dentry has been "silly renamed" and has to be deleted on the last * dput() */ -#define DCACHE_COOKIE 0x2000 /* For use by dcookie subsystem */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000 +#define DCACHE_COOKIE 0x00002000 /* For use by dcookie subsystem */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x00004000 /* Parent inode is watched by some fsnotify listener */ -#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ +#define DCACHE_DENTRY_KILLED 0x00008000 + +#define DCACHE_MOUNTED 0x00010000 /* is a mountpoint */ +#define DCACHE_NEED_AUTOMOUNT 0x00020000 /* handle automount on this dir */ +#define DCACHE_MANAGE_TRANSIT 0x00040000 /* manage transit from this dirent */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST 0x80000 -#define DCACHE_DENTRY_KILLED 0x100000 +#define DCACHE_LRU_LIST 0x00080000 + +#define DCACHE_ENTRY_TYPE 0x00700000 +#define DCACHE_MISS_TYPE 0x00000000 /* Negative dentry */ +#define DCACHE_DIRECTORY_TYPE 0x00100000 /* Normal directory */ +#define DCACHE_AUTODIR_TYPE 0x00200000 /* Lookupless directory (presumed automount) */ +#define DCACHE_SYMLINK_TYPE 0x00300000 /* Symlink */ +#define DCACHE_FILE_TYPE 0x00400000 /* Other file 
type */ extern seqlock_t rename_lock; @@ -394,6 +402,61 @@ static inline bool d_mountpoint(const struct dentry *dentry) return dentry->d_flags & DCACHE_MOUNTED; } +/* + * Directory cache entry type accessor functions. + */ +static inline void __d_set_type(struct dentry *dentry, unsigned type) +{ + dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type; +} + +static inline void __d_clear_type(struct dentry *dentry) +{ + __d_set_type(dentry, DCACHE_MISS_TYPE); +} + +static inline void d_set_type(struct dentry *dentry, unsigned type) +{ + spin_lock(&dentry->d_lock); + __d_set_type(dentry, type); + spin_unlock(&dentry->d_lock); +} + +static inline unsigned __d_entry_type(const struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_ENTRY_TYPE; +} + +static inline bool d_is_directory(const struct dentry *dentry) +{ + return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE; +} + +static inline bool d_is_autodir(const struct dentry *dentry) +{ + return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE; +} + +static inline bool d_is_symlink(const struct dentry *dentry) +{ + return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE; +} + +static inline bool d_is_file(const struct dentry *dentry) +{ + return __d_entry_type(dentry) == DCACHE_FILE_TYPE; +} + +static inline bool d_is_negative(const struct dentry *dentry) +{ + return __d_entry_type(dentry) == DCACHE_MISS_TYPE; +} + +static inline bool d_is_positive(const struct dentry *dentry) +{ + return !d_is_negative(dentry); +} + extern int sysctl_vfs_cache_pressure; static inline unsigned long vfs_pressure_ratio(unsigned long val) -- cgit v1.2.3 From b7a6ec52dd4eced4a9bcda9ca85b3c8af84d3c90 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 2 Oct 2013 17:01:18 -0400 Subject: vfs: split out vfs_getattr_nosec The filehandle lookup code wants this version of getattr. Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e190326ac212..5e44b0893db8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2504,6 +2504,7 @@ extern const struct inode_operations page_symlink_inode_operations; extern void kfree_put_link(struct dentry *, struct nameidata *, void *); extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); +int vfs_getattr_nosec(struct path *path, struct kstat *stat); extern int vfs_getattr(struct path *, struct kstat *); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); -- cgit v1.2.3 From 375e289ea85166c5241c570940e7e7e966c63a9f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 18 Apr 2012 15:16:33 -0400 Subject: vfs: pull ext4's double-i_mutex-locking into common code We want to do this elsewhere as well. Also catch any attempts to use it for directories (where this ordering would conflict with ancestor-first directory ordering in lock_rename). Cc: Andreas Dilger Cc: Dave Chinner Acked-by: Jeff Layton Acked-by: "Theodore Ts'o" Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e44b0893db8..4e1a0b41f966 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -637,6 +637,9 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; +void lock_two_nondirectories(struct inode *, struct inode*); +void unlock_two_nondirectories(struct inode *, struct inode*); + /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic -- cgit v1.2.3 From 40bd22c9f8617ddd5da06044c81f72a2cf700791 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Wed, 18 Apr 2012 15:21:34 -0400 Subject: vfs: rename I_MUTEX_QUOTA now that it's not used for quotas I_MUTEX_QUOTA is now just being used whenever we want to lock two non-directories. So the name isn't right. I_MUTEX_NONDIR2 isn't especially elegant but it's the best I could think of. Also fix some outdated documentation. Acked-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e1a0b41f966..ed7f94af1ab2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -623,10 +623,13 @@ static inline int inode_unhashed(struct inode *inode) * 0: the object of the current VFS operation * 1: parent * 2: child/target - * 3: quota file + * 3: xattr + * 4: second non-directory + * The last is for certain operations (such as rename) which lock two + * non-directories at once. * * The locking order between these classes is - * parent -> child -> normal -> xattr -> quota + * parent -> child -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { @@ -634,7 +637,7 @@ enum inode_i_mutex_lock_class I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, - I_MUTEX_QUOTA + I_MUTEX_NONDIR2 }; void lock_two_nondirectories(struct inode *, struct inode*); -- cgit v1.2.3 From 617588d5186c887eb94321b021bb5a46f896f4b3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 1 Jul 2011 15:18:34 -0400 Subject: locks: introduce new FL_DELEG lock flag For now FL_DELEG is just a synonym for FL_LEASE. So this patch doesn't change behavior. Next we'll modify break_lease to treat FL_DELEG leases differently, to account for the fact that NFSv4 delegations should be broken in more situations than Windows oplocks. Acked-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed7f94af1ab2..129e150f9e94 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -880,6 +880,7 @@ static inline int file_check_writeable(struct file *filp) #define FL_POSIX 1 #define FL_FLOCK 2 +#define FL_DELEG 4 /* NFSv4 delegation */ #define FL_ACCESS 8 /* not trying to lock, just looking */ #define FL_EXISTS 16 /* when unlocking, test for existence */ #define FL_LEASE 32 /* lease held on this file */ -- cgit v1.2.3 From df4e8d2c1d2bbbbace706bfe5417320c9e3fbee3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 5 Mar 2012 13:18:59 -0500 Subject: locks: implement delegations Implement NFSv4 delegations at the vfs level using the new FL_DELEG lock type. Note nfsd is the only delegation user and is only using read delegations. Warn on any attempt to set a write delegation for now. We'll come back to that case later. Acked-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 129e150f9e94..8e4be1be1a62 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1022,7 +1022,7 @@ extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); -extern int __break_lease(struct inode *inode, unsigned int flags); +extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **); extern int vfs_setlease(struct file *, long, struct file_lock **); @@ -1131,7 +1131,7 @@ static inline int flock_lock_file_wait(struct file *filp, return -ENOLCK; } -static inline int __break_lease(struct inode *inode, unsigned int mode) +static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; } @@ -1961,9 +1961,17 @@ static inline int locks_verify_truncate(struct inode *inode, static inline int break_lease(struct inode *inode, unsigned int mode) { if (inode->i_flock) - return __break_lease(inode, mode); + return __break_lease(inode, mode, FL_LEASE); return 0; } + +static inline int break_deleg(struct inode *inode, unsigned int mode) +{ + if (inode->i_flock) + return __break_lease(inode, mode, FL_DELEG); + return 0; +} + #else /* !CONFIG_FILE_LOCKING */ static inline int locks_mandatory_locked(struct inode *inode) { @@ -2003,6 +2011,10 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return 0; } +static inline int break_deleg(struct inode *inode, unsigned int mode) +{ + return 0; +} #endif /* 
CONFIG_FILE_LOCKING */ /* fs/open.c */ -- cgit v1.2.3 From b21996e36c8e3b92a84e972378bde80b43acd890 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Sep 2011 09:14:34 -0400 Subject: locks: break delegations on unlink We need to break delegations on any operation that changes the set of links pointing to an inode. Start with unlink. Such operations also hold the i_mutex on a parent directory. Breaking a delegation may require waiting for a timeout (by default 90 seconds) in the case of an unresponsive NFS client. To avoid blocking all directory operations, we therefore drop locks before waiting for the delegation. The logic then looks like: acquire locks ... test for delegation; if found: take reference on inode release locks wait for delegation break drop reference on inode retry It is possible this could never terminate. (Even if we take precautions to prevent another delegation being acquired on the same inode, we could get a different inode on each retry.) But this seems very unlikely. The initial test for a delegation happens after the lock on the target inode is acquired, but the directory inode may have been acquired further up the call stack. We therefore add a "struct inode **" argument to any intervening functions, which we use to pass the inode back up to the caller in the case it needs a delegation synchronously broken. Cc: David Howells Cc: Tyler Hicks Cc: Dustin Kirkland Acked-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8e4be1be1a62..a5799233142a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1455,7 +1455,7 @@ extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *); -extern int vfs_unlink(struct inode *, struct dentry *); +extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* -- cgit v1.2.3 From 5a14696c1795d3843673b5cf1982d0e5357a5bbf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 28 Aug 2012 07:50:40 -0700 Subject: locks: helper functions for delegation breaking We'll need the same logic for rename and link. Acked-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a5799233142a..931f919f44e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1905,6 +1905,9 @@ extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); +extern void ihold(struct inode * inode); +extern void iput(struct inode *); + /* /sys/fs */ extern struct kobject *fs_kobj; @@ -1972,6 +1975,28 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) return 0; } +static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +{ + int ret; + + ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); + if (ret == -EWOULDBLOCK && delegated_inode) { + *delegated_inode = inode; + ihold(inode); + } + return ret; +} + +static inline int break_deleg_wait(struct inode **delegated_inode) +{ + int ret; + + ret = break_deleg(*delegated_inode, O_WRONLY); + iput(*delegated_inode); + *delegated_inode = NULL; + return ret; +} + #else /* !CONFIG_FILE_LOCKING */ static inline int locks_mandatory_locked(struct inode *inode) { @@ -2015,6 +2040,18 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) { return 0; } + +static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +{ + return 0; +} + +static inline int break_deleg_wait(struct inode **delegated_inode) +{ + BUG(); + return 0; +} + #endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ @@ -2350,8 +2387,6 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void address_space_init_once(struct address_space *mapping); -extern void ihold(struct inode * inode); -extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern 
ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); -- cgit v1.2.3 From 8e6d782cab50884ba94324632700e6233a252f6a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Sep 2011 16:59:58 -0400 Subject: locks: break delegations on rename Cc: David Howells Acked-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 931f919f44e1..5bcff883fa90 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1456,7 +1456,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **); /* * VFS dentry helper functions. -- cgit v1.2.3 From 146a8595c6399ee6ab4b5cc34c0d28aa4835fdc5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Sep 2011 17:14:31 -0400 Subject: locks: break delegations on link Cc: Tyler Hicks Cc: Dustin Kirkland Acked-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bcff883fa90..6e36e7118ec1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1453,7 +1453,7 @@ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); -extern int vfs_link(struct dentry *, struct inode *, struct dentry *); +extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **); -- cgit v1.2.3 From 27ac0ffeac80ba6b9580529568d06144df044366 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Sep 2011 17:19:26 -0400 Subject: locks: break delegations on any attribute modification NFSv4 uses leases to guarantee that clients can cache metadata as well as data. Cc: Mikulas Patocka Cc: David Howells Cc: Tyler Hicks Cc: Dustin Kirkland Acked-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e36e7118ec1..ab2a0ca82dc5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2278,7 +2278,7 @@ extern void emergency_remount(void); #ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); #endif -extern int notify_change(struct dentry *, struct iattr *); +extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); -- cgit v1.2.3