diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-12-31 15:57:56 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-12-31 15:57:56 -0800 |
commit | b58602a4bac012b5f4fc12fe6b46ab237b610d5d (patch) | |
tree | 38929e210a723f67fe2b945f37264bf3aae3c2c4 /fs | |
parent | db200df0b3530f673d8e9f5bd535e9e10305842a (diff) | |
parent | 1f3403fa640f9f7b135dee79f2d39d01c8ad4a08 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (34 commits)
nfsd race fixes: jfs
nfsd race fixes: reiserfs
nfsd race fixes: ext4
nfsd race fixes: ext3
nfsd race fixes: ext2
nfsd/create race fixes, infrastructure
filesystem notification: create fs/notify to contain all fs notification
fs/block_dev.c: __read_mostly improvement and sb_is_blkdev_sb utilization
kill ->dir_notify()
filp_cachep can be static in fs/file_table.c
fix f_count description in Documentation/filesystems/files.txt
make INIT_FS use the __RW_LOCK_UNLOCKED initialization
take init_fs to saner place
kill vfs_permission
pass a struct path * to may_open
kill walk_init_root
remove incorrect comment in inode_permission
expand some comments (d_path / seq_path)
correct wrong function name of d_put in kernel document and source comment
fix switch_names() breakage in short-to-short case
...
Diffstat (limited to 'fs')
44 files changed, 385 insertions, 292 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 522469a7eca3..ff0e81980207 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -270,44 +270,7 @@ config OCFS2_COMPAT_JBD endif # BLOCK -config DNOTIFY - bool "Dnotify support" - default y - help - Dnotify is a directory-based per-fd file change notification system - that uses signals to communicate events to user-space. There exist - superior alternatives, but some applications may still rely on - dnotify. - - If unsure, say Y. - -config INOTIFY - bool "Inotify file change notification support" - default y - ---help--- - Say Y here to enable inotify support. Inotify is a file change - notification system and a replacement for dnotify. Inotify fixes - numerous shortcomings in dnotify and introduces several new features - including multiple file events, one-shot support, and unmount - notification. - - For more information, see <file:Documentation/filesystems/inotify.txt> - - If unsure, say Y. - -config INOTIFY_USER - bool "Inotify support for userspace" - depends on INOTIFY - default y - ---help--- - Say Y here to enable inotify support for userspace, including the - associated system calls. Inotify allows monitoring of both files and - directories via a single open fd. Events are read from the file - descriptor, which is also select()- and poll()-able. - - For more information, see <file:Documentation/filesystems/inotify.txt> - - If unsure, say Y. +source "fs/notify/Kconfig" config QUOTA bool "Quota support" diff --git a/fs/Makefile b/fs/Makefile index d9f8afe6f0c4..e6f423d1d228 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -20,8 +20,7 @@ obj-y += no-block.o endif obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o -obj-$(CONFIG_INOTIFY) += inotify.o -obj-$(CONFIG_INOTIFY_USER) += inotify_user.o +obj-y += notify/ obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_ANON_INODES) += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o @@ -57,8 +56,6 @@ obj-$(CONFIG_QFMT_V1) += quota_v1.o obj-$(CONFIG_QFMT_V2) += quota_v2.o obj-$(CONFIG_QUOTACTL) += quota.o -obj-$(CONFIG_DNOTIFY) += dnotify.o - obj-$(CONFIG_PROC_FS) += proc/ obj-y += partitions/ obj-$(CONFIG_SYSFS) += sysfs/ diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 5f1538c03b1b..a05287a23f62 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags) return -EIO; } -static int bad_file_dir_notify(struct file *file, unsigned long arg) -{ - return -EIO; -} - static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl) { return -EIO; @@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops = .sendpage = bad_file_sendpage, .get_unmapped_area = bad_file_get_unmapped_area, .check_flags = bad_file_check_flags, - .dir_notify = bad_file_dir_notify, .flock = bad_file_flock, .splice_write = bad_file_splice_write, .splice_read = bad_file_splice_read, diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index b6dfee37c7b7..d06cb023ad02 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -378,7 +378,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_size = 0; inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink, - BEFS_SYMLINK_LEN); + BEFS_SYMLINK_LEN - 1); + befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0'; } else { int num_blks; @@ -477,6 +478,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) kfree(link); befs_error(sb, "Failed to read entire long symlink"); link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; } } else { link = befs_ino->i_data.symlink; diff --git a/fs/block_dev.c b/fs/block_dev.c index 99e0ae1a4c78..349a26c10001 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -326,12 +326,13 @@ static struct file_system_type bd_type = { .kill_sb = kill_anon_super, }; -static struct vfsmount *bd_mnt __read_mostly; -struct super_block *blockdev_superblock; +struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; + struct vfsmount *bd_mnt; + bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), @@ -373,7 +374,7 @@ struct block_device *bdget(dev_t dev) struct block_device *bdev; struct inode *inode; - inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), + inode = iget5_locked(blockdev_superblock, hash(dev), bdev_test, bdev_set, &dev); if (!inode) @@ -463,7 +464,7 @@ void bd_forget(struct inode *inode) spin_lock(&bdev_lock); if (inode->i_bdev) { - if (inode->i_sb != blockdev_superblock) + if (!sb_is_blkdev_sb(inode->i_sb)) bdev = inode->i_bdev; __bd_forget(inode); } diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 6ba43fb346fb..9948c0030e86 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ - md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \ + md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o cifsacl.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0005a194a75c..13ea53251dcf 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = { .readdir = cifs_readdir, .release = cifs_closedir, .read = generic_read_dir, -#ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, -#endif /* CONFIG_CIFS_EXPERIMENTAL */ .unlocked_ioctl = cifs_ioctl, .llseek = generic_file_llseek, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 2ce04c73d74e..7ac481841f87 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); -extern int cifs_dir_notify(struct file *, unsigned long arg); /* Functions related to dir entries */ extern struct dentry_operations cifs_dentry_ops; diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c deleted file mode 100644 index 5a57581eb4b2..000000000000 --- a/fs/cifs/fcntl.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * fs/cifs/fcntl.c - * - * vfs operations that deal with the file control API - * - * Copyright (C) International Business Machines Corp., 2003,2004 - * Author(s): Steve French (sfrench@us.ibm.com) - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include "cifsglob.h" -#include "cifsproto.h" -#include "cifs_unicode.h" -#include "cifs_debug.h" -#include "cifsfs.h" - -static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags) -{ - __u32 cifs_ntfy_flags = 0; - - /* No way on Linux VFS to ask to monitor xattr - changes (and no stream support either */ - if (fcntl_notify_flags & DN_ACCESS) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; - if (fcntl_notify_flags & DN_MODIFY) { - /* What does this mean on directories? */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | - FILE_NOTIFY_CHANGE_SIZE; - } - if (fcntl_notify_flags & DN_CREATE) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | - FILE_NOTIFY_CHANGE_LAST_WRITE; - } - if (fcntl_notify_flags & DN_DELETE) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; - if (fcntl_notify_flags & DN_RENAME) { - /* BB review this - checking various server behaviors */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | - FILE_NOTIFY_CHANGE_FILE_NAME; - } - if (fcntl_notify_flags & DN_ATTRIB) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | - FILE_NOTIFY_CHANGE_ATTRIBUTES; - } -/* if (fcntl_notify_flags & DN_MULTISHOT) { - cifs_ntfy_flags |= ; - } */ /* BB fixme - not sure how to handle this with CIFS yet */ - - return cifs_ntfy_flags; -} - -int cifs_dir_notify(struct file *file, unsigned long arg) -{ - int xid; - int rc = -EINVAL; - int oplock = 0; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; - char *full_path = NULL; - __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES; - __u16 netfid; - - if (experimEnabled == 0) - return 0; - - xid = GetXid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - pTcon = cifs_sb->tcon; - - full_path = build_path_from_dentry(file->f_path.dentry); - - if (full_path == NULL) { - rc = -ENOMEM; - } else { - cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg)); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - GENERIC_READ | SYNCHRONIZE, 0 /* create options */, - &netfid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - /* BB fixme - add this handle to a notify handle list */ - if (rc) { - cFYI(1, ("Could not open directory for notify")); - } else { - filter = convert_to_cifs_notify_flags(arg); - if (filter != 0) { - rc = CIFSSMBNotify(xid, pTcon, - 0 /* no subdirs */, netfid, - filter, file, arg & DN_MULTISHOT, - cifs_sb->local_nls); - } else { - rc = -EINVAL; - } - /* BB add code to close file eventually (at unmount - it would close automatically but may be a way - to do it easily when inode freed or when - notify info is cleared/changed */ - cFYI(1, ("notify rc %d", rc)); - } - } - - FreeXid(xid); - return rc; -} diff --git a/fs/dcache.c b/fs/dcache.c index a1d86c7f3e66..e88c23b85a32 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -34,7 +34,6 @@ #include <linux/bootmem.h> #include "internal.h" - int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); @@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_op = NULL; dentry->d_fsdata = NULL; dentry->d_mounted = 0; -#ifdef CONFIG_PROFILING - dentry->d_cookie = NULL; -#endif INIT_HLIST_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); @@ -1336,7 +1332,7 @@ err_out: * * Searches the children of the parent dentry for the name in question. If * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use d_put to free the entry when it has + * is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned on failure. * * __d_lookup is dcache_lock free. The hash list is protected using RCU. @@ -1620,8 +1616,11 @@ static void switch_names(struct dentry *dentry, struct dentry *target) */ memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); + dentry->d_name.len = target->d_name.len; + return; } } + do_switch(dentry->d_name.len, target->d_name.len); } /* @@ -1681,7 +1680,6 @@ already_unhashed: /* Switch the names.. */ switch_names(dentry, target); - do_switch(dentry->d_name.len, target->d_name.len); do_switch(dentry->d_name.hash, target->d_name.hash); /* ... and switch the parents */ @@ -1791,7 +1789,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) struct dentry *dparent, *aparent; switch_names(dentry, anon); - do_switch(dentry->d_name.len, anon->d_name.len); do_switch(dentry->d_name.hash, anon->d_name.hash); dparent = dentry->d_parent; @@ -1911,7 +1908,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * Convert a dentry into an ASCII path name. If the entry has been deleted * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code if the path was too long. + * Returns a pointer into the buffer or an error code if the + * path was too long. * * "buflen" should be positive. Caller holds the dcache_lock. * @@ -1987,7 +1985,10 @@ Elong: * Convert a dentry into an ASCII path name. If the entry has been deleted * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code if the path was too long. + * Returns a pointer into the buffer or an error code if the path was + * too long. Note: Callers should use the returned pointer, not the passed + * in buffer, to use the name! The implementation often starts at an offset + * into the buffer, and may leave 0 bytes at the start. * * "buflen" should be positive. */ @@ -2313,9 +2314,6 @@ static void __init dcache_init(void) /* SLAB cache for __getname() consumers */ struct kmem_cache *names_cachep __read_mostly; -/* SLAB cache for file structures */ -struct kmem_cache *filp_cachep __read_mostly; - EXPORT_SYMBOL(d_genocide); void __init vfs_caches_init_early(void) @@ -2337,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages) names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - dcache_init(); inode_init(); files_init(mempages); diff --git a/fs/dcookies.c b/fs/dcookies.c index 855d4b1d619a..180e9fec4ad8 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path) { struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache, GFP_KERNEL); + struct dentry *d; if (!dcs) return NULL; - path->dentry->d_cookie = dcs; + d = path->dentry; + spin_lock(&d->d_lock); + d->d_flags |= DCACHE_COOKIE; + spin_unlock(&d->d_lock); + dcs->path = *path; path_get(path); hash_dcookie(dcs); @@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie) goto out; } - dcs = path->dentry->d_cookie; - - if (!dcs) + if (path->dentry->d_flags & DCACHE_COOKIE) { + dcs = find_dcookie((unsigned long)path->dentry); + } else { dcs = alloc_dcookie(path); - - if (!dcs) { - err = -ENOMEM; - goto out; + if (!dcs) { + err = -ENOMEM; + goto out; + } } *cookie = dcookie_value(dcs); @@ -251,7 +256,12 @@ out_kmem: static void free_dcookie(struct dcookie_struct * dcs) { - dcs->path.dentry->d_cookie = NULL; + struct dentry *d = dcs->path.dentry; + + spin_lock(&d->d_lock); + d->d_flags &= ~DCACHE_COOKIE; + spin_unlock(&d->d_lock); + path_put(&dcs->path); kmem_cache_free(dcookie_cache, dcs); } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 89209f00f9c7..5e78fc179886 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -673,10 +673,11 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " "dentry->d_name.name = [%s]\n", dentry->d_name.name); rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); - buf[rc] = '\0'; set_fs(old_fs); if (rc < 0) goto out_free; + else + buf[rc] = '\0'; rc = 0; nd_set_link(nd, buf); goto out; diff --git a/fs/exec.c b/fs/exec.c index 02d2e120542d..dfbf7009fbe7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -127,7 +127,8 @@ asmlinkage long sys_uselib(const char __user * library) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); + error = inode_permission(nd.path.dentry->d_inode, + MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; @@ -680,7 +681,7 @@ struct file *open_exec(const char *name) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto out_path_put; - err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); + err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); if (err) goto out_path_put; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 8d0add625870..c454d5db28a5 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -585,7 +585,10 @@ got: spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; @@ -612,6 +615,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); return ERR_PTR(err); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7658b33e2653..02b39a5deb74 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -32,6 +32,7 @@ #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/fiemap.h> +#include <linux/namei.h> #include "ext2.h" #include "acl.h" #include "xip.h" @@ -1286,9 +1287,11 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) else inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { - if (ext2_inode_is_fast_symlink(inode)) + if (ext2_inode_is_fast_symlink(inode)) { inode->i_op = &ext2_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext2_symlink_inode_operations; if (test_opt(inode->i_sb, NOBH)) inode->i_mapping->a_ops = &ext2_nobh_aops; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 2a747252ec12..90ea17998a73 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,9 +41,11 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) int err = ext2_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -170,6 +172,7 @@ out: out_fail: inode_dec_link_count(inode); + unlock_new_inode(inode); iput (inode); goto out; } @@ -178,6 +181,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; + int err; if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; @@ -186,7 +190,14 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, inode_inc_link_count(inode); atomic_inc(&inode->i_count); - return ext2_add_nondir(dentry, inode); + err = ext2_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; } static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) @@ -222,12 +233,14 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) goto out_fail; d_instantiate(dentry, inode); + unlock_new_inode(inode); out: return err; out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); out_dir: inode_dec_link_count(dir); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 490bd0ed7896..5655fbcbd11f 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -579,7 +579,10 @@ got: ext3_set_inode_flags(inode); if (IS_DIRSYNC(inode)) handle->h_sync = 1; - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); @@ -627,6 +630,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); brelse(bitmap_bh); return ERR_PTR(err); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f8424ad89971..c4bdccf976b5 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -37,6 +37,7 @@ #include <linux/uio.h> #include <linux/bio.h> #include <linux/fiemap.h> +#include <linux/namei.h> #include "xattr.h" #include "acl.h" @@ -2817,9 +2818,11 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext3_dir_inode_operations; inode->i_fop = &ext3_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext3_inode_is_fast_symlink(inode)) + if (ext3_inode_is_fast_symlink(inode)) { inode->i_op = &ext3_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext3_symlink_inode_operations; ext3_set_aops(inode); } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 3e5edc92aa0b..297ea8dfac7c 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1652,9 +1652,11 @@ static int ext3_add_nondir(handle_t *handle, if (!err) { ext3_mark_inode_dirty(handle, inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } drop_nlink(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -1765,6 +1767,7 @@ retry: dir_block = ext3_bread (handle, inode, 0, 1, &err); if (!dir_block) { drop_nlink(inode); /* is this nlink == 0? */ + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1792,6 +1795,7 @@ retry: err = ext3_add_entry (handle, dentry, inode); if (err) { inode->i_nlink = 0; + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1800,6 +1804,7 @@ retry: ext3_update_dx_flag(dir); ext3_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); out_stop: ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) @@ -2174,6 +2179,7 @@ retry: mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { drop_nlink(inode); + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2221,7 +2227,14 @@ retry: inc_nlink(inode); atomic_inc(&inode->i_count); - err = ext3_add_nondir(handle, dentry, inode); + err = ext3_add_entry(handle, dentry, inode); + if (!err) { + ext3_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + } else { + drop_nlink(inode); + iput(inode); + } ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 08cac9fcace2..6e6052879aa2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -826,7 +826,10 @@ got: ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) handle->h_sync = 1; - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); @@ -881,6 +884,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); brelse(bitmap_bh); return ERR_PTR(err); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index be21a5ae33cb..7c3325e0b005 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -34,6 +34,7 @@ #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/mpage.h> +#include <linux/namei.h> #include <linux/uio.h> #include <linux/bio.h> #include "ext4_jbd2.h" @@ -4164,9 +4165,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext4_inode_is_fast_symlink(inode)) + if (ext4_inode_is_fast_symlink(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 63adcb792988..da98a9012fa5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1693,9 +1693,11 @@ static int ext4_add_nondir(handle_t *handle, if (!err) { ext4_mark_inode_dirty(handle, inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } drop_nlink(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -1830,6 +1832,7 @@ retry: if (err) { out_clear_inode: clear_nlink(inode); + unlock_new_inode(inode); ext4_mark_inode_dirty(handle, inode); iput(inode); goto out_stop; @@ -1838,6 +1841,7 @@ out_clear_inode: ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); out_stop: ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) @@ -2212,6 +2216,7 @@ retry: mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { clear_nlink(inode); + unlock_new_inode(inode); ext4_mark_inode_dirty(handle, inode); iput(inode); goto out_stop; @@ -2262,7 +2267,14 @@ retry: ext4_inc_count(handle, inode); atomic_inc(&inode->i_count); - err = ext4_add_nondir(handle, dentry, inode); + err = ext4_add_entry(handle, dentry, inode); + if (!err) { + ext4_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + } else { + drop_nlink(inode); + iput(inode); + } ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; diff --git a/fs/file_table.c b/fs/file_table.c index 0fbcacc3ea75..bbeeac6efa1a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,6 +32,9 @@ struct files_stat_struct files_stat = { /* public. Not pretty! */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +/* SLAB cache for file structures */ +static struct kmem_cache *filp_cachep __read_mostly; + static struct percpu_counter nr_files __cacheline_aligned_in_smp; static inline void file_free_rcu(struct rcu_head *head) @@ -397,7 +400,12 @@ too_bad: void __init files_init(unsigned long mempages) { int n; - /* One file with associated inode and dcache is very roughly 1K. + + filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + + /* + * One file with associated inode and dcache is very roughly 1K. * Per default don't use more than 10% of our memory for files. */ diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 9f3f2ceb73f0..03a6ea5e99f7 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -325,8 +325,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino) if (!VXFS_ISIMMED(vip)) { ip->i_op = &page_symlink_inode_operations; ip->i_mapping->a_ops = &vxfs_aops; - } else + } else { ip->i_op = &vxfs_immed_symlink_iops; + vip->vii_immed.vi_immed[ip->i_size] = '\0'; + } } else init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); diff --git a/fs/inode.c b/fs/inode.c index 098a2443196f..7de1cda92489 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1032,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) EXPORT_SYMBOL(iget_locked); +int insert_inode_locked(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + ino_t ino = inode->i_ino; + struct hlist_head *head = inode_hashtable + hash(sb, ino); + struct inode *old; + + inode->i_state |= I_LOCK|I_NEW; + while (1) { + spin_lock(&inode_lock); + old = find_inode_fast(sb, head, ino); + if (likely(!old)) { + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); + return 0; + } + __iget(old); + spin_unlock(&inode_lock); + wait_on_inode(old); + if (unlikely(!hlist_unhashed(&old->i_hash))) { + iput(old); + return -EBUSY; + } + iput(old); + } +} + +EXPORT_SYMBOL(insert_inode_locked); + +int insert_inode_locked4(struct inode *inode, unsigned long hashval, + int (*test)(struct inode *, void *), void *data) +{ + struct super_block *sb = inode->i_sb; + struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *old; + + inode->i_state |= I_LOCK|I_NEW; + + while (1) { + spin_lock(&inode_lock); + old = find_inode(sb, head, test, data); + if (likely(!old)) { + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); + return 0; + } + __iget(old); + spin_unlock(&inode_lock); + wait_on_inode(old); + if (unlikely(!hlist_unhashed(&old->i_hash))) { + iput(old); + return -EBUSY; + } + iput(old); + } +} + +EXPORT_SYMBOL(insert_inode_locked4); + /** * __insert_inode_hash - hash an inode * @inode: unhashed inode diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 70022fd1c539..d4d142c2edd4 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -79,7 +79,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); - return ERR_PTR(-ENOMEM); + rc = -ENOMEM; + goto fail; } jfs_inode = JFS_IP(inode); @@ -89,8 +90,12 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_warn("ialloc: diAlloc returned %d!", rc); if (rc == -EIO) make_bad_inode(inode); - iput(inode); - return ERR_PTR(rc); + goto fail_put; + } + + if (insert_inode_locked(inode) < 0) { + rc = -EINVAL; + goto fail_unlock; } inode->i_uid = current_fsuid(); @@ -112,11 +117,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { - DQUOT_DROP(inode); - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; - iput(inode); - return ERR_PTR(-EDQUOT); + rc = -EDQUOT; + goto fail_drop; } inode->i_mode = mode; @@ -158,4 +160,15 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_info("ialloc returns inode = 0x%p\n", inode); return inode; + +fail_drop: + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; +fail_unlock: + inode->i_nlink = 0; + unlock_new_inode(inode); +fail_put: + iput(inode); +fail: + return ERR_PTR(rc); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index cc3cedffbfa1..b4de56b851e4 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -155,7 +155,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, ip->i_fop = &jfs_file_operations; ip->i_mapping->a_ops = &jfs_aops; - insert_inode_hash(ip); mark_inode_dirty(ip); dip->i_ctime = dip->i_mtime = CURRENT_TIME; @@ -171,9 +170,12 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -289,7 +291,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) ip->i_op = &jfs_dir_inode_operations; ip->i_fop = &jfs_dir_operations; - insert_inode_hash(ip); mark_inode_dirty(ip); /* update parent directory inode */ @@ -306,9 +307,12 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -1019,7 +1023,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, goto out3; } - insert_inode_hash(ip); mark_inode_dirty(ip); dip->i_ctime = dip->i_mtime = CURRENT_TIME; @@ -1039,9 +1042,12 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -1399,7 +1405,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, jfs_ip->dev = new_encode_dev(rdev); init_special_inode(ip, ip->i_mode, rdev); - insert_inode_hash(ip); mark_inode_dirty(ip); dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -1417,9 +1422,12 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out1: free_UCSname(&dname); diff --git a/fs/namei.c b/fs/namei.c index af3783fff1de..dd5c9f0bf829 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask, return -EACCES; } +/** + * inode_permission - check for access rights to a given inode + * @inode: inode to check permission on + * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Used to check for read/write/execute permissions on an inode. + * We use "fsuid" for this, letting us set arbitrary permissions + * for filesystem access without changing the "normal" uids which + * are used for other things. + */ int inode_permission(struct inode *inode, int mask) { int retval; @@ -247,7 +257,6 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - /* Ordinary permission routines do not understand MAY_APPEND. */ if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, mask); else @@ -265,21 +274,6 @@ int inode_permission(struct inode *inode, int mask) } /** - * vfs_permission - check for access rights to a given path - * @nd: lookup result that describes the path - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) - * - * Used to check for read/write/execute permissions on a path. - * We use "fsuid" for this, letting us set arbitrary permissions - * for filesystem access without changing the "normal" uids which - * are used for other things. - */ -int vfs_permission(struct nameidata *nd, int mask) -{ - return inode_permission(nd->path.dentry->d_inode, mask); -} - -/** * file_permission - check for additional access rights to a given file * @file: file to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) @@ -289,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask) * * Note: * Do not use this function in new code. All access checks should - * be done using vfs_permission(). + * be done using inode_permission(). */ int file_permission(struct file *file, int mask) { @@ -527,18 +521,6 @@ out_unlock: return result; } -/* SMP-safe */ -static __always_inline void -walk_init_root(const char *name, struct nameidata *nd) -{ - struct fs_struct *fs = current->fs; - - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); -} - /* * Wrapper to retry pathname resolution whenever the underlying * file system returns an ESTALE. @@ -576,9 +558,16 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l goto fail; if (*link == '/') { + struct fs_struct *fs = current->fs; + path_put(&nd->path); - walk_init_root(link, nd); + + read_lock(&fs->lock); + nd->path = fs->root; + path_get(&fs->root); + read_unlock(&fs->lock); } + res = link_path_walk(link, nd); if (nd->depth || res || nd->last_type!=LAST_NORM) return res; @@ -859,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd) nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode); if (err == -EAGAIN) - err = vfs_permission(nd, MAY_EXEC); + err = inode_permission(nd->path.dentry->d_inode, + MAY_EXEC); if (err) break; @@ -1493,9 +1483,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, return error; } -int may_open(struct nameidata *nd, int acc_mode, int flag) +int may_open(struct path *path, int acc_mode, int flag) { - struct dentry *dentry = nd->path.dentry; + struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; int error; @@ -1516,13 +1506,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { flag &= ~O_TRUNC; } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { - if (nd->path.mnt->mnt_flags & MNT_NODEV) + if (path->mnt->mnt_flags & MNT_NODEV) return -EACCES; flag &= ~O_TRUNC; } - error = vfs_permission(nd, acc_mode); + error = inode_permission(inode, acc_mode); if (error) return error; /* @@ -1556,6 +1546,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) * Refuse to truncate files with mandatory locks held on them. */ error = locks_verify_locked(inode); + if (!error) + error = security_path_truncate(path, 0, + ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); if (!error) { DQUOT_INIT(inode); @@ -1586,14 +1579,18 @@ static int __open_namei_create(struct nameidata *nd, struct path *path, if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; + error = security_path_mknod(&nd->path, path->dentry, mode, 0); + if (error) + goto out_unlock; error = vfs_create(dir->d_inode, path->dentry, mode, nd); +out_unlock: mutex_unlock(&dir->d_inode->i_mutex); dput(nd->path.dentry); nd->path.dentry = path->dentry; if (error) return error; /* Don't check for write permission, don't truncate */ - return may_open(nd, 0, flag & ~O_TRUNC); + return may_open(&nd->path, 0, flag & ~O_TRUNC); } /* @@ -1779,7 +1776,7 @@ ok: if (error) goto exit; } - error = may_open(&nd, acc_mode, flag); + error = may_open(&nd.path, acc_mode, flag); if (error) { if (will_write) mnt_drop_write(nd.path.mnt); @@ -1999,6 +1996,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_mknod(&nd.path, dentry, mode, dev); + if (error) + goto out_drop_write; switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); @@ -2011,6 +2011,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); break; } +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2070,7 +2071,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_mkdir(&nd.path, dentry, mode); + if (error) + goto out_drop_write; error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2180,7 +2185,11 @@ static long do_rmdir(int dfd, const char __user *pathname) error = mnt_want_write(nd.path.mnt); if (error) goto exit3; + error = security_path_rmdir(&nd.path, dentry); + if (error) + goto exit4; error = vfs_rmdir(nd.path.dentry->d_inode, dentry); +exit4: mnt_drop_write(nd.path.mnt); exit3: dput(dentry); @@ -2265,7 +2274,11 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = mnt_want_write(nd.path.mnt); if (error) goto exit2; + error = security_path_unlink(&nd.path, dentry); + if (error) + goto exit3; error = vfs_unlink(nd.path.dentry->d_inode, dentry); +exit3: mnt_drop_write(nd.path.mnt); exit2: dput(dentry); @@ -2346,7 +2359,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_symlink(&nd.path, dentry, from); + if (error) + goto out_drop_write; error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2443,7 +2460,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_link(old_path.dentry, &nd.path, new_dentry); + if (error) + goto out_drop_write; error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(new_dentry); @@ -2679,8 +2700,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname, error = mnt_want_write(oldnd.path.mnt); if (error) goto exit5; + error = security_path_rename(&oldnd.path, old_dentry, + &newnd.path, new_dentry); + if (error) + goto exit6; error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); +exit6: mnt_drop_write(oldnd.path.mnt); exit5: dput(new_dentry); @@ -2750,13 +2776,16 @@ int vfs_follow_link(struct nameidata *nd, const char *link) /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) { - struct page * page; + char *kaddr; + struct page *page; struct address_space *mapping = dentry->d_inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) return (char*)page; *ppage = page; - return kmap(page); + kaddr = kmap(page); + nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1); + return kaddr; } int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) @@ -2849,7 +2878,6 @@ EXPORT_SYMBOL(path_lookup); EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); -EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); EXPORT_SYMBOL(unlock_rename); EXPORT_SYMBOL(vfs_create); @@ -2865,3 +2893,10 @@ EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); + +/* to be mentioned only in INIT_TASK */ +struct fs_struct init_fs = { + .count = ATOMIC_INIT(1), + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, +}; diff --git a/fs/namespace.c b/fs/namespace.c index 1c09cab8f7cf..a40685d800a8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1990,7 +1990,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (!new_ns->root) { up_write(&namespace_sem); kfree(new_ns); - return ERR_PTR(-ENOMEM);; + return ERR_PTR(-ENOMEM); } spin_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); diff --git a/fs/nfsctl.c b/fs/nfsctl.c index b1acbd6ab6fb..b27451909dff 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -38,9 +38,10 @@ static struct file *do_open(char *name, int flags) return ERR_PTR(error); if (flags == O_RDWR) - error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE); + error = may_open(&nd.path, MAY_READ|MAY_WRITE, + FMODE_READ|FMODE_WRITE); else - error = may_open(&nd, MAY_WRITE, FMODE_WRITE); + error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE); if (!error) return dentry_open(nd.path.dentry, nd.path.mnt, flags, diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig new file mode 100644 index 000000000000..50914d7303c6 --- /dev/null +++ b/fs/notify/Kconfig @@ -0,0 +1,2 @@ +source "fs/notify/dnotify/Kconfig" +source "fs/notify/inotify/Kconfig" diff --git a/fs/notify/Makefile b/fs/notify/Makefile new file mode 100644 index 000000000000..5a95b6010ce7 --- /dev/null +++ b/fs/notify/Makefile @@ -0,0 +1,2 @@ +obj-y += dnotify/ +obj-y += inotify/ diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig new file mode 100644 index 000000000000..26adf5dfa646 --- /dev/null +++ b/fs/notify/dnotify/Kconfig @@ -0,0 +1,10 @@ +config DNOTIFY + bool "Dnotify support" + default y + help + Dnotify is a directory-based per-fd file change notification system + that uses signals to communicate events to user-space. There exist + superior alternatives, but some applications may still rely on + dnotify. + + If unsure, say Y. diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile new file mode 100644 index 000000000000..f145251dcadb --- /dev/null +++ b/fs/notify/dnotify/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DNOTIFY) += dnotify.o diff --git a/fs/dnotify.c b/fs/notify/dnotify/dnotify.c index 676073b8dda5..b0aa2cde80bd 100644 --- a/fs/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn->dn_next = inode->i_dnotify; inode->i_dnotify = dn; spin_unlock(&inode->i_lock); - - if (filp->f_op && filp->f_op->dir_notify) - return filp->f_op->dir_notify(filp, arg); return 0; out_free: diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig new file mode 100644 index 000000000000..446792841023 --- /dev/null +++ b/fs/notify/inotify/Kconfig @@ -0,0 +1,27 @@ +config INOTIFY + bool "Inotify file change notification support" + default y + ---help--- + Say Y here to enable inotify support. Inotify is a file change + notification system and a replacement for dnotify. Inotify fixes + numerous shortcomings in dnotify and introduces several new features + including multiple file events, one-shot support, and unmount + notification. + + For more information, see <file:Documentation/filesystems/inotify.txt> + + If unsure, say Y. + +config INOTIFY_USER + bool "Inotify support for userspace" + depends on INOTIFY + default y + ---help--- + Say Y here to enable inotify support for userspace, including the + associated system calls. Inotify allows monitoring of both files and + directories via a single open fd. Events are read from the file + descriptor, which is also select()- and poll()-able. + + For more information, see <file:Documentation/filesystems/inotify.txt> + + If unsure, say Y. diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile new file mode 100644 index 000000000000..e290f3bb9d8d --- /dev/null +++ b/fs/notify/inotify/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_INOTIFY) += inotify.o +obj-$(CONFIG_INOTIFY_USER) += inotify_user.o diff --git a/fs/inotify.c b/fs/notify/inotify/inotify.c index dae3f28f30d4..dae3f28f30d4 100644 --- a/fs/inotify.c +++ b/fs/notify/inotify/inotify.c diff --git a/fs/inotify_user.c b/fs/notify/inotify/inotify_user.c index e2425bbd871f..400f8064a548 100644 --- a/fs/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -76,10 +76,10 @@ struct inotify_device { struct mutex ev_mutex; /* protects event queue */ struct mutex up_mutex; /* synchronizes watch updates */ struct list_head events; /* list of queued events */ - atomic_t count; /* reference count */ struct user_struct *user; /* user who opened this dev */ struct inotify_handle *ih; /* inotify handle */ struct fasync_struct *fa; /* async notification */ + atomic_t count; /* reference count */ unsigned int queue_size; /* size of the queue (bytes) */ unsigned int event_count; /* number of pending events */ unsigned int max_events; /* maximum number of events */ diff --git a/fs/open.c b/fs/open.c index c0a426d5766c..1cd7d40e9991 100644 --- a/fs/open.c +++ b/fs/open.c @@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) goto put_write_and_out; error = locks_verify_truncate(inode, NULL, length); + if (!error) + error = security_path_truncate(&path, length, 0); if (!error) { DQUOT_INIT(inode); error = do_truncate(path.dentry, length, 0, NULL); @@ -329,6 +331,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) + error = security_path_truncate(&file->f_path, length, + ATTR_MTIME|ATTR_CTIME); + if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); out_putf: fput(file); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6c4c2c69449f..145c2d3e5e01 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1753,6 +1753,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, struct inode *inode) { struct super_block *sb; + struct reiserfs_iget_args args; INITIALIZE_PATH(path_to_key); struct cpu_key key; struct item_head ih; @@ -1780,6 +1781,14 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, err = -ENOMEM; goto out_bad_inode; } + args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); + memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); + args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); + if (insert_inode_locked4(inode, args.objectid, + reiserfs_find_actor, &args) < 0) { + err = -EINVAL; + goto out_bad_inode; + } if (old_format_only(sb)) /* not a perfect generation count, as object ids can be reused, but ** this is as good as reiserfs can do right now. @@ -1859,13 +1868,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, } else { inode2sd(&sd, inode, inode->i_size); } - // these do not go to on-disk stat data - inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); - // store in in-core inode the key of stat data and version all // object items will have (directory items will have old offset // format, other new objects will consist of new items) - memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) set_inode_item_key_version(inode, KEY_FORMAT_3_5); else @@ -1929,7 +1934,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, reiserfs_mark_inode_private(inode); } - insert_inode_hash(inode); reiserfs_update_sd(th, inode); reiserfs_check_path(&path_to_key); @@ -1956,6 +1960,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, out_inserted_sd: inode->i_nlink = 0; th->t_trans_id = 0; /* so the caller can't use this handle later */ + unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ /* If we were inheriting an ACL, we need to release the lock so that * iput doesn't deadlock in reiserfs_delete_xattrs. The locking diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 4f322e5ed840..738967f6c8ee 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -646,6 +646,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } @@ -653,6 +654,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, reiserfs_update_inode_transaction(dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -727,11 +729,13 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -812,6 +816,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } @@ -819,6 +824,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) reiserfs_update_sd(&th, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: if (locked) @@ -1096,11 +1102,13 @@ static int reiserfs_symlink(struct inode *parent_dir, err = journal_end(&th, parent_dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, parent_dir->i_sb, jbegin_count); out_failed: reiserfs_write_unlock(parent_dir->i_sb); diff --git a/fs/seq_file.c b/fs/seq_file.c index 16c211558c22..99d8b8cfc9b7 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -389,8 +389,14 @@ char *mangle_path(char *s, char *p, char *esc) } EXPORT_SYMBOL(mangle_path); -/* - * return the absolute path of 'dentry' residing in mount 'mnt'. +/** + * seq_path - seq_file interface to print a pathname + * @m: the seq_file handle + * @path: the struct path to print + * @esc: set of characters to escape in the output + * + * return the absolute path of 'path', as represented by the + * dentry / mnt pair in the path parameter. */ int seq_path(struct seq_file *m, struct path *path, char *esc) { diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index df0d435baa48..3d81bf58dae2 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/vfs.h> +#include <linux/namei.h> #include <asm/byteorder.h> #include "sysv.h" @@ -163,8 +164,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) if (inode->i_blocks) { inode->i_op = &sysv_symlink_inode_operations; inode->i_mapping->a_ops = &sysv_aops; - } else + } else { inode->i_op = &sysv_fast_symlink_inode_operations; + nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size, + sizeof(SYSV_I(inode)->i_data) - 1); + } } else init_special_inode(inode, inode->i_mode, rdev); } |