diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/bad_inode.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 15 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.c | 32 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.h | 5 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 12 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 1 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 30 | ||||
| -rw-r--r-- | fs/btrfs/sysfs.c | 146 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 114 | ||||
| -rw-r--r-- | fs/btrfs/tree-log.c | 2 | ||||
| -rw-r--r-- | fs/cifs/cifsfs.c | 3 | ||||
| -rw-r--r-- | fs/coda/pioctl.c | 2 | ||||
| -rw-r--r-- | fs/logfs/dir.c | 8 | ||||
| -rw-r--r-- | fs/namei.c | 6 | ||||
| -rw-r--r-- | fs/nfsd/Kconfig | 1 | ||||
| -rw-r--r-- | fs/nfsd/nfsctl.c | 19 | ||||
| -rw-r--r-- | fs/nfsd/vfs.c | 19 | ||||
| -rw-r--r-- | fs/nilfs2/inode.c | 7 | ||||
| -rw-r--r-- | fs/proc/base.c | 6 | ||||
| -rw-r--r-- | fs/proc/proc_sysctl.c | 3 | ||||
| -rw-r--r-- | fs/reiserfs/xattr.c | 2 | ||||
| -rw-r--r-- | fs/timerfd.c | 5 | ||||
| -rw-r--r-- | fs/ubifs/super.c | 1 | 
25 files changed, 202 insertions, 246 deletions
| diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 9ad2369d9e35..bfcb18feb1df 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -231,9 +231,6 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,  static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)  { -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD; -  	return -EIO;  } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 378b5b4443f3..300628795fdb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -967,6 +967,12 @@ struct btrfs_fs_info {  	struct srcu_struct subvol_srcu;  	spinlock_t trans_lock; +	/* +	 * the reloc mutex goes with the trans lock, it is taken +	 * during commit to protect us from the relocation code +	 */ +	struct mutex reloc_mutex; +  	struct list_head trans_list;  	struct list_head hashers;  	struct list_head dead_roots; @@ -1172,6 +1178,14 @@ struct btrfs_root {  	u32 type;  	u64 highest_objectid; + +	/* btrfs_record_root_in_trans is a multi-step process, +	 * and it can race with the balancing code.   But the +	 * race is very small, and only the first time the root +	 * is added to each transaction.  So in_trans_setup +	 * is used to tell us when more checks are required +	 */ +	unsigned long in_trans_setup;  	int ref_cows;  	int track_dirty;  	int in_radix; @@ -1181,7 +1195,6 @@ struct btrfs_root {  	struct btrfs_key defrag_max;  	int defrag_running;  	char *name; -	int in_sysfs;  	/* the dirty list is only used by non-reference counted roots */  	struct list_head dirty_list; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 6462c29d2d37..f1cbd028f7b3 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)  		item->data_len = data_len;  		item->ins_or_del = 0;  		item->bytes_reserved = 0; -		item->block_rsv = NULL;  		item->delayed_node = NULL;  		atomic_set(&item->refs, 1);  	} @@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,  	num_bytes = btrfs_calc_trans_metadata_size(root, 1);  	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); -	if (!ret) { +	if (!ret)  		item->bytes_reserved = num_bytes; -		item->block_rsv = dst_rsv; -	}  	return ret;  } @@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,  static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,  						struct btrfs_delayed_item *item)  { +	struct btrfs_block_rsv *rsv; +  	if (!item->bytes_reserved)  		return; -	btrfs_block_rsv_release(root, item->block_rsv, +	rsv = &root->fs_info->global_block_rsv; +	btrfs_block_rsv_release(root, rsv,  				item->bytes_reserved);  } @@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,  	struct btrfs_delayed_root *delayed_root;  	struct btrfs_delayed_node *curr_node, *prev_node;  	struct btrfs_path *path; +	struct btrfs_block_rsv *block_rsv;  	int ret = 0;  	path = btrfs_alloc_path(); @@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,  		return -ENOMEM;  	path->leave_spinning = 1; +	block_rsv = trans->block_rsv; +	trans->block_rsv = &root->fs_info->global_block_rsv; +  	delayed_root = btrfs_get_delayed_root(root);  	curr_node = btrfs_first_delayed_node(delayed_root); @@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,  	}  	btrfs_free_path(path); +	trans->block_rsv = block_rsv;  	return ret;  } @@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,  					      struct btrfs_delayed_node *node)  {  	struct btrfs_path *path; +	struct btrfs_block_rsv *block_rsv;  	int ret;  	path = btrfs_alloc_path(); @@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,  		return -ENOMEM;  	path->leave_spinning = 1; +	block_rsv = trans->block_rsv; +	trans->block_rsv = &node->root->fs_info->global_block_rsv; +  	ret = btrfs_insert_delayed_items(trans, path, node->root, node);  	if (!ret)  		ret = btrfs_delete_delayed_items(trans, path, node->root, node); @@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,  		ret = btrfs_update_delayed_inode(trans, node->root, path, node);  	btrfs_free_path(path); +	trans->block_rsv = block_rsv;  	return ret;  } @@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)  	struct btrfs_path *path;  	struct btrfs_delayed_node *delayed_node = NULL;  	struct btrfs_root *root; +	struct btrfs_block_rsv *block_rsv;  	unsigned long nr = 0;  	int need_requeue = 0;  	int ret; @@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)  	if (IS_ERR(trans))  		goto free_path; +	block_rsv = trans->block_rsv; +	trans->block_rsv = &root->fs_info->global_block_rsv; +  	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);  	if (!ret)  		ret = btrfs_delete_delayed_items(trans, path, root, @@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)  	nr = trans->blocks_used; +	trans->block_rsv = block_rsv;  	btrfs_end_transaction_dmeta(trans, root);  	__btrfs_btree_balance_dirty(root, nr);  free_path: @@ -1222,6 +1237,13 @@ again:  	return 0;  } +void btrfs_assert_delayed_root_empty(struct btrfs_root *root) +{ +	struct btrfs_delayed_root *delayed_root; +	delayed_root = btrfs_get_delayed_root(root); +	WARN_ON(btrfs_first_delayed_node(delayed_root)); +} +  void btrfs_balance_delayed_items(struct btrfs_root *root)  {  	struct btrfs_delayed_root *delayed_root; diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index eb7d240aa648..d1a6a2915c66 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -75,7 +75,6 @@ struct btrfs_delayed_item {  	struct list_head tree_list;	/* used for batch insert/delete items */  	struct list_head readdir_list;	/* used for readdir items */  	u64 bytes_reserved; -	struct btrfs_block_rsv *block_rsv;  	struct btrfs_delayed_node *delayed_node;  	atomic_t refs;  	int ins_or_del; @@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,  /* for init */  int __init btrfs_delayed_inode_init(void);  void btrfs_delayed_inode_exit(void); + +/* for debugging */ +void btrfs_assert_delayed_root_empty(struct btrfs_root *root); +  #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9f68c6898653..1ac8db5dc0a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,  	root->last_trans = 0;  	root->highest_objectid = 0;  	root->name = NULL; -	root->in_sysfs = 0;  	root->inode_tree = RB_ROOT;  	INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);  	root->block_rsv = NULL; @@ -1300,19 +1299,21 @@ again:  		return root;  	root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); -	if (!root->free_ino_ctl) -		goto fail;  	root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),  					GFP_NOFS); -	if (!root->free_ino_pinned) +	if (!root->free_ino_pinned || !root->free_ino_ctl) { +		ret = -ENOMEM;  		goto fail; +	}  	btrfs_init_free_ino_ctl(root);  	mutex_init(&root->fs_commit_mutex);  	spin_lock_init(&root->cache_lock);  	init_waitqueue_head(&root->cache_wait); -	set_anon_super(&root->anon_super, NULL); +	ret = set_anon_super(&root->anon_super, NULL); +	if (ret) +		goto fail;  	if (btrfs_root_refs(&root->root_item) == 0) {  		ret = -ENOENT; @@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,  	spin_lock_init(&fs_info->fs_roots_radix_lock);  	spin_lock_init(&fs_info->delayed_iput_lock);  	spin_lock_init(&fs_info->defrag_inodes_lock); +	mutex_init(&fs_info->reloc_mutex);  	init_completion(&fs_info->kobj_unregister);  	fs_info->tree_root = tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b42efc2ded51..1f61bf5b4960 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,  	if (reserved == 0)  		return 0; -	/* nothing to shrink - nothing to reclaim */ -	if (root->fs_info->delalloc_bytes == 0) -		return 0; -  	max_reclaim = min(reserved, to_reclaim);  	while (loops < 1024) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 751ddf8fc58a..0a9b10c5b0a7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,  	ret = btrfs_update_inode(trans, root, dir);  	BUG_ON(ret); +	btrfs_free_path(path);  	return 0;  } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b793d112d1f6..a3c4751e07db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,  	ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);  	BUG_ON(ret); +	spin_lock(&root->fs_info->trans_lock);  	list_add(&pending_snapshot->list,  		 &trans->transaction->pending_snapshots); +	spin_unlock(&root->fs_info->trans_lock);  	if (async_transid) {  		*async_transid = trans->transid;  		ret = btrfs_commit_transaction_async(trans, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b1ef27cc673b..5e0a3dc79a45 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,  	int ret;  	if (!root->reloc_root) -		return 0; +		goto out;  	reloc_root = root->reloc_root;  	root_item = &reloc_root->root_item; @@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,  	ret = btrfs_update_root(trans, root->fs_info->tree_root,  				&reloc_root->root_key, root_item);  	BUG_ON(ret); + +out:  	return 0;  } @@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)  	u64 num_bytes = 0;  	int ret; -	spin_lock(&root->fs_info->trans_lock); +	mutex_lock(&root->fs_info->reloc_mutex);  	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;  	rc->merging_rsv_size += rc->nodes_relocated * 2; -	spin_unlock(&root->fs_info->trans_lock); +	mutex_unlock(&root->fs_info->reloc_mutex); +  again:  	if (!err) {  		num_bytes = rc->merging_rsv_size; @@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)  	int ret;  again:  	root = rc->extent_root; -	spin_lock(&root->fs_info->trans_lock); + +	/* +	 * this serializes us with btrfs_record_root_in_transaction, +	 * we have to make sure nobody is in the middle of +	 * adding their roots to the list while we are +	 * doing this splice +	 */ +	mutex_lock(&root->fs_info->reloc_mutex);  	list_splice_init(&rc->reloc_roots, &reloc_roots); -	spin_unlock(&root->fs_info->trans_lock); +	mutex_unlock(&root->fs_info->reloc_mutex);  	while (!list_empty(&reloc_roots)) {  		found = 1; @@ -3590,17 +3600,19 @@ next:  static void set_reloc_control(struct reloc_control *rc)  {  	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; -	spin_lock(&fs_info->trans_lock); + +	mutex_lock(&fs_info->reloc_mutex);  	fs_info->reloc_ctl = rc; -	spin_unlock(&fs_info->trans_lock); +	mutex_unlock(&fs_info->reloc_mutex);  }  static void unset_reloc_control(struct reloc_control *rc)  {  	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; -	spin_lock(&fs_info->trans_lock); + +	mutex_lock(&fs_info->reloc_mutex);  	fs_info->reloc_ctl = NULL; -	spin_unlock(&fs_info->trans_lock); +	mutex_unlock(&fs_info->reloc_mutex);  }  static int check_extent_flags(u64 flags) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c3c223ae6691..daac9ae6d731 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -28,152 +28,6 @@  #include "disk-io.h"  #include "transaction.h" -static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) -{ -	return snprintf(buf, PAGE_SIZE, "%llu\n", -		(unsigned long long)btrfs_root_used(&root->root_item)); -} - -static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) -{ -	return snprintf(buf, PAGE_SIZE, "%llu\n", -		(unsigned long long)btrfs_root_limit(&root->root_item)); -} - -static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) -{ - -	return snprintf(buf, PAGE_SIZE, "%llu\n", -		(unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); -} - -static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) -{ -	return snprintf(buf, PAGE_SIZE, "%llu\n", -		(unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); -} - -static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) -{ -	return snprintf(buf, PAGE_SIZE, "%llu\n", -		(unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); -} - -/* this is for root attrs (subvols/snapshots) */ -struct btrfs_root_attr { -	struct attribute attr; -	ssize_t (*show)(struct btrfs_root *, char *); -	ssize_t (*store)(struct btrfs_root *, const char *, size_t); -}; - -#define ROOT_ATTR(name, mode, show, store) \ -static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \ -							      show, store) - -ROOT_ATTR(blocks_used,	0444,	root_blocks_used_show,	NULL); -ROOT_ATTR(block_limit,	0644,	root_block_limit_show,	NULL); - -static struct attribute *btrfs_root_attrs[] = { -	&btrfs_root_attr_blocks_used.attr, -	&btrfs_root_attr_block_limit.attr, -	NULL, -}; - -/* this is for super attrs (actual full fs) */ -struct btrfs_super_attr { -	struct attribute attr; -	ssize_t (*show)(struct btrfs_fs_info *, char *); -	ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); -}; - -#define SUPER_ATTR(name, mode, show, store) \ -static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \ -								show, store) - -SUPER_ATTR(blocks_used,		0444,	super_blocks_used_show,		NULL); -SUPER_ATTR(total_blocks,	0444,	super_total_blocks_show,	NULL); -SUPER_ATTR(blocksize,		0444,	super_blocksize_show,		NULL); - -static struct attribute *btrfs_super_attrs[] = { -	&btrfs_super_attr_blocks_used.attr, -	&btrfs_super_attr_total_blocks.attr, -	&btrfs_super_attr_blocksize.attr, -	NULL, -}; - -static ssize_t btrfs_super_attr_show(struct kobject *kobj, -				    struct attribute *attr, char *buf) -{ -	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, -						super_kobj); -	struct btrfs_super_attr *a = container_of(attr, -						  struct btrfs_super_attr, -						  attr); - -	return a->show ? a->show(fs, buf) : 0; -} - -static ssize_t btrfs_super_attr_store(struct kobject *kobj, -				     struct attribute *attr, -				     const char *buf, size_t len) -{ -	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, -						super_kobj); -	struct btrfs_super_attr *a = container_of(attr, -						  struct btrfs_super_attr, -						  attr); - -	return a->store ? a->store(fs, buf, len) : 0; -} - -static ssize_t btrfs_root_attr_show(struct kobject *kobj, -				    struct attribute *attr, char *buf) -{ -	struct btrfs_root *root = container_of(kobj, struct btrfs_root, -						root_kobj); -	struct btrfs_root_attr *a = container_of(attr, -						 struct btrfs_root_attr, -						 attr); - -	return a->show ? a->show(root, buf) : 0; -} - -static ssize_t btrfs_root_attr_store(struct kobject *kobj, -				     struct attribute *attr, -				     const char *buf, size_t len) -{ -	struct btrfs_root *root = container_of(kobj, struct btrfs_root, -						root_kobj); -	struct btrfs_root_attr *a = container_of(attr, -						 struct btrfs_root_attr, -						 attr); -	return a->store ? a->store(root, buf, len) : 0; -} - -static void btrfs_super_release(struct kobject *kobj) -{ -	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, -						super_kobj); -	complete(&fs->kobj_unregister); -} - -static void btrfs_root_release(struct kobject *kobj) -{ -	struct btrfs_root *root = container_of(kobj, struct btrfs_root, -						root_kobj); -	complete(&root->kobj_unregister); -} - -static const struct sysfs_ops btrfs_super_attr_ops = { -	.show	= btrfs_super_attr_show, -	.store	= btrfs_super_attr_store, -}; - -static const struct sysfs_ops btrfs_root_attr_ops = { -	.show	= btrfs_root_attr_show, -	.store	= btrfs_root_attr_store, -}; -  /* /sys/fs/btrfs/ entry */  static struct kset *btrfs_kset; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b3590b9fe98..51dcec86757f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)   * to make sure the old root from before we joined the transaction is deleted   * when the transaction commits   */ -int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, +static int record_root_in_trans(struct btrfs_trans_handle *trans,  			       struct btrfs_root *root)  {  	if (root->ref_cows && root->last_trans < trans->transid) {  		WARN_ON(root == root->fs_info->extent_root);  		WARN_ON(root->commit_root != root->node); +		/* +		 * see below for in_trans_setup usage rules +		 * we have the reloc mutex held now, so there +		 * is only one writer in this function +		 */ +		root->in_trans_setup = 1; + +		/* make sure readers find in_trans_setup before +		 * they find our root->last_trans update +		 */ +		smp_wmb(); +  		spin_lock(&root->fs_info->fs_roots_radix_lock);  		if (root->last_trans == trans->transid) {  			spin_unlock(&root->fs_info->fs_roots_radix_lock);  			return 0;  		} -		root->last_trans = trans->transid;  		radix_tree_tag_set(&root->fs_info->fs_roots_radix,  			   (unsigned long)root->root_key.objectid,  			   BTRFS_ROOT_TRANS_TAG);  		spin_unlock(&root->fs_info->fs_roots_radix_lock); +		root->last_trans = trans->transid; + +		/* this is pretty tricky.  We don't want to +		 * take the relocation lock in btrfs_record_root_in_trans +		 * unless we're really doing the first setup for this root in +		 * this transaction. +		 * +		 * Normally we'd use root->last_trans as a flag to decide +		 * if we want to take the expensive mutex. +		 * +		 * But, we have to set root->last_trans before we +		 * init the relocation root, otherwise, we trip over warnings +		 * in ctree.c.  The solution used here is to flag ourselves +		 * with root->in_trans_setup.  When this is 1, we're still +		 * fixing up the reloc trees and everyone must wait. +		 * +		 * When this is zero, they can trust root->last_trans and fly +		 * through btrfs_record_root_in_trans without having to take the +		 * lock.  smp_wmb() makes sure that all the writes above are +		 * done before we pop in the zero below +		 */  		btrfs_init_reloc_root(trans, root); +		smp_wmb(); +		root->in_trans_setup = 0;  	}  	return 0;  } + +int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, +			       struct btrfs_root *root) +{ +	if (!root->ref_cows) +		return 0; + +	/* +	 * see record_root_in_trans for comments about in_trans_setup usage +	 * and barriers +	 */ +	smp_rmb(); +	if (root->last_trans == trans->transid && +	    !root->in_trans_setup) +		return 0; + +	mutex_lock(&root->fs_info->reloc_mutex); +	record_root_in_trans(trans, root); +	mutex_unlock(&root->fs_info->reloc_mutex); + +	return 0; +} +  /* wait for commit against the current transaction to become unblocked   * when this is done, it is safe to start a new transaction, but the current   * transaction might not be fully on disk. @@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	parent = dget_parent(dentry);  	parent_inode = parent->d_inode;  	parent_root = BTRFS_I(parent_inode)->root; -	btrfs_record_root_in_trans(trans, parent_root); +	record_root_in_trans(trans, parent_root);  	/*  	 * insert the directory item @@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	ret = btrfs_update_inode(trans, parent_root, parent_inode);  	BUG_ON(ret); -	btrfs_record_root_in_trans(trans, root); +	/* +	 * pull in the delayed directory update +	 * and the delayed inode item +	 * otherwise we corrupt the FS during +	 * snapshot +	 */ +	ret = btrfs_run_delayed_items(trans, root); +	BUG_ON(ret); + +	record_root_in_trans(trans, root);  	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);  	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));  	btrfs_check_and_init_root_item(new_root_item); @@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,  	int ret;  	list_for_each_entry(pending, head, list) { -		/* -		 * We must deal with the delayed items before creating -		 * snapshots, or we will create a snapthot with inconsistent -		 * information. -		*/ -		ret = btrfs_run_delayed_items(trans, fs_info->fs_root); -		BUG_ON(ret); -  		ret = create_pending_snapshot(trans, fs_info, pending);  		BUG_ON(ret);  	} @@ -1241,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  			schedule_timeout(1);  		finish_wait(&cur_trans->writer_wait, &wait); -		spin_lock(&root->fs_info->trans_lock); -		root->fs_info->trans_no_join = 1; -		spin_unlock(&root->fs_info->trans_lock);  	} while (atomic_read(&cur_trans->num_writers) > 1 ||  		 (should_grow && cur_trans->num_joined != joined)); -	ret = create_pending_snapshots(trans, root->fs_info); -	BUG_ON(ret); +	/* +	 * Ok now we need to make sure to block out any other joins while we +	 * commit the transaction.  We could have started a join before setting +	 * no_join so make sure to wait for num_writers to == 1 again. +	 */ +	spin_lock(&root->fs_info->trans_lock); +	root->fs_info->trans_no_join = 1; +	spin_unlock(&root->fs_info->trans_lock); +	wait_event(cur_trans->writer_wait, +		   atomic_read(&cur_trans->num_writers) == 1); + +	/* +	 * the reloc mutex makes sure that we stop +	 * the balancing code from coming in and moving +	 * extents around in the middle of the commit +	 */ +	mutex_lock(&root->fs_info->reloc_mutex);  	ret = btrfs_run_delayed_items(trans, root);  	BUG_ON(ret); +	ret = create_pending_snapshots(trans, root->fs_info); +	BUG_ON(ret); +  	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);  	BUG_ON(ret); +	/* +	 * make sure none of the code above managed to slip in a +	 * delayed item +	 */ +	btrfs_assert_delayed_root_empty(root); +  	WARN_ON(cur_trans != trans->transaction);  	btrfs_scrub_pause(root); @@ -1312,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	root->fs_info->running_transaction = NULL;  	root->fs_info->trans_no_join = 0;  	spin_unlock(&root->fs_info->trans_lock); +	mutex_unlock(&root->fs_info->reloc_mutex);  	wake_up(&root->fs_info->transaction_wait); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 592396c6dc47..4ce8a9f41d1e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3177,7 +3177,7 @@ again:  		tmp_key.offset = (u64)-1;  		wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); -		BUG_ON(!wc.replay_dest); +		BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));  		wc.replay_dest->log_root = log;  		btrfs_record_root_in_trans(trans, wc.replay_dest); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e9def996e383..2f0c58646c10 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -257,9 +257,6 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)  {  	struct cifs_sb_info *cifs_sb; -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD; -  	cifs_sb = CIFS_SB(inode->i_sb);  	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 6cbb3afb36dc..cb140ef293e4 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -43,8 +43,6 @@ const struct file_operations coda_ioctl_operations = {  /* the coda pioctl inode ops */  static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)  { -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD;  	return (mask & MAY_EXEC) ? -EACCES : 0;  } diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 9ed89d1663f8..1afae26cf236 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -555,13 +555,6 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,  	return __logfs_create(dir, dentry, inode, target, destlen);  } -static int logfs_permission(struct inode *inode, int mask, unsigned int flags) -{ -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD; -	return generic_permission(inode, mask, flags, NULL); -} -  static int logfs_link(struct dentry *old_dentry, struct inode *dir,  		struct dentry *dentry)  { @@ -820,7 +813,6 @@ const struct inode_operations logfs_dir_iops = {  	.mknod		= logfs_mknod,  	.rename		= logfs_rename,  	.rmdir		= logfs_rmdir, -	.permission	= logfs_permission,  	.symlink	= logfs_symlink,  	.unlink		= logfs_unlink,  }; diff --git a/fs/namei.c b/fs/namei.c index 9e425e7e6c8f..0223c41fb114 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -238,7 +238,8 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,  	/*  	 * Read/write DACs are always overridable. -	 * Executable DACs are overridable if at least one exec bit is set. +	 * Executable DACs are overridable for all directories and +	 * for non-directories that have least one exec bit set.  	 */  	if (!(mask & MAY_EXEC) || execute_ok(inode))  		if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) @@ -1011,9 +1012,6 @@ failed:   * Follow down to the covering mount currently visible to userspace.  At each   * point, the filesystem owning that dentry may be queried as to whether the   * caller is permitted to proceed or not. - * - * Care must be taken as namespace_sem may be held (indicated by mounting_here - * being true).   */  int follow_down(struct path *path)  { diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 18b3e8975fe0..fbb2a5ef5817 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -82,6 +82,7 @@ config NFSD_V4  	select NFSD_V3  	select FS_POSIX_ACL  	select SUNRPC_GSS +	select CRYPTO  	help  	  This option enables support in your system's NFS server for  	  version 4 of the NFS protocol (RFC 3530). diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f5eae40f34e..2b1449dd2f49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -13,6 +13,7 @@  #include <linux/lockd/lockd.h>  #include <linux/sunrpc/clnt.h>  #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/gss_krb5_enctypes.h>  #include "idmap.h"  #include "nfsd.h" @@ -189,18 +190,10 @@ static struct file_operations export_features_operations = {  	.release	= single_release,  }; -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)  static int supported_enctypes_show(struct seq_file *m, void *v)  { -	struct gss_api_mech *k5mech; - -	k5mech = gss_mech_get_by_name("krb5"); -	if (k5mech == NULL) -		goto out; -	if (k5mech->gm_upcall_enctypes != NULL) -		seq_printf(m, k5mech->gm_upcall_enctypes); -	gss_mech_put(k5mech); -out: +	seq_printf(m, KRB5_SUPPORTED_ENCTYPES);  	return 0;  } @@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = {  	.llseek		= seq_lseek,  	.release	= single_release,  }; -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */  extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);  extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); @@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)  		[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},  		[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},  		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)  		[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */  #ifdef CONFIG_NFSD_V4  		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},  		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d5718273bb32..fd0acca5370a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -696,7 +696,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor  }  #endif /* CONFIG_NFSD_V3 */ +static int nfsd_open_break_lease(struct inode *inode, int access) +{ +	unsigned int mode; +	if (access & NFSD_MAY_NOT_BREAK_LEASE) +		return 0; +	mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY; +	return break_lease(inode, mode | O_NONBLOCK); +}  /*   * Open an existing file or directory. @@ -744,12 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,  	if (!inode->i_fop)  		goto out; -	/* -	 * Check to see if there are any leases on this file. -	 * This may block while leases are broken. -	 */ -	if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) -		host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); +	host_err = nfsd_open_break_lease(inode, access);  	if (host_err) /* NOMEM or WOULDBLOCK */  		goto out_nfserr; @@ -1660,8 +1663,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,  	if (!dold->d_inode)  		goto out_drop_write;  	host_err = nfsd_break_lease(dold->d_inode); -	if (host_err) +	if (host_err) { +		err = nfserrno(host_err);  		goto out_drop_write; +	}  	host_err = vfs_link(dold, dirp, dnew);  	if (!host_err) {  		err = nfserrno(commit_metadata(ffhp)); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b954878ad6ce..b9b45fc2903e 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -801,12 +801,7 @@ out_err:  int nilfs_permission(struct inode *inode, int mask, unsigned int flags)  { -	struct nilfs_root *root; - -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD; - -	root = NILFS_I(inode)->i_root; +	struct nilfs_root *root = NILFS_I(inode)->i_root;  	if ((mask & MAY_WRITE) && root &&  	    root->cno != NILFS_CPTREE_CURRENT_CNO)  		return -EROFS; /* snapshot is not writable */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 14def991d9dd..8a84210ca080 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2169,11 +2169,7 @@ static const struct file_operations proc_fd_operations = {   */  static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)  { -	int rv; - -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD; -	rv = generic_permission(inode, mask, flags, NULL); +	int rv = generic_permission(inode, mask, flags, NULL);  	if (rv == 0)  		return 0;  	if (task_pid(current) == proc_pid(inode)) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f50133c11c24..d167de365a8d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -304,9 +304,6 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)  	struct ctl_table *table;  	int error; -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD; -  	/* Executable files are not allowed under /proc/sys/ */  	if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))  		return -EACCES; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e8a62f41b458..d78089690965 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -954,8 +954,6 @@ static int xattr_mount_check(struct super_block *s)  int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)  { -	if (flags & IPERM_FLAG_RCU) -		return -ECHILD;  	/*  	 * We don't do permission checks on the internal objects.  	 * Permissions are determined by the "owning" object. diff --git a/fs/timerfd.c b/fs/timerfd.c index f67acbdda5e8..dffeb3795af1 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)  /*   * Called when the clock was set to cancel the timers in the cancel - * list. + * list. This will wake up processes waiting on these timers. The + * wake-up requires ctx->ticks to be non zero, therefore we increment + * it before calling wake_up_locked().   */  void timerfd_clock_was_set(void)  { @@ -76,6 +78,7 @@ void timerfd_clock_was_set(void)  		spin_lock_irqsave(&ctx->wqh.lock, flags);  		if (ctx->moffs.tv64 != moffs.tv64) {  			ctx->moffs.tv64 = KTIME_MAX; +			ctx->ticks++;  			wake_up_locked(&ctx->wqh);  		}  		spin_unlock_irqrestore(&ctx->wqh.lock, flags); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 8c892c2d5300..529be0582029 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2146,6 +2146,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,  	if (IS_ERR(sb)) {  		err = PTR_ERR(sb);  		kfree(c); +		goto out_close;  	}  	if (sb->s_root) { | 
