Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/write.c | 1
-rw-r--r--  fs/binfmt_flat.c | 6
-rw-r--r--  fs/block_dev.c | 72
-rw-r--r--  fs/btrfs/acl.c | 8
-rw-r--r--  fs/btrfs/disk-io.c | 11
-rw-r--r--  fs/btrfs/extent-tree.c | 3
-rw-r--r--  fs/btrfs/extent_io.c | 2
-rw-r--r--  fs/btrfs/file.c | 12
-rw-r--r--  fs/btrfs/inode.c | 4
-rw-r--r--  fs/btrfs/ioctl.c | 4
-rw-r--r--  fs/btrfs/relocation.c | 7
-rw-r--r--  fs/btrfs/root-tree.c | 3
-rw-r--r--  fs/btrfs/super.c | 6
-rw-r--r--  fs/ceph/auth_x.c | 2
-rw-r--r--  fs/ceph/caps.c | 114
-rw-r--r--  fs/ceph/crush/mapper.c | 41
-rw-r--r--  fs/ceph/debugfs.c | 2
-rw-r--r--  fs/ceph/inode.c | 21
-rw-r--r--  fs/ceph/mds_client.c | 34
-rw-r--r--  fs/ceph/mds_client.h | 6
-rw-r--r--  fs/ceph/messenger.c | 4
-rw-r--r--  fs/ceph/mon_client.c | 5
-rw-r--r--  fs/ceph/osd_client.c | 3
-rw-r--r--  fs/ceph/osdmap.c | 1
-rw-r--r--  fs/ceph/super.c | 4
-rw-r--r--  fs/cifs/cifsfs.c | 16
-rw-r--r--  fs/cifs/cifsproto.h | 1
-rw-r--r--  fs/cifs/dir.c | 76
-rw-r--r--  fs/cifs/file.c | 100
-rw-r--r--  fs/cifs/inode.c | 4
-rw-r--r--  fs/cifs/sess.c | 10
-rw-r--r--  fs/dcache.c | 2
-rw-r--r--  fs/ext2/acl.c | 1
-rw-r--r--  fs/ext3/acl.c | 1
-rw-r--r--  fs/fcntl.c | 6
-rw-r--r--  fs/fs-writeback.c | 465
-rw-r--r--  fs/jffs2/acl.c | 3
-rw-r--r--  fs/jffs2/dir.c | 127
-rw-r--r--  fs/jffs2/fs.c | 7
-rw-r--r--  fs/nfs/client.c | 122
-rw-r--r--  fs/nfs/getroot.c | 2
-rw-r--r--  fs/nfs/nfs4xdr.c | 4
-rw-r--r--  fs/nfs/super.c | 22
-rw-r--r--  fs/nfsd/nfs4state.c | 2
-rw-r--r--  fs/nfsd/vfs.c | 3
-rw-r--r--  fs/ocfs2/reservations.c | 1
-rw-r--r--  fs/pipe.c | 20
-rw-r--r--  fs/proc/proc_devtree.c | 3
-rw-r--r--  fs/proc/task_nommu.c | 20
-rw-r--r--  fs/splice.c | 9
-rw-r--r--  fs/super.c | 6
-rw-r--r--  fs/sysv/ialloc.c | 6
-rw-r--r--  fs/ubifs/budget.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 8
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c | 11
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 7
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 15
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 18
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 27
-rw-r--r--  fs/xfs/xfs_dfrag.c | 5
-rw-r--r--  fs/xfs/xfs_ialloc.c | 142
-rw-r--r--  fs/xfs/xfs_iget.c | 10
-rw-r--r--  fs/xfs/xfs_inode.c | 5
-rw-r--r--  fs/xfs/xfs_inode.h | 6
-rw-r--r--  fs/xfs/xfs_itable.c | 285
-rw-r--r--  fs/xfs/xfs_itable.h | 17
-rw-r--r--  fs/xfs/xfs_log_recover.c | 2
-rw-r--r--  fs/xfs/xfs_mount.c | 2
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 4
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 2
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 2
71 files changed, 874 insertions, 1111 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b6ab27ccf214..811384bec8de 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -68,11 +68,7 @@
* Here we can be a bit looser than the data sections since this
* needs to only meet arch ABI requirements.
*/
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_STACK_ALIGN (ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_STACK_ALIGN (sizeof(void *))
-#endif
+#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7346c96308a5..99d6af811747 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -706,8 +706,13 @@ retry:
* @bdev is about to be opened exclusively. Check @bdev can be opened
* exclusively and mark that an exclusive open is in progress. Each
* successful call to this function must be matched with a call to
- * either bd_claim() or bd_abort_claiming(). If this function
- * succeeds, the matching bd_claim() is guaranteed to succeed.
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
*
* CONTEXT:
* Might sleep.
@@ -734,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
return ERR_PTR(-ENXIO);
whole = bdget_disk(disk, 0);
+ module_put(disk->fops->owner);
put_disk(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -782,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
}
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ /* note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder will
+ * be set to bd_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish the block device claiming started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+ struct block_device *whole, void *holder)
+{
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ __bd_claim(bdev, whole, holder);
+ __bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
/**
* bd_claim - claim a block device
* @bdev: block device to claim
* @holder: holder trying to claim @bdev
*
- * Try to claim @bdev which must have been opened successfully. This
- * function may be called with or without preceding
- * blk_start_claiming(). In the former case, this function is always
- * successful and terminates the claiming block.
+ * Try to claim @bdev which must have been opened successfully.
*
* CONTEXT:
* Might sleep.
@@ -806,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder)
might_sleep();
spin_lock(&bdev_lock);
-
res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0) {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- if (whole->bd_claiming)
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- else
- spin_unlock(&bdev_lock);
+ if (res == 0)
+ __bd_claim(bdev, whole, holder);
+ spin_unlock(&bdev_lock);
return res;
}
@@ -1476,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (whole) {
if (res == 0)
- BUG_ON(bd_claim(bdev, filp) != 0);
+ bd_finish_claiming(bdev, whole, filp);
else
bd_abort_claiming(whole, filp);
}
@@ -1712,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
goto out_blkdev_put;
- BUG_ON(bd_claim(bdev, holder) != 0);
+ bd_finish_claiming(bdev, whole, holder);
return bdev;
out_blkdev_put:
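
The comments added to fs/block_dev.c above describe a two-phase exclusive-open protocol: reserve the claim, perform the open (which may still fail), then commit or roll back. Below is a minimal sketch of that call sequence, condensed from the blkdev_open() hunk; open_exclusively() is a hypothetical wrapper, and bd_start_claiming()/bd_finish_claiming()/bd_abort_claiming() are static to fs/block_dev.c, so this is illustrative rather than a reusable external API.

/* Illustrative sketch only -- mirrors the flow used by blkdev_open() above. */
static int open_exclusively(struct block_device *bdev, struct file *filp)
{
	struct block_device *whole;
	int res;

	/* Phase 1: mark the claim as pending; concurrent claimers wait rather than fail. */
	whole = bd_start_claiming(bdev, filp);
	if (IS_ERR(whole))
		return PTR_ERR(whole);

	/* Phase 2: the open itself, which may still fail. */
	res = blkdev_get(bdev, filp->f_mode);

	/* Phase 3: commit or roll back; neither helper can fail. */
	if (res == 0)
		bd_finish_claiming(bdev, whole, filp);
	else
		bd_abort_claiming(whole, filp);

	return res;
}
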
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 8d432cd9d580..2222d161c7b6 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return acl;
set_cached_acl(inode, type, acl);
}
kfree(value);
@@ -160,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
int ret;
struct posix_acl *acl = NULL;
+ if (!is_owner_or_cap(dentry->d_inode))
+ return -EPERM;
+
+ if (!IS_POSIXACL(dentry->d_inode))
+ return -EOPNOTSUPP;
+
if (value) {
acl = posix_acl_from_xattr(value, size);
if (acl == NULL) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3b287c22caf..34f7c375567e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1941,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_level_size(tree_root,
btrfs_super_log_root_level(disk_super));
- log_tree_root = kzalloc(sizeof(struct btrfs_root),
- GFP_NOFS);
+ log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ if (!log_tree_root) {
+ err = -ENOMEM;
+ goto fail_trans_kthread;
+ }
__setup_root(nodesize, leafsize, sectorsize, stripesize,
log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
@@ -1982,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
+ if (IS_ERR(fs_info->fs_root)) {
+ err = PTR_ERR(fs_info->fs_root);
+ goto fail_trans_kthread;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b9080d71991a..32d094002a57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4360,7 +4360,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
- BUG_ON(block_rsv->space_info != cache->space_info);
+ if (block_rsv->space_info != cache->space_info)
+ goto out;
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 787b50a16a14..e354c33df082 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1140,7 +1140,7 @@ int btrfs_sync_file(struct file *file, int datasync)
/*
* ok we haven't committed the transaction yet, lets do a commit
*/
- if (file && file->private_data)
+ if (file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 0);
@@ -1190,14 +1190,22 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
- vma->vm_ops = &btrfs_file_vm_ops;
+ struct address_space *mapping = filp->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+
file_accessed(filp);
+ vma->vm_ops = &btrfs_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+
return 0;
}
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.splice_read = generic_file_splice_read,
.aio_write = btrfs_file_aio_write,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa6ccc1bfe2a..1bff92ad4744 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2673,7 +2673,7 @@ static int check_path_shared(struct btrfs_root *root,
struct extent_buffer *eb;
int level;
int ret;
- u64 refs;
+ u64 refs = 1;
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -6884,7 +6884,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4cdb98cf26de..4dbaf89b1337 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1280,7 +1280,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out;
+ goto out_up_write;
}
trans->block_rsv = &root->fs_info->global_block_rsv;
@@ -1845,7 +1845,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
dir_id, "default", 7, 1);
- if (!di) {
+ if (IS_ERR_OR_NULL(di)) {
btrfs_free_path(path);
btrfs_end_transaction(trans, root);
printk(KERN_ERR "Umm, you don't have the default dir item, "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05d41e569236..b37d723b9d4a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -784,16 +784,17 @@ again:
struct btrfs_extent_ref_v0 *ref0;
ref0 = btrfs_item_ptr(eb, path1->slots[0],
struct btrfs_extent_ref_v0);
- root = find_tree_root(rc, eb, ref0);
- if (!root->ref_cows)
- cur->cowonly = 1;
if (key.objectid == key.offset) {
+ root = find_tree_root(rc, eb, ref0);
if (root && !should_ignore_root(root))
cur->root = root;
else
list_add(&cur->list, &useless);
break;
}
+ if (is_cowonly_root(btrfs_ref_root_v0(eb,
+ ref0)))
+ cur->cowonly = 1;
}
#else
BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b91ccd972644..2d958be761c8 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -330,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_path *path;
int ret;
- u32 refs;
struct btrfs_root_item *ri;
struct extent_buffer *leaf;
@@ -344,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
leaf = path->nodes[0];
ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
- refs = btrfs_disk_root_refs(leaf, ri);
- BUG_ON(refs != 0);
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d34b2dfc9628..f2393b390318 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -360,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb,
*/
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
+ if (IS_ERR(di))
+ return ERR_CAST(di);
if (!di) {
/*
* Ok the default dir item isn't there. This is weird since
@@ -390,8 +392,8 @@ setup_root:
location.offset = 0;
inode = btrfs_iget(sb, &location, new_root, &new);
- if (!inode)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
/*
* If we're just mounting the root most subvol put the inode and return
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ffe..3fe49042d8ad 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
- op = le32_to_cpu(head->op);
+ op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ae3e3a306445..74144d6389f0 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
struct ceph_cap *cap = NULL;
/* temporary, until we do something about cap import/export */
- if (!ctx)
- return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ caps_use_count++;
+ caps_total_count++;
+ }
+ return cap;
+ }
spin_lock(&caps_list_lock);
dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
@@ -981,6 +987,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
return 0;
}
+static void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ spin_lock(&session->s_cap_lock);
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1040,9 @@ void ceph_queue_caps_release(struct inode *inode)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
struct ceph_mds_session *session = cap->session;
- struct ceph_msg *msg;
- struct ceph_mds_cap_release *head;
- struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
- BUG_ON(!session->s_num_cap_releases);
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg, list_head);
-
- dout(" adding %p release to mds%d msg %p (%d left)\n",
- inode, session->s_mds, msg, session->s_num_cap_releases);
-
- BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
- head = msg->front.iov_base;
- head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
- item = msg->front.iov_base + msg->front.iov_len;
- item->ino = cpu_to_le64(ceph_ino(inode));
- item->cap_id = cpu_to_le64(cap->cap_id);
- item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
-
- session->s_num_cap_releases--;
-
- msg->front.iov_len += sizeof(*item);
- if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
- dout(" release msg %p full\n", msg);
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- } else {
- dout(" release msg %p at %d/%d (%d)\n", msg,
- (int)le32_to_cpu(head->num),
- (int)CEPH_CAPS_PER_RELEASE,
- (int)msg->front.iov_len);
- }
- spin_unlock(&session->s_cap_lock);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
__ceph_remove_cap(cap);
}
@@ -2655,7 +2669,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_caps *h;
int mds = session->s_mds;
int op;
- u32 seq;
+ u32 seq, mseq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
@@ -2675,6 +2689,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap = CEPH_NOSNAP;
cap_id = le64_to_cpu(h->cap_id);
seq = le32_to_cpu(h->seq);
+ mseq = le32_to_cpu(h->migrate_seq);
size = le64_to_cpu(h->size);
max_size = le64_to_cpu(h->max_size);
@@ -2689,6 +2704,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
+
+ if (op == CEPH_CAP_OP_IMPORT)
+ __queue_cap_release(session, vino.ino, cap_id,
+ mseq, seq);
+
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session, -1);
+ ceph_send_cap_releases(mdsc, session);
goto done;
}
@@ -2714,7 +2741,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&inode->i_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
- dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+ dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
goto done;
@@ -2865,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
int ret = 0;
- int used = 0;
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
- dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
- mds, ceph_cap_string(used), ceph_cap_string(drop),
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
ceph_cap_string(unless));
- /* only drop unused caps */
- drop &= ~used;
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb6543..a4eec133258e 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
- dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
{
- if (weight[item] >= 0x1000)
+ if (weight[item] >= 0x10000)
return 0;
if (weight[item] == 0)
return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
- dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
in = map->buckets[-1-item];
+ retry_bucket = 1;
continue;
}
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
}
}
- if (recurse_to_leaf &&
- item < 0 &&
- crush_choose(map, map->buckets[-1-item],
- weight,
- x, outpos+1, 0,
- out2, outpos,
- firstn, 0, NULL) <= outpos) {
- reject = 1;
- } else {
+ reject = 0;
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ if (crush_choose(map,
+ map->buckets[-1-item],
+ weight,
+ x, outpos+1, 0,
+ out2, outpos,
+ firstn, 0,
+ NULL) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
continue;
}
- dprintk("choose got %d\n", item);
+ dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
}
- dprintk("choose returns %d\n", outpos);
+ dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066cc..f2f5332ddbba 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
static int caps_show(struct seq_file *s, void *p)
{
- struct ceph_client *client = p;
+ struct ceph_client *client = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 226f5a50d362..8f9b9fe8ef9f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
spin_lock(&dcache_lock);
spin_lock(&dn->d_lock);
- list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
+ list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
d_drop(dn);
realdn = d_materialise_unique(dn, in);
if (IS_ERR(realdn)) {
- pr_err("splice_dentry error %p inode %p ino %llx.%llx\n",
- dn, in, ceph_vinop(in));
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
if (prehash)
*prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
@@ -1234,18 +1234,23 @@ retry_lookup:
goto out;
}
dn = splice_dentry(dn, in, NULL);
+ if (IS_ERR(dn))
+ dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- dput(dn);
- continue;
+ goto next_item;
}
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session, req->r_request_started);
- dput(dn);
+ if (dn)
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
+next_item:
+ if (dn)
+ dput(dn);
}
req->r_did_prepopulate = true;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b49f12822cbc..3ab79f6c4ce8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
*
* Called under s_mutex.
*/
-static int add_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
- int extra)
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra)
{
struct ceph_msg *msg;
struct ceph_mds_cap_release *head;
@@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
/*
* called under s_mutex
*/
-static void send_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
{
struct ceph_msg *msg;
@@ -1980,7 +1980,7 @@ out_err:
}
mutex_unlock(&mdsc->mutex);
- add_cap_releases(mdsc, req->r_session, -1);
+ ceph_add_cap_releases(mdsc, req->r_session, -1);
mutex_unlock(&session->s_mutex);
/* kick calling process */
@@ -2433,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_dentry_info *di;
int mds = session->s_mds;
struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
struct ceph_vino vino;
int mask;
struct qstr dname;
@@ -2446,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
mask = le16_to_cpu(h->mask);
+ seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
if (dname.len != get_unaligned_le32(h+1))
@@ -2456,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease '%s', mask %d, ino %llx %p\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode);
+ dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
@@ -2482,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
switch (h->action) {
case CEPH_MDS_LEASE_REVOKE:
if (di && di->lease_session == session) {
- h->seq = cpu_to_le32(di->lease_seq);
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
}
release = 1;
@@ -2496,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
unsigned long duration =
le32_to_cpu(h->duration_ms) * HZ / 1000;
- di->lease_seq = le32_to_cpu(h->seq);
+ di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
di->lease_renew_after = di->lease_renew_from +
(duration >> 1);
@@ -2686,10 +2690,10 @@ static void delayed_work(struct work_struct *work)
send_renew_caps(mdsc, s);
else
ceph_con_keepalive(&s->s_con);
- add_cap_releases(mdsc, s, -1);
+ ceph_add_cap_releases(mdsc, s, -1);
if (s->s_state == CEPH_MDS_SESSION_OPEN ||
s->s_state == CEPH_MDS_SESSION_HUNG)
- send_cap_releases(mdsc, s);
+ ceph_send_cap_releases(mdsc, s);
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
@@ -2779,6 +2783,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
drop_leases(mdsc);
ceph_flush_dirty_caps(mdsc);
wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4f1212..b292fa42a66d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -322,6 +322,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request);
}
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863d..9ad43a310a41 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -657,7 +657,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
+ con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1396,10 +1396,12 @@ static int read_partial_message(struct ceph_connection *con)
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
+ skip = 0;
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 21c62e9b7d1d..cc115eafae11 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
ceph_msg_put(req->reply);
if (req->request)
ceph_msg_put(req->request);
+
+ kfree(req);
}
static void put_generic_request(struct ceph_mon_generic_request *req)
@@ -723,7 +725,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num = monc->auth->global_id;
+ monc->client->msgr->inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b4..92b7251a53f1 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
int type = le16_to_cpu(msg->hdr.type);
if (!osd)
- return;
+ goto out;
osdc = osd->o_osdc;
switch (type) {
@@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
+out:
ceph_msg_put(msg);
}
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c05..50ce64ebd330 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -707,6 +707,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
return ERR_CAST(newcrush);
+ *p += len;
}
/* new flags? */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e0bee240b9d..fa87f51e38e1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
- buf->f_namelen = PATH_MAX;
+ buf->f_namelen = NAME_MAX;
buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
@@ -926,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
/*
* construct our own bdi so we can control readahead, etc.
*/
-static atomic_long_t bdi_seq = ATOMIC_INIT(0);
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1f..484e52bb40bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+void cifs_drop_inode(struct inode *inode)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ return generic_drop_inode(inode);
+
+ return generic_delete_inode(inode);
+}
+
static const struct super_operations cifs_super_ops = {
.put_super = cifs_put_super,
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
-/* .drop_inode = generic_delete_inode,
- .delete_inode = cifs_delete_inode, */ /* Do not need above two
- functions unless later we add lazy close of inodes or unless the
+ .drop_inode = cifs_drop_inode,
+/* .delete_inode = cifs_delete_inode, */ /* Do not need above
+ function unless later we add lazy close of inodes or unless the
kernel forgets to call us with the same number of releases (closes)
as opens */
.show_options = cifs_show_options,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb8..fb6318b81509 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
__u16 fileHandle, struct file *file,
struct vfsmount *mnt, unsigned int oflags);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt,
struct super_block *sb,
int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461ca..e7ae78b66fa1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/file.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
}
write_unlock(&GlobalSMBSeslock);
+ file->private_data = pCifsFile;
+
return pCifsFile;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, struct super_block *sb,
- int mode, int oflags,
+ struct super_block *sb, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
cifs_fattr_to_inode(*pinode, &fattr);
}
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
- * file->private_data.
- */
- if (mnt) {
- struct cifsFileInfo *pfile_info;
-
- pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
- oflags);
- if (pfile_info == NULL)
- rc = -ENOMEM;
- }
-
posix_open_ret:
kfree(presp_data);
return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
int create_options = CREATE_NOT_DIR;
__u32 oplock = 0;
int oflags;
- bool posix_create = false;
/*
* BB below access is probably too much for mknod to request
* but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode,
- nd ? nd->path.mnt : NULL,
inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
handled in posix open */
if (rc == 0) {
- posix_create = true;
if (newinode == NULL) /* query inode info */
goto cifs_create_get_file_info;
else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
else
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
- /* nfsd case - nfs srv does not set nd */
- if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
- /* mknod case - do not leave file open */
- CIFSSMBClose(xid, tcon, fileHandle);
- } else if (!(posix_create) && (newinode)) {
+ if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
struct cifsFileInfo *pfile_info;
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo
- * pointer to file->private_data.
- */
- pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+ struct file *filp;
+
+ filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
+ goto cifs_create_out;
+ }
+
+ pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
nd->path.mnt, oflags);
- if (pfile_info == NULL)
+ if (pfile_info == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
rc = -ENOMEM;
+ }
+ } else {
+ CIFSSMBClose(xid, tcon, fileHandle);
}
+
cifs_create_out:
kfree(buf);
kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
bool posix_open = false;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
+ struct cifsFileInfo *cfile;
struct inode *newInode = NULL;
char *full_path = NULL;
struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
- rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+ rc = cifs_posix_open(full_path, &newInode,
parent_dir_inode->i_sb,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
else
direntry->d_op = &cifs_dentry_ops;
d_add(direntry, newInode);
- if (posix_open)
- filp = lookup_instantiate_filp(nd, direntry, NULL);
+ if (posix_open) {
+ filp = lookup_instantiate_filp(nd, direntry,
+ generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ goto lookup_out;
+ }
+
+ cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+ nd->path.mnt,
+ nd->intent.open.flags);
+ if (cfile == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ rc = -ENOMEM;
+ goto lookup_out;
+ }
+ }
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
is a common return code */
}
+lookup_out:
kfree(full_path);
FreeXid(xid);
return ERR_PTR(rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 75541af4b3db..409e4f523e61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
return 0;
}
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
- struct list_head *tmp;
- struct cifsFileInfo *pCifsFile = NULL;
- struct cifsInodeInfo *pCifsInode = NULL;
-
- /* search inode for this file and fill in file->private_data */
- pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- read_lock(&GlobalSMBSeslock);
- list_for_each(tmp, &pCifsInode->openFileList) {
- pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
- if ((pCifsFile->pfile == NULL) &&
- (pCifsFile->pid == current->tgid)) {
- /* mode set in cifs_create */
-
- /* needed for writepage */
- pCifsFile->pfile = file;
- file->private_data = pCifsFile;
- break;
- }
- }
- read_unlock(&GlobalSMBSeslock);
-
- if (file->private_data != NULL) {
- return pCifsFile;
- } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
- cERROR(1, "could not find file instance for "
- "new file %p", file);
- return NULL;
-}
-
/* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
- struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
char *full_path, int xid)
{
+ struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
struct timespec temp;
int rc;
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
/* if not oplocked, invalidate inode pages if mtime or file
size changed */
temp = cifs_NTtimeToUnix(buf->LastWriteTime);
- if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
- (file->f_path.dentry->d_inode->i_size ==
+ if (timespec_equal(&inode->i_mtime, &temp) &&
+ (inode->i_size ==
(loff_t)le64_to_cpu(buf->EndOfFile))) {
cFYI(1, "inode unchanged on server");
} else {
- if (file->f_path.dentry->d_inode->i_mapping) {
+ if (inode->i_mapping) {
/* BB no need to lock inode until after invalidate
since namei code should already have it locked? */
- rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+ rc = filemap_write_and_wait(inode->i_mapping);
if (rc != 0)
- CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+ pCifsInode->write_behind_rc = rc;
}
cFYI(1, "invalidating remote inode since open detected it "
"changed");
- invalidate_remote_inode(file->f_path.dentry->d_inode);
+ invalidate_remote_inode(inode);
}
client_can_cache:
if (pTcon->unix_ext)
- rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
- full_path, inode->i_sb, xid);
+ rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+ xid);
else
- rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
- full_path, buf, inode->i_sb, xid, NULL);
+ rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+ xid, NULL);
if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
pCifsInode->clientCanCacheAll = true;
pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- file->f_path.dentry->d_inode);
+ cFYI(1, "Exclusive Oplock granted on inode %p", inode);
} else if ((*oplock & 0xF) == OPLOCK_READ)
pCifsInode->clientCanCacheRead = true;
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
__u32 oplock;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pCifsFile;
+ struct cifsFileInfo *pCifsFile = NULL;
struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
tcon = cifs_sb->tcon;
pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- pCifsFile = cifs_fill_filedata(file);
- if (pCifsFile) {
- rc = 0;
- FreeXid(xid);
- return rc;
- }
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
oflags |= SMB_O_CREAT;
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, &inode, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
/* no need for special case handling of setting mode
on read only files needed here */
- pCifsFile = cifs_fill_filedata(file);
- cifs_posix_open_inode_helper(inode, file, pCifsInode,
- oplock, netfid);
+ rc = cifs_posix_open_inode_helper(inode, file,
+ pCifsInode, oplock, netfid);
+ if (rc != 0) {
+ CIFSSMBClose(xid, tcon, netfid);
+ goto out;
+ }
+
+ pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+ file->f_path.mnt,
+ oflags);
+ if (pCifsFile == NULL) {
+ CIFSSMBClose(xid, tcon, netfid);
+ rc = -ENOMEM;
+ }
goto out;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
+ rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+ if (rc != 0)
+ goto out;
+
pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
file->f_flags);
- file->private_data = pCifsFile;
- if (file->private_data == NULL) {
+ if (pCifsFile == NULL) {
rc = -ENOMEM;
goto out;
}
- rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
- &oplock, buf, full_path, xid);
-
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, NULL, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a56..6f0683c68952 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
if (rc == 0 || rc != -ETXTBSY)
return rc;
+ /* open-file renames don't work across directories */
+ if (to_dentry->d_parent != from_dentry->d_parent)
+ return rc;
+
/* open the file to be renamed -- we need DELETE perms */
rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2c..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
/* calculate session key */
setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
- if (first_time) /* should this be moved into common code
- with similar ntlmv2 path? */
- /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
- response BB FIXME, v2_sess_key); */
-
- /* copy session key */
-
- /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
- bcc_ptr += LM2_SESS_KEY_SIZE; */
+ /* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a633..c8c78ba07827 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
count -= pruned;
__put_super(sb);
/* more work left to do? */
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98a..2bcc0431bada 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3c..8a11fe212183 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode);
if (error == 0)
acl = NULL;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 51e11bf5708f..9d175d623aab 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -733,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
+
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- spin_lock(&fa->fa_lock);
+ spin_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -747,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- spin_unlock(&fa->fa_lock);
+ spin_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc2..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,51 +38,18 @@ int nr_pdflush_threads;
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
-struct wb_writeback_args {
+struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
-};
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
struct list_head list; /* pending work list */
- struct rcu_head rcu_head; /* for RCU free/clear of work */
-
- unsigned long seen; /* threads that have seen this work */
- atomic_t pending; /* number of threads still to do work */
-
- struct wb_writeback_args args; /* writeback arguments */
-
- unsigned long state; /* flag bits, see WS_* */
+ struct completion *done; /* set if the caller waits */
};
-enum {
- WS_USED_B = 0,
- WS_ONSTACK_B,
-};
-
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
- return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void bdi_work_init(struct bdi_work *work,
- struct wb_writeback_args *args)
-{
- INIT_RCU_HEAD(&work->rcu_head);
- work->args = *args;
- work->state = WS_USED;
-}
-
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -95,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return !list_empty(&bdi->work_list);
}
-static void bdi_work_clear(struct bdi_work *work)
-{
- clear_bit(WS_USED_B, &work->state);
- smp_mb__after_clear_bit();
- /*
- * work can have disappeared at this point. bit waitq functions
- * should be able to tolerate this, provided bdi_sched_wait does
- * not dereference it's pointer argument.
- */
- wake_up_bit(&work->state, WS_USED_B);
-}
-
-static void bdi_work_free(struct rcu_head *head)
-{
- struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
- if (!bdi_work_on_stack(work))
- kfree(work);
- else
- bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
- const enum writeback_sync_modes sync_mode = work->args.sync_mode;
- int onstack = bdi_work_on_stack(work);
-
- /*
- * For allocated work, we can clear the done/seen bit right here.
- * For on-stack work, we need to postpone both the clear and free
- * to after the RCU grace period, since the stack could be invalidated
- * as soon as bdi_work_clear() has done the wakeup.
- */
- if (!onstack)
- bdi_work_clear(work);
- if (sync_mode == WB_SYNC_NONE || onstack)
- call_rcu(&work->rcu_head, bdi_work_free);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
- /*
- * The caller has retrieved the work arguments from this work,
- * drop our reference. If this is the last ref, delete and free it
- */
- if (atomic_dec_and_test(&work->pending)) {
- struct backing_dev_info *bdi = wb->bdi;
-
- spin_lock(&bdi->wb_lock);
- list_del_rcu(&work->list);
- spin_unlock(&bdi->wb_lock);
-
- wb_work_complete(work);
- }
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
{
- work->seen = bdi->wb_mask;
- BUG_ON(!work->seen);
- atomic_set(&work->pending, bdi->wb_cnt);
- BUG_ON(!bdi->wb_cnt);
-
- /*
- * list_add_tail_rcu() contains the necessary barriers to
- * make sure the above stores are seen before the item is
- * noticed on the list
- */
spin_lock(&bdi->wb_lock);
- list_add_tail_rcu(&work->list, &bdi->work_list);
+ list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
/*
@@ -181,97 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
}
}
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
-{
- wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
- TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+ bool range_cyclic, bool for_background)
{
- struct bdi_work *work;
+ struct wb_writeback_work *work;
/*
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
- work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (work) {
- bdi_work_init(work, args);
- bdi_queue_work(bdi, work);
- } else {
- struct bdi_writeback *wb = &bdi->wb;
-
- if (wb->task)
- wake_up_process(wb->task);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ if (bdi->wb.task)
+ wake_up_process(bdi->wb.task);
+ return;
}
+
+ work->sync_mode = WB_SYNC_NONE;
+ work->nr_pages = nr_pages;
+ work->range_cyclic = range_cyclic;
+ work->for_background = for_background;
+
+ bdi_queue_work(bdi, work);
}
/**
- * bdi_sync_writeback - start and wait for writeback
+ * bdi_start_writeback - start writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
+ * @nr_pages: the number of pages to write
*
* Description:
- * This does WB_SYNC_ALL data integrity writeback and waits for the
- * IO to complete. Callers must hold the sb s_umount semaphore for
- * reading, to avoid having the super disappear before we are done.
+ * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * started when this function returns; we make no guarantees on
+ * completion. Caller need not hold sb s_umount semaphore.
+ *
*/
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
- struct super_block *sb)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_ALL,
- .nr_pages = LONG_MAX,
- .range_cyclic = 0,
- };
- struct bdi_work work;
-
- bdi_work_init(&work, &args);
- work.state |= WS_ONSTACK;
-
- bdi_queue_work(bdi, &work);
- bdi_wait_on_work_clear(&work);
+ __bdi_start_writeback(bdi, nr_pages, true, false);
}
/**
- * bdi_start_writeback - start writeback
+ * bdi_start_background_writeback - start background writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
- * @nr_pages: the number of pages to write
*
* Description:
- * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * This does WB_SYNC_NONE background writeback. The IO is only
* started when this function returns; we make no guarantees on
* completion. Caller need not hold sb s_umount semaphore.
- *
*/
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages)
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_NONE,
- .nr_pages = nr_pages,
- .range_cyclic = 1,
- };
-
- /*
- * We treat @nr_pages=0 as the special case to do background writeback,
- * ie. to sync pages until the background dirty threshold is reached.
- */
- if (!nr_pages) {
- args.nr_pages = LONG_MAX;
- args.for_background = 1;
- }
-
- bdi_alloc_queue_work(bdi, &args);
+ __bdi_start_writeback(bdi, LONG_MAX, true, true);
}
/*
@@ -561,75 +425,69 @@ select_queue:
return ret;
}
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
- up_read(&sb->s_umount);
- put_super(sb);
-}
-
-enum sb_pin_state {
- SB_PINNED,
- SB_NOT_PINNED,
- SB_PIN_FAILED
-};
-
/*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
* before calling writeback. So make sure that we do pin it, so it doesn't
* go away while we are writing inodes from it.
*/
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
- struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
{
- /*
- * Caller must already hold the ref for this
- */
- if (wbc->sync_mode == WB_SYNC_ALL) {
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
- return SB_NOT_PINNED;
- }
spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
sb->s_count++;
+ spin_unlock(&sb_lock);
+
if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root) {
- spin_unlock(&sb_lock);
- return SB_PINNED;
- }
- /*
- * umounted, drop rwsem again and fall through to failure
- */
+ if (sb->s_root)
+ return true;
up_read(&sb->s_umount);
}
- sb->s_count--;
- spin_unlock(&sb_lock);
- return SB_PIN_FAILED;
+
+ put_super(sb);
+ return false;
}
/*
* Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
+ *
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+ struct writeback_control *wbc, bool only_this_sb)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- if (wbc->sb && sb != inode->i_sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- if (sb != inode->i_sb)
- /* finish with this superblock */
+
+ if (inode->i_sb != sb) {
+ if (only_this_sb) {
+ /*
+ * We only want to write back data for this
+ * superblock, move all inodes not belonging
+ * to it back onto the dirty list.
+ */
+ redirty_tail(inode);
+ continue;
+ }
+
+ /*
+ * The inode belongs to a different superblock.
+ * Bounce back to the caller to unpin this and
+ * pin the next superblock.
+ */
return 0;
+ }
+
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
@@ -667,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
return 1;
}
-static void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
int ret = 0;
@@ -681,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
- enum sb_pin_state state;
- if (wbc->sb && sb != wbc->sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- state = pin_sb_for_writeback(wbc, sb);
-
- if (state == SB_PIN_FAILED) {
+ if (!pin_sb_for_writeback(sb)) {
requeue_io(inode);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc);
+ ret = writeback_sb_inodes(sb, wb, wbc, false);
+ drop_super(sb);
- if (state == SB_PINNED)
- unpin_sb_for_writeback(sb);
if (ret)
break;
}
@@ -706,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
/* Leave any unwritten inodes on b_io */
}
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+ struct bdi_writeback *wb, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = wbc->bdi;
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- writeback_inodes_wb(&bdi->wb, wbc);
+ wbc->wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+ writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&inode_lock);
}
/*
@@ -748,16 +602,14 @@ static inline bool over_bground_thresh(void)
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
- struct wb_writeback_args *args)
+ struct wb_writeback_work *work)
{
struct writeback_control wbc = {
- .bdi = wb->bdi,
- .sb = args->sb,
- .sync_mode = args->sync_mode,
+ .sync_mode = work->sync_mode,
.older_than_this = NULL,
- .for_kupdate = args->for_kupdate,
- .for_background = args->for_background,
- .range_cyclic = args->range_cyclic,
+ .for_kupdate = work->for_kupdate,
+ .for_background = work->for_background,
+ .range_cyclic = work->range_cyclic,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -777,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
/*
* Stop writeback when nr_pages has been consumed
*/
- if (args->nr_pages <= 0)
+ if (work->nr_pages <= 0)
break;
/*
* For background writeout, stop when we are below the
* background dirty threshold
*/
- if (args->for_background && !over_bground_thresh())
+ if (work->for_background && !over_bground_thresh())
break;
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
- writeback_inodes_wb(wb, &wbc);
- args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ if (work->sb)
+ __writeback_inodes_sb(work->sb, wb, &wbc);
+ else
+ writeback_inodes_wb(wb, &wbc);
+ work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
/*
@@ -827,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
}
/*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
*/
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
- struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
{
- struct bdi_work *work, *ret = NULL;
+ struct wb_writeback_work *work = NULL;
- rcu_read_lock();
-
- list_for_each_entry_rcu(work, &bdi->work_list, list) {
- if (!test_bit(wb->nr, &work->seen))
- continue;
- clear_bit(wb->nr, &work->seen);
-
- ret = work;
- break;
+ spin_lock(&bdi->wb_lock);
+ if (!list_empty(&bdi->work_list)) {
+ work = list_entry(bdi->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
}
-
- rcu_read_unlock();
- return ret;
+ spin_unlock(&bdi->wb_lock);
+ return work;
}
static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -876,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
if (nr_pages) {
- struct wb_writeback_args args = {
+ struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.for_kupdate = 1,
.range_cyclic = 1,
};
- return wb_writeback(wb, &args);
+ return wb_writeback(wb, &work);
}
return 0;
@@ -895,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
struct backing_dev_info *bdi = wb->bdi;
- struct bdi_work *work;
+ struct wb_writeback_work *work;
long wrote = 0;
while ((work = get_next_work_item(bdi, wb)) != NULL) {
- struct wb_writeback_args args = work->args;
-
/*
* Override sync mode, in case we must wait for completion
+ * because this thread is exiting now.
*/
if (force_wait)
- work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
- /*
- * If this isn't a data integrity operation, just notify
- * that we have seen this work and we are now starting it.
- */
- if (args.sync_mode == WB_SYNC_NONE)
- wb_clear_pending(wb, work);
+ work->sync_mode = WB_SYNC_ALL;
- wrote += wb_writeback(wb, &args);
+ wrote += wb_writeback(wb, work);
/*
- * This is a data integrity writeback, so only do the
- * notification when we have completed the work.
+ * Notify the caller of completion if this is a synchronous
+ * work item, otherwise just free it.
*/
- if (args.sync_mode == WB_SYNC_ALL)
- wb_clear_pending(wb, work);
+ if (work->done)
+ complete(work->done);
+ else
+ kfree(work);
}
/*
@@ -978,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
}
/*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
*/
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .nr_pages = nr_pages,
- .sync_mode = WB_SYNC_NONE,
- };
struct backing_dev_info *bdi;
- rcu_read_lock();
+ if (!nr_pages) {
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ }
+ rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
-
- bdi_alloc_queue_work(bdi, &args);
+ __bdi_start_writeback(bdi, nr_pages, false, false);
}
-
rcu_read_unlock();
}
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
- if (nr_pages == 0)
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS);
- bdi_writeback_all(NULL, nr_pages);
-}
-
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1218,12 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
{
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ .done = &done,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- nr_to_write = nr_dirty + nr_unstable +
+ work.nr_pages = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb);
@@ -1237,7 +1069,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
int writeback_inodes_sb_if_idle(struct super_block *sb)
{
if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
writeback_inodes_sb(sb);
+ up_read(&sb->s_umount);
return 1;
} else
return 0;
@@ -1253,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
*/
void sync_inodes_sb(struct super_block *sb)
{
- bdi_sync_writeback(sb->s_bdi, sb);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_pages = LONG_MAX,
+ .range_cyclic = 0,
+ .done = &done,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
+
wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
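The fs-writeback.c changes above drop the old bdi_work/seen-bitmask handshake in favour of a plain wb_writeback_work queued under bdi->wb_lock: synchronous callers (writeback_inodes_sb, sync_inodes_sb) embed a completion on their stack and wait for it, while asynchronous callers allocate the work and leave it to the flusher to kfree. What follows is a minimal userspace sketch of that ownership handoff, using pthreads in place of the kernel spinlock/completion primitives; every identifier in it (wb_work, wb_queue_work, flusher, ...) is invented for illustration and is not kernel API.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct completion {                     /* userspace stand-in for the kernel's completion */
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             done;
};

struct wb_work {                        /* hypothetical analogue of wb_writeback_work */
	long               nr_pages;
	struct completion *done;        /* NULL means fire-and-forget */
	struct wb_work    *next;
};

static pthread_mutex_t wb_lock = PTHREAD_MUTEX_INITIALIZER;  /* plays the role of bdi->wb_lock */
static pthread_cond_t  wb_more = PTHREAD_COND_INITIALIZER;
static struct wb_work *wb_head;
static struct completion sync_done = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};

static void wb_queue_work(struct wb_work *work)
{
	pthread_mutex_lock(&wb_lock);
	work->next = wb_head;
	wb_head = work;
	pthread_mutex_unlock(&wb_lock);
	pthread_cond_signal(&wb_more);
}

static void *flusher(void *arg)
{
	(void)arg;
	for (;;) {
		struct wb_work *work;

		pthread_mutex_lock(&wb_lock);
		while (!wb_head)
			pthread_cond_wait(&wb_more, &wb_lock);
		work = wb_head;
		wb_head = work->next;
		pthread_mutex_unlock(&wb_lock);

		printf("writing back %ld pages\n", work->nr_pages);

		if (work->done) {               /* synchronous caller: signal its completion */
			pthread_mutex_lock(&work->done->lock);
			work->done->done = 1;
			pthread_cond_signal(&work->done->cond);
			pthread_mutex_unlock(&work->done->lock);
		} else {                        /* asynchronous caller: the flusher owns the memory */
			free(work);
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t thr;
	struct wb_work sync_work = { .nr_pages = 1024, .done = &sync_done };
	struct wb_work *async_work = calloc(1, sizeof(*async_work));

	if (!async_work)
		return 1;
	async_work->nr_pages = 64;              /* no ->done, so the flusher frees it */
	pthread_create(&thr, NULL, flusher, NULL);
	wb_queue_work(async_work);
	wb_queue_work(&sync_work);              /* lives on our stack, like the work in sync_inodes_sb() */

	pthread_mutex_lock(&sync_done.lock);    /* the wait_for_completion() analogue */
	while (!sync_done.done)
		pthread_cond_wait(&sync_done.cond, &sync_done.lock);
	pthread_mutex_unlock(&sync_done.lock);
	return 0;
}

Compile with -lpthread. The LIFO push is incidental; only the choice between "signal the completion" and "free the work" mirrors the patch.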
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index a33aab6b5e68..54a92fd02bbd 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (inode->i_mode != mode) {
struct iattr attr;
- attr.ia_valid = ATTR_MODE;
+ attr.ia_valid = ATTR_MODE | ATTR_CTIME;
attr.ia_mode = mode;
+ attr.ia_ctime = CURRENT_TIME_SEC;
rc = jffs2_do_setattr(inode, &attr);
if (rc < 0)
return rc;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7aa4417e085f..166062a68230 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
jffs2_free_raw_inode(ri);
- d_instantiate(dentry, inode);
D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages));
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
fail:
make_bad_inode(inode);
+ unlock_new_inode(inode);
iput(inode);
jffs2_free_raw_inode(ri);
return ret;
@@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* We use f->target field to store the target path. */
@@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
memcpy(f->target, target, targetlen + 1);
@@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
@@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
@@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
-
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 8bc2c80ab159..459d39d1ea0b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
inode->i_blocks = 0;
inode->i_size = 0;
- insert_inode_hash(inode);
+ if (insert_inode_locked(inode) < 0) {
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-EINVAL);
+ }
return inode;
}
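The jffs2_symlink/jffs2_mkdir/jffs2_mknod hunks above replace half a dozen duplicated `jffs2_clear_inode(inode); return err;` exits with a single fail: label that marks the half-built inode bad, unlocks it and drops the last reference. A tiny standalone sketch of that single-exit error-unwinding shape follows; the widget type and the setup_a/setup_b steps are made up purely to show the control flow.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct widget { int locked; };                  /* hypothetical object, not a kernel type */

static int setup_a(struct widget *w) { (void)w; return 0; }
static int setup_b(struct widget *w) { (void)w; return -ENOMEM; }   /* pretend this step fails */

static void widget_unlock(struct widget *w) { w->locked = 0; }
static void widget_put(struct widget *w)    { free(w); }

/* Every failure funnels through one label, so the unwind steps exist exactly once. */
static int make_widget(struct widget **out)
{
	struct widget *w;
	int ret;

	w = calloc(1, sizeof(*w));
	if (!w)
		return -ENOMEM;
	w->locked = 1;                          /* like insert_inode_locked() leaving the inode new/locked */

	ret = setup_a(w);
	if (ret)
		goto fail;
	ret = setup_b(w);
	if (ret)
		goto fail;

	widget_unlock(w);                       /* success path: unlock and hand it out */
	*out = w;
	return 0;

 fail:
	widget_unlock(w);                       /* mirror of unlock_new_inode() + iput() */
	widget_put(w);
	return ret;
}

int main(void)
{
	struct widget *w;
	int ret = make_widget(&w);

	printf("make_widget: %d\n", ret);
	return 0;
}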
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f8..d25b5257b7a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
#endif /* CONFIG_NFS_V4_1 */
}
+static int nfs4_server_common_setup(struct nfs_server *server,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_fattr *fattr;
+ int error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* We must ensure the session is initialised first */
+ error = nfs4_init_session(server);
+ if (error < 0)
+ goto out;
+
+ /* Probe the root fh to retrieve its FSID and filehandle */
+ error = nfs4_get_rootfh(server, mntfh);
+ if (error < 0)
+ goto out;
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+ dprintk("Mount FH: %d\n", mntfh->size);
+
+ nfs4_session_set_rwsize(server);
+
+ error = nfs_probe_fsinfo(server, mntfh, fattr);
+ if (error < 0)
+ goto out;
+
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+out:
+ nfs_free_fattr(fattr);
+ return error;
+}
+
/*
* Create a version 4 volume record
*/
@@ -1346,7 +1395,6 @@ error:
struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
struct nfs_fh *mntfh)
{
- struct nfs_fattr *fattr;
struct nfs_server *server;
int error;
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
/* set up the general RPC client */
error = nfs4_init_server(server, data);
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- error = nfs4_init_session(server);
- if (error < 0)
- goto error;
-
- /* Probe the root fh to retrieve its FSID */
- error = nfs4_get_rootfh(server, mntfh);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- dprintk("Server FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
-
- nfs4_session_set_rwsize(server);
-
- error = nfs_probe_fsinfo(server, mntfh, fattr);
- if (error < 0)
- goto error;
-
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
dprintk("<-- nfs4_create_server() = %p\n", server);
- nfs_free_fattr(fattr);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_server() = error %d\n", error);
return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
- struct nfs_fattr *fattr;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
parent_server = NFS_SB(data->sb);
parent_client = parent_server->nfs_client;
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- /* Probe the root fh to retrieve its FSID and filehandle */
- error = nfs4_get_rootfh(server, mntfh);
- if (error < 0)
- goto error;
-
- /* probe the filesystem info for this server filesystem */
- error = nfs_probe_fsinfo(server, mntfh, fattr);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- dprintk("Referral FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
-
- nfs_free_fattr(fattr);
dprintk("<-- nfs_create_referral_server() = %p\n", server);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273b..a70e446e1605 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
}
- if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
+ if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
|| !S_ISDIR(fsinfo.fattr->mode)) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot encountered non-directory\n");
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa33..65c8dae4b267 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
*p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
}
else if (iap->ia_valid & ATTR_ATIME) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c304..f9df16de4a56 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
nfs_show_mountd_netid(m, nfss, showdefaults);
}
+#ifdef CONFIG_NFS_V4
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+ struct nfs_client *clp = nfss->nfs_client;
+
+ seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
+ seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
+}
+#else
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+}
+#endif
+
/*
* Describe the mount options in force on this server representation
*/
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
if (version != 4)
nfs_show_mountd_options(m, nfss, showdefaults);
+ else
+ nfs_show_nfsv4_options(m, nfss, showdefaults);
-#ifdef CONFIG_NFS_V4
- if (clp->rpc_ops->version == 4)
- seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
-#endif
if (nfss->options & NFS_OPTION_FSCACHE)
seq_printf(m, ",fsc");
}
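The nfs/super.c change swaps an inline #ifdef CONFIG_NFS_V4 block inside nfs_show_mount_options() for a pair of nfs_show_nfsv4_options() definitions, one real and one empty stub, so the call site stays unconditional. The same shape outside the kernel, with a hypothetical HAVE_FEATURE_X switch:

#include <stdio.h>

/* Build with -DHAVE_FEATURE_X to get the real implementation. */
#ifdef HAVE_FEATURE_X
static void show_feature_x_options(FILE *out)
{
	fprintf(out, ",featurex_opt=on");
}
#else
static void show_feature_x_options(FILE *out)
{
	(void)out;      /* compiled out: the stub keeps the call site unconditional */
}
#endif

static void show_mount_options(FILE *out)
{
	fprintf(out, "rw,noatime");
	show_feature_x_options(out);    /* no #ifdef needed here */
	fprintf(out, "\n");
}

int main(void)
{
	show_mount_options(stdout);
	return 0;
}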
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 12f7109720c2..4a2734758778 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4122,8 +4122,8 @@ nfs4_state_shutdown(void)
nfs4_lock_state();
nfs4_release_reclaim();
__nfs4_state_shutdown();
- nfsd4_destroy_callback_queue();
nfs4_unlock_state();
+ nfsd4_destroy_callback_queue();
}
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ebbf3b6b2457..3c111120b619 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (size_change)
put_write_access(inode);
if (!err)
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(inode, 1);
+ commit_metadata(fhp);
out:
return err;
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc24..d8b6e4259b80 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
#include <linux/fs.h>
#include <linux/types.h>
-#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>
#include <linux/list.h>
diff --git a/fs/pipe.c b/fs/pipe.c
index 69c4c7c13ea9..279eef96c51c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1145,13 +1145,20 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
* and adjust the indexes.
*/
if (pipe->nrbufs) {
- const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
- const unsigned int head = pipe->nrbufs - tail;
+ unsigned int tail;
+ unsigned int head;
+ tail = pipe->curbuf + pipe->nrbufs;
+ if (tail < pipe->buffers)
+ tail = 0;
+ else
+ tail &= (pipe->buffers - 1);
+
+ head = pipe->nrbufs - tail;
if (head)
memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
if (tail)
- memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
+ memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
pipe->curbuf = 0;
@@ -1208,12 +1215,13 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
size = round_pipe_size(arg);
nr_pages = size >> PAGE_SHIFT;
+ ret = -EINVAL;
+ if (!nr_pages)
+ goto out;
+
if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
ret = -EPERM;
goto out;
- } else if (nr_pages < PAGE_SIZE) {
- ret = -EINVAL;
- goto out;
}
ret = pipe_set_size(pipe, nr_pages);
break;
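The pipe_set_size() fix above recomputes how many buffers wrap past the end of the old ring: tail is curbuf + nrbufs reduced modulo the power-of-two ring size, head is the contiguous part starting at curbuf, and the wrapped part is copied from the start of the old array rather than from curbuf again. The short program below runs exactly that index math on an int ring with made-up contents.

#include <assert.h>
#include <stdio.h>
#include <string.h>

/*
 * Copy 'nrbufs' live slots out of a power-of-two ring starting at 'curbuf'
 * into a flat array, the same way the fixed pipe_set_size() does it.
 */
static void linearize(const int *ring, unsigned int ring_size,
		      unsigned int curbuf, unsigned int nrbufs, int *out)
{
	unsigned int tail = curbuf + nrbufs;
	unsigned int head;

	if (tail < ring_size)
		tail = 0;                  /* no wrap-around: everything is contiguous */
	else
		tail &= ring_size - 1;     /* number of slots that wrapped to the front */
	head = nrbufs - tail;

	if (head)
		memcpy(out, ring + curbuf, head * sizeof(*out));
	if (tail)
		memcpy(out + head, ring, tail * sizeof(*out));
}

int main(void)
{
	/* ring of 8 slots, 5 live entries starting at index 6: slots 6,7 then wrap to 0,1,2 */
	int ring[8] = { 102, 103, 104, -1, -1, -1, 100, 101 };
	int out[5];

	linearize(ring, 8, 6, 5, out);
	for (int i = 0; i < 5; i++) {
		printf("%d ", out[i]);
		assert(out[i] == 100 + i);
	}
	printf("\n");
	return 0;
}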
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48ca..d9396a4fc7ff 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
for (pp = np->properties; pp != NULL; pp = pp->next) {
p = pp->name;
+ if (strchr(p, '/'))
+ continue;
+
if (duplicate_name(de, p))
p = fixup_name(np, de, p);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd3..cb6306e63843 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
return size;
}
+static void pad_len_spaces(struct seq_file *m, int len)
+{
+ len = 25 + sizeof(void*) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
+}
+
/*
* display a single VMA to a sequenced file
*/
static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
+ struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
- len = 25 + sizeof(void *) * 6 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c", len, ' ');
+ pad_len_spaces(m, len);
seq_path(m, &file->f_path, "");
+ } else if (mm) {
+ if (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack) {
+ pad_len_spaces(m, len);
+ seq_puts(m, "[stack]");
+ }
}
seq_putc(m, '\n');
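The task_nommu.c hunk pulls the column-padding arithmetic into pad_len_spaces() so the file name and the new "[stack]" annotation line up in the same column. The snippet below reproduces that padding in userspace with printf(); the 25 + sizeof(void *) * 6 width is taken from the code above, while the sample rows are invented.

#include <stdio.h>

/* Pad to a fixed column after 'len' characters have already been printed. */
static void pad_len_spaces(int len)
{
	len = 25 + (int)sizeof(void *) * 6 - len;
	if (len < 1)
		len = 1;
	printf("%*c", len, ' ');
}

int main(void)
{
	int len;

	/* fake /proc/<pid>/maps-style rows; the prefix width varies with the numbers */
	len = printf("08048000-08052000 r-xp ");
	pad_len_spaces(len);
	printf("/bin/example\n");

	len = printf("bfe00000-bfe21000 rw-p ");
	pad_len_spaces(len);
	printf("[stack]\n");
	return 0;
}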
diff --git a/fs/splice.c b/fs/splice.c
index 740e6b9faf7a..efdbfece9932 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
{
struct file *file = sd->u.file;
- return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+ return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
+ sd->flags);
}
/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_in)
return -ESPIPE;
if (off_out) {
- if (!out->f_op || !out->f_op->llseek ||
- out->f_op->llseek == no_llseek)
+ if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL;
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_out)
return -ESPIPE;
if (off_in) {
- if (!in->f_op || !in->f_op->llseek ||
- in->f_op->llseek == no_llseek)
+ if (!(in->f_mode & FMODE_PREAD))
return -EINVAL;
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
return -EFAULT;
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499e..938119ab8dcb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
}
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
}
up_write(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
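The three super.c hunks apply one rule: when a list_for_each_entry_safe() walk drops the lock protecting the list, the cached next pointer may be stale by the time the lock is retaken, so it must be re-read (list_safe_reset_next()). Below is a compact userspace sketch of that re-fetch on a hand-rolled singly linked list under a pthread mutex; like the kernel code, which pins the superblock via sb->s_count, it assumes the current element itself stays alive and linked across the unlocked window.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	int          id;
	struct node *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static void visit_unlocked(struct node *n)
{
	/* stands in for the up_read()/callback/down_read() window in iterate_supers() */
	printf("visiting %d with the list lock dropped\n", n->id);
}

static void walk(void)
{
	struct node *cur, *next;

	pthread_mutex_lock(&list_lock);
	for (cur = head; cur; cur = next) {
		next = cur->next;

		pthread_mutex_unlock(&list_lock);
		visit_unlocked(cur);            /* other threads may modify the list here */
		pthread_mutex_lock(&list_lock);

		/* lock was dropped, must reset next (cur itself is still pinned by us) */
		next = cur->next;
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	for (int i = 3; i >= 1; i--) {
		struct node *n = malloc(sizeof(*n));
		n->id = i;
		n->next = head;
		head = n;
	}
	walk();
	return 0;
}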
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa8..fcc498ec9b33 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
+#include <linux/writeback.h>
#include "sysv.h"
/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
struct inode *inode;
sysv_ino_t ino;
unsigned count;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE
+ };
inode = new_inode(sb);
if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
insert_inode_hash(inode);
mark_inode_dirty(inode);
- sysv_write_inode(inode, 0); /* ensure inode not allocated again */
+ sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
/* That's it. */
unlock_super(sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
*/
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
+ down_read(&c->vfs_sb->s_umount);
writeback_inodes_sb(c->vfs_sb);
+ up_read(&c->vfs_sb->s_umount);
}
/**
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a0fa3bf0d1bb..34640d6dbdcb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1381,14 +1381,6 @@ xfs_vm_writepage(
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
- /*
- * VM calculation for nr_to_write seems off. Bump it way
- * up, this gets simple streaming writes zippy again.
- * To be reviewed again after Jens' writeback changes.
- */
- wbc->nr_to_write *= 4;
-
/*
* Convert delayed allocate, unwritten or unmapped space
* to real space and flush out to disk.
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2ab..e7839ee49e43 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -128,13 +128,12 @@ xfs_nfs_get_inode(
return ERR_PTR(-ESTALE);
/*
- * The XFS_IGET_BULKSTAT means that an invalid inode number is just
- * fine and not an indication of a corrupted filesystem. Because
- * clients can send any kind of invalid file handle, e.g. after
- * a restore on the server we have to deal with this case gracefully.
+ * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
+ * fine and not an indication of a corrupted filesystem as clients can
+ * send invalid file handles and we have to handle this gracefully.
*/
- error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT,
- XFS_ILOCK_SHARED, &ip, 0);
+ error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
+ XFS_ILOCK_SHARED, &ip);
if (error) {
/*
* EINVAL means the inode cluster doesn't exist anymore.
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9c..e59a81062830 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -679,10 +679,9 @@ xfs_ioc_bulkstat(
error = xfs_bulkstat_single(mp, &inlast,
bulkreq.ubuffer, &done);
else /* XFS_IOC_FSBULKSTAT */
- error = xfs_bulkstat(mp, &inlast, &count,
- (bulkstat_one_pf)xfs_bulkstat_one, NULL,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+ sizeof(xfs_bstat_t), bulkreq.ubuffer,
+ &done);
if (error)
return -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bfc..52ed49e6465c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -237,15 +237,12 @@ xfs_bulkstat_one_compat(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* buffer to place output in */
int ubsize, /* size of buffer */
- void *private_data, /* my private data */
- xfs_daddr_t bno, /* starting bno of inode cluster */
int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
int *stat) /* BULKSTAT_RV_... */
{
return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt_compat, bno,
- ubused, dibuff, stat);
+ xfs_bulkstat_one_fmt_compat,
+ ubused, stat);
}
/* copied from xfs_ioctl.c */
@@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat(
int res;
error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
- sizeof(compat_xfs_bstat_t),
- NULL, 0, NULL, NULL, &res);
+ sizeof(compat_xfs_bstat_t), 0, &res);
} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
error = xfs_bulkstat(mp, &inlast, &count,
- xfs_bulkstat_one_compat, NULL,
- sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+ bulkreq.ubuffer, &done);
} else
error = XFS_ERROR(EINVAL);
if (error)
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 2d8b7bc792c9..8c117ff2e3ab 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1632,10 +1632,7 @@ xfs_qm_dqusage_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* on-disk inode pointer (not used) */
int *res) /* result code value */
{
xfs_inode_t *ip;
@@ -1660,7 +1657,7 @@ xfs_qm_dqusage_adjust(
* the case in all other instances. It's OK that we do this because
* quotacheck is done only at mount time.
*/
- if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return error;
}
@@ -1796,12 +1793,13 @@ xfs_qm_quotacheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters in core.
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust, NULL,
- structsz, NULL, BULKSTAT_FG_IGET, &done)))
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_dqusage_adjust,
+ structsz, NULL, &done);
+ if (error)
break;
- } while (! done);
+ } while (!done);
/*
* We've made all the changes that we need to make incore.
@@ -1889,14 +1887,14 @@ xfs_qm_init_quotainos(
mp->m_sb.sb_uquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_uquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0)))
+ 0, 0, &uip)))
return XFS_ERROR(error);
}
if (XFS_IS_OQUOTA_ON(mp) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_gquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0))) {
+ 0, 0, &gip))) {
if (uip)
IRELE(uip);
return XFS_ERROR(error);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805f..b4487764e923 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles(
}
if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
- error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
+ error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip);
if (!error) {
error = xfs_truncate_file(mp, qip);
IRELE(qip);
@@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles(
if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
- error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
+ error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
if (!error2) {
error2 = xfs_truncate_file(mp, qip);
IRELE(qip);
@@ -417,12 +417,12 @@ xfs_qm_scall_getqstat(
}
if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0) == 0)
+ 0, 0, &uip) == 0)
tempuqip = B_TRUE;
}
if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0) == 0)
+ 0, 0, &gip) == 0)
tempgqip = B_TRUE;
}
if (uip) {
@@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* not used */
int *res) /* bulkstat result code */
{
xfs_inode_t *ip;
@@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust(
ipreleased = B_FALSE;
again:
lock_flags = XFS_ILOCK_SHARED;
- if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return (error);
}
@@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_internalqcheck_adjust, NULL,
- 0, NULL, BULKSTAT_FG_IGET, &done))) {
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_internalqcheck_adjust,
+ 0, NULL, &done);
+ if (error) {
+ cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
break;
}
- } while (! done);
- if (error) {
- cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
- }
+ } while (!done);
+
cmn_err(CE_DEBUG, "Checking results against system dquots");
for (i = 0; i < qmtest_hashmask; i++) {
xfs_dqtest_t *d, *n;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a07812..7f159d2a429a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -69,7 +69,9 @@ xfs_swapext(
goto out;
}
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
+ if (!(file->f_mode & FMODE_WRITE) ||
+ !(file->f_mode & FMODE_READ) ||
+ (file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_file;
}
@@ -81,6 +83,7 @@ xfs_swapext(
}
if (!(tmp_file->f_mode & FMODE_WRITE) ||
+ !(tmp_file->f_mode & FMODE_READ) ||
(tmp_file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_tmp_file;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..c7142a064c48 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
return error;
}
+STATIC int
+xfs_imap_lookup(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ xfs_agblock_t agbno,
+ xfs_agblock_t *chunk_agbno,
+ xfs_agblock_t *offset_agbno,
+ int flags)
+{
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp;
+ xfs_agino_t startino;
+ int error;
+ int i;
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error) {
+ xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+ "xfs_ialloc_read_agi() returned "
+ "error %d, agno %d",
+ error, agno);
+ return error;
+ }
+
+ /*
+ * derive and look up the exact inode record for the given agino. If the
+ * record cannot be found, then it's an invalid inode number and we
+ * should abort.
+ */
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+ startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
+ error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+ if (!error) {
+ if (i)
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (!error && i == 0)
+ error = EINVAL;
+ }
+
+ xfs_trans_brelse(tp, agbp);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ if (error)
+ return error;
+
+ /* for untrusted inodes check it is allocated first */
+ if ((flags & XFS_IGET_UNTRUSTED) &&
+ (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
+ return EINVAL;
+
+ *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
+ *offset_agbno = agbno - *chunk_agbno;
+ return 0;
+}
+
/*
* Return the location of the inode in imap, for mapping it into a buffer.
*/
@@ -1235,8 +1292,11 @@ xfs_imap(
if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
#ifdef DEBUG
- /* no diagnostics for bulkstat, ino comes from userspace */
- if (flags & XFS_IGET_BULKSTAT)
+ /*
+ * Don't output diagnostic information for untrusted inodes
+ * as they can be invalid without implying corruption.
+ */
+ if (flags & XFS_IGET_UNTRUSTED)
return XFS_ERROR(EINVAL);
if (agno >= mp->m_sb.sb_agcount) {
xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1323,23 @@ xfs_imap(
return XFS_ERROR(EINVAL);
}
+ blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+
+ /*
+ * For bulkstat and handle lookups, we have an untrusted inode number
+ * that we have to verify is valid. We cannot do this just by reading
+ * the inode buffer as it may have been unlinked and removed leaving
+ * inodes in stale state on disk. Hence we have to do a btree lookup
+ * in all cases where an untrusted inode number is passed.
+ */
+ if (flags & XFS_IGET_UNTRUSTED) {
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
+ if (error)
+ return error;
+ goto out_map;
+ }
+
/*
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
@@ -1277,24 +1354,6 @@ xfs_imap(
return 0;
}
- blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
-
- /*
- * If we get a block number passed from bulkstat we can use it to
- * find the buffer easily.
- */
- if (imap->im_blkno) {
- offset = XFS_INO_TO_OFFSET(mp, ino);
- ASSERT(offset < mp->m_sb.sb_inopblock);
-
- cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
- offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
-
- imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
- imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
- return 0;
- }
-
/*
* If the inode chunks are aligned then use simple maths to
* find the location. Otherwise we have to do a btree
@@ -1304,50 +1363,13 @@ xfs_imap(
offset_agbno = agbno & mp->m_inoalign_mask;
chunk_agbno = agbno - offset_agbno;
} else {
- xfs_btree_cur_t *cur; /* inode btree cursor */
- xfs_inobt_rec_incore_t chunk_rec;
- xfs_buf_t *agbp; /* agi buffer */
- int i; /* temp state */
-
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_ialloc_read_agi() returned "
- "error %d, agno %d",
- error, agno);
- return error;
- }
-
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_lookup() failed");
- goto error0;
- }
-
- error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
- goto error0;
- }
- if (i == 0) {
-#ifdef DEBUG
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
-#endif /* DEBUG */
- error = XFS_ERROR(EINVAL);
- }
- error0:
- xfs_trans_brelse(tp, agbp);
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
if (error)
return error;
- chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
- offset_agbno = agbno - chunk_agbno;
}
+out_map:
ASSERT(agbno >= chunk_agbno);
cluster_agbno = chunk_agbno +
((offset_agbno / blks_per_cluster) * blks_per_cluster);
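xfs_imap_lookup() above rounds the untrusted inode number down to the start of its allocation chunk (agino & ~(XFS_IALLOC_INODES(mp) - 1)), looks that record up in the inode btree and then, for XFS_IGET_UNTRUSTED lookups, rejects the inode if its bit is set in the chunk's free mask. The few lines below only exercise that rounding-and-mask check on invented numbers (64 inodes per chunk, an arbitrary free bitmap), not on real XFS structures.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INODES_PER_CHUNK 64u             /* stand-in for XFS_IALLOC_INODES(mp) */

/* Reject an inode number whose slot is marked free in its chunk's bitmap. */
static bool inode_is_allocated(uint32_t agino, uint32_t chunk_start,
			       uint64_t free_mask)
{
	uint32_t offset = agino - chunk_start;

	return !(free_mask & ((uint64_t)1 << offset));
}

int main(void)
{
	uint32_t agino = 130;
	uint32_t startino = agino & ~(INODES_PER_CHUNK - 1);    /* chunk starts at 128 */
	uint64_t free_mask = 0x0000000000000005ULL;             /* slots 0 and 2 are free */

	printf("chunk starts at %u\n", startino);
	printf("inode %u allocated: %d\n", agino,
	       inode_is_allocated(agino, startino, free_mask));      /* slot 2 is free -> 0 */
	printf("inode %u allocated: %d\n", agino + 1,
	       inode_is_allocated(agino + 1, startino, free_mask));  /* slot 3 is in use -> 1 */
	return 0;
}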
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 75df75f43d48..8f8b91be2c99 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -259,7 +259,6 @@ xfs_iget_cache_miss(
xfs_trans_t *tp,
xfs_ino_t ino,
struct xfs_inode **ipp,
- xfs_daddr_t bno,
int flags,
int lock_flags)
{
@@ -272,7 +271,7 @@ xfs_iget_cache_miss(
if (!ip)
return ENOMEM;
- error = xfs_iread(mp, tp, ip, bno, flags);
+ error = xfs_iread(mp, tp, ip, flags);
if (error)
goto out_destroy;
@@ -358,8 +357,6 @@ out_destroy:
* within the file system for the inode being requested.
* lock_flags -- flags indicating how to lock the inode. See the comment
* for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- * if known (as by bulkstat), else 0.
*/
int
xfs_iget(
@@ -368,8 +365,7 @@ xfs_iget(
xfs_ino_t ino,
uint flags,
uint lock_flags,
- xfs_inode_t **ipp,
- xfs_daddr_t bno)
+ xfs_inode_t **ipp)
{
xfs_inode_t *ip;
int error;
@@ -397,7 +393,7 @@ again:
read_unlock(&pag->pag_ici_lock);
XFS_STATS_INC(xs_ig_missed);
- error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+ error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
flags, lock_flags);
if (error)
goto out_error_or_again;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d53c39de7d05..b76a829d7e20 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -177,7 +177,7 @@ xfs_imap_to_bp(
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
XFS_RANDOM_ITOBP_INOTOBP))) {
- if (iget_flags & XFS_IGET_BULKSTAT) {
+ if (iget_flags & XFS_IGET_UNTRUSTED) {
xfs_trans_brelse(tp, bp);
return XFS_ERROR(EINVAL);
}
@@ -787,7 +787,6 @@ xfs_iread(
xfs_mount_t *mp,
xfs_trans_t *tp,
xfs_inode_t *ip,
- xfs_daddr_t bno,
uint iget_flags)
{
xfs_buf_t *bp;
@@ -797,11 +796,9 @@ xfs_iread(
/*
* Fill in the location information in the in-core inode.
*/
- ip->i_imap.im_blkno = bno;
error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
if (error)
return error;
- ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
/*
* Get pointers to the on-disk inode and the buffer containing it.
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9965e40a4615..78550df13cd6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
* xfs_iget.c prototypes.
*/
int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
- uint, uint, xfs_inode_t **, xfs_daddr_t);
+ uint, uint, xfs_inode_t **);
void xfs_iput(xfs_inode_t *, uint);
void xfs_iput_new(xfs_inode_t *, uint);
void xfs_ilock(xfs_inode_t *, uint);
@@ -500,7 +500,7 @@ do { \
* Flags for xfs_iget()
*/
#define XFS_IGET_CREATE 0x1
-#define XFS_IGET_BULKSTAT 0x2
+#define XFS_IGET_UNTRUSTED 0x2
int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
xfs_ino_t, struct xfs_dinode **,
@@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
struct xfs_inode *, struct xfs_dinode **,
struct xfs_buf **, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
- struct xfs_inode *, xfs_daddr_t, uint);
+ struct xfs_inode *, uint);
void xfs_dinode_to_disk(struct xfs_dinode *,
struct xfs_icdinode *);
void xfs_idestroy_fork(struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..2b86f8610512 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,40 @@ xfs_internal_inum(
(ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
}
-STATIC int
-xfs_bulkstat_one_iget(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- xfs_daddr_t bno, /* starting bno of inode cluster */
- xfs_bstat_t *buf, /* return buffer */
- int *stat) /* BULKSTAT_RV_... */
+/*
+ * Return stat information for one inode.
+ * Return 0 if ok, else errno.
+ */
+int
+xfs_bulkstat_one_int(
+ struct xfs_mount *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode to get data for */
+ void __user *buffer, /* buffer to place output in */
+ int ubsize, /* size of buffer */
+ bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
+ int *ubused, /* bytes used by me */
+ int *stat) /* BULKSTAT_RV_... */
{
- xfs_icdinode_t *dic; /* dinode core info pointer */
- xfs_inode_t *ip; /* incore inode pointer */
- struct inode *inode;
- int error;
+ struct xfs_icdinode *dic; /* dinode core info pointer */
+ struct xfs_inode *ip; /* incore inode pointer */
+ struct inode *inode;
+ struct xfs_bstat *buf; /* return buffer */
+ int error = 0; /* error value */
+
+ *stat = BULKSTAT_RV_NOTHING;
+
+ if (!buffer || xfs_internal_inum(mp, ino))
+ return XFS_ERROR(EINVAL);
+
+ buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
+ if (!buf)
+ return XFS_ERROR(ENOMEM);
error = xfs_iget(mp, NULL, ino,
- XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
+ XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
if (error) {
*stat = BULKSTAT_RV_NOTHING;
- return error;
+ goto out_free;
}
ASSERT(ip != NULL);
@@ -127,77 +143,16 @@ xfs_bulkstat_one_iget(
buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
break;
}
-
xfs_iput(ip, XFS_ILOCK_SHARED);
- return error;
-}
-STATIC void
-xfs_bulkstat_one_dinode(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- xfs_dinode_t *dic, /* dinode inode pointer */
- xfs_bstat_t *buf) /* return buffer */
-{
- /*
- * The inode format changed when we moved the link count and
- * made it 32 bits long. If this is an old format inode,
- * convert it in memory to look like a new one. If it gets
- * flushed to disk we will convert back before flushing or
- * logging it. We zero out the new projid field and the old link
- * count field. We'll handle clearing the pad field (the remains
- * of the old uuid field) when we actually convert the inode to
- * the new format. We don't change the version number so that we
- * can distinguish this from a real new format inode.
- */
- if (dic->di_version == 1) {
- buf->bs_nlink = be16_to_cpu(dic->di_onlink);
- buf->bs_projid = 0;
- } else {
- buf->bs_nlink = be32_to_cpu(dic->di_nlink);
- buf->bs_projid = be16_to_cpu(dic->di_projid);
- }
+ error = formatter(buffer, ubsize, ubused, buf);
- buf->bs_ino = ino;
- buf->bs_mode = be16_to_cpu(dic->di_mode);
- buf->bs_uid = be32_to_cpu(dic->di_uid);
- buf->bs_gid = be32_to_cpu(dic->di_gid);
- buf->bs_size = be64_to_cpu(dic->di_size);
- buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
- buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
- buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
- buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
- buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
- buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
- buf->bs_xflags = xfs_dic2xflags(dic);
- buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
- buf->bs_extents = be32_to_cpu(dic->di_nextents);
- buf->bs_gen = be32_to_cpu(dic->di_gen);
- memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
- buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
- buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
- buf->bs_aextents = be16_to_cpu(dic->di_anextents);
- buf->bs_forkoff = XFS_DFORK_BOFF(dic);
+ if (!error)
+ *stat = BULKSTAT_RV_DIDONE;
- switch (dic->di_format) {
- case XFS_DINODE_FMT_DEV:
- buf->bs_rdev = xfs_dinode_get_rdev(dic);
- buf->bs_blksize = BLKDEV_IOSIZE;
- buf->bs_blocks = 0;
- break;
- case XFS_DINODE_FMT_LOCAL:
- case XFS_DINODE_FMT_UUID:
- buf->bs_rdev = 0;
- buf->bs_blksize = mp->m_sb.sb_blocksize;
- buf->bs_blocks = 0;
- break;
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- buf->bs_rdev = 0;
- buf->bs_blksize = mp->m_sb.sb_blocksize;
- buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
- break;
- }
+ out_free:
+ kmem_free(buf);
+ return error;
}
/* Return 0 on success or positive error */
@@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt(
return 0;
}
-/*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
- */
-int /* error status */
-xfs_bulkstat_one_int(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
- xfs_daddr_t bno, /* starting bno of inode cluster */
- int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
- int *stat) /* BULKSTAT_RV_... */
-{
- xfs_bstat_t *buf; /* return buffer */
- int error = 0; /* error value */
- xfs_dinode_t *dip; /* dinode inode pointer */
-
- dip = (xfs_dinode_t *)dibuff;
- *stat = BULKSTAT_RV_NOTHING;
-
- if (!buffer || xfs_internal_inum(mp, ino))
- return XFS_ERROR(EINVAL);
-
- buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
-
- if (dip == NULL) {
- /* We're not being passed a pointer to a dinode. This happens
- * if BULKSTAT_FG_IGET is selected. Do the iget.
- */
- error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
- if (error)
- goto out_free;
- } else {
- xfs_bulkstat_one_dinode(mp, ino, dip, buf);
- }
-
- error = formatter(buffer, ubsize, ubused, buf);
- if (error)
- goto out_free;
-
- *stat = BULKSTAT_RV_DIDONE;
-
- out_free:
- kmem_free(buf);
- return error;
-}
-
int
xfs_bulkstat_one(
xfs_mount_t *mp, /* mount point for filesystem */
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* buffer to place output in */
int ubsize, /* size of buffer */
- void *private_data, /* my private data */
- xfs_daddr_t bno, /* starting bno of inode cluster */
int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
int *stat) /* BULKSTAT_RV_... */
{
return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt, bno,
- ubused, dibuff, stat);
-}
-
-/*
- * Test to see whether we can use the ondisk inode directly, based
- * on the given bulkstat flags, filling in dipp accordingly.
- * Returns zero if the inode is dodgey.
- */
-STATIC int
-xfs_bulkstat_use_dinode(
- xfs_mount_t *mp,
- int flags,
- xfs_buf_t *bp,
- int clustidx,
- xfs_dinode_t **dipp)
-{
- xfs_dinode_t *dip;
- unsigned int aformat;
-
- *dipp = NULL;
- if (!bp || (flags & BULKSTAT_FG_IGET))
- return 1;
- dip = (xfs_dinode_t *)
- xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
- /*
- * Check the buffer containing the on-disk inode for di_mode == 0.
- * This is to prevent xfs_bulkstat from picking up just reclaimed
- * inodes that have their in-core state initialized but not flushed
- * to disk yet. This is a temporary hack that would require a proper
- * fix in the future.
- */
- if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
- !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
- !dip->di_mode)
- return 0;
- if (flags & BULKSTAT_FG_QUICK) {
- *dipp = dip;
- return 1;
- }
- /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
- aformat = dip->di_aformat;
- if ((XFS_DFORK_Q(dip) == 0) ||
- (aformat == XFS_DINODE_FMT_LOCAL) ||
- (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
- *dipp = dip;
- return 1;
- }
- return 1;
+ xfs_bulkstat_one_fmt, ubused, stat);
}
#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
@@ -342,10 +196,8 @@ xfs_bulkstat(
xfs_ino_t *lastinop, /* last inode returned */
int *ubcountp, /* size of buffer/count returned */
bulkstat_one_pf formatter, /* func that'd fill a single buf */
- void *private_data,/* private data for formatter */
size_t statstruct_size, /* sizeof struct filling */
char __user *ubuffer, /* buffer with inode stats */
- int flags, /* defined in xfs_itable.h */
int *done) /* 1 if there are more stats to get */
{
xfs_agblock_t agbno=0;/* allocation group block number */
@@ -380,14 +232,12 @@ xfs_bulkstat(
int ubelem; /* spaces used in user's buffer */
int ubused; /* bytes used by formatter */
xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
- xfs_dinode_t *dip; /* ptr into bp for specific inode */
/*
* Get the last inode value, see if there's nothing to do.
*/
ino = (xfs_ino_t)*lastinop;
lastino = ino;
- dip = NULL;
agno = XFS_INO_TO_AGNO(mp, ino);
agino = XFS_INO_TO_AGINO(mp, ino);
if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +462,6 @@ xfs_bulkstat(
irbp->ir_startino) +
((chunkidx & nimask) >>
mp->m_sb.sb_inopblog);
-
- if (flags & (BULKSTAT_FG_QUICK |
- BULKSTAT_FG_INLINE)) {
- int offset;
-
- ino = XFS_AGINO_TO_INO(mp, agno,
- agino);
- bno = XFS_AGB_TO_DADDR(mp, agno,
- agbno);
-
- /*
- * Get the inode cluster buffer
- */
- if (bp)
- xfs_buf_relse(bp);
-
- error = xfs_inotobp(mp, NULL, ino, &dip,
- &bp, &offset,
- XFS_IGET_BULKSTAT);
-
- if (!error)
- clustidx = offset / mp->m_sb.sb_inodesize;
- if (XFS_TEST_ERROR(error != 0,
- mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
- XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
- bp = NULL;
- ubleft = 0;
- rval = error;
- break;
- }
- }
}
ino = XFS_AGINO_TO_INO(mp, agno, agino);
bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +477,13 @@ xfs_bulkstat(
* when the chunk is used up.
*/
irbp->ir_freecount++;
- if (!xfs_bulkstat_use_dinode(mp, flags, bp,
- clustidx, &dip)) {
- lastino = ino;
- continue;
- }
- /*
- * If we need to do an iget, cannot hold bp.
- * Drop it, until starting the next cluster.
- */
- if ((flags & BULKSTAT_FG_INLINE) && !dip) {
- if (bp)
- xfs_buf_relse(bp);
- bp = NULL;
- }
/*
* Get the inode and fill in a single buffer.
- * BULKSTAT_FG_QUICK uses dip to fill it in.
- * BULKSTAT_FG_IGET uses igets.
- * BULKSTAT_FG_INLINE uses dip if we have an
- * inline attr fork, else igets.
- * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
- * This is also used to count inodes/blks, etc
- * in xfs_qm_quotacheck.
*/
ubused = statstruct_size;
- error = formatter(mp, ino, ubufp,
- ubleft, private_data,
- bno, &ubused, dip, &fmterror);
+ error = formatter(mp, ino, ubufp, ubleft,
+ &ubused, &fmterror);
if (fmterror == BULKSTAT_RV_NOTHING) {
if (error && error != ENOENT &&
error != EINVAL) {
@@ -778,8 +575,7 @@ xfs_bulkstat_single(
*/
ino = (xfs_ino_t)*lastinop;
- error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
- NULL, 0, NULL, NULL, &res);
+ error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
if (error) {
/*
* Special case way failed, do it the "long" way
@@ -788,8 +584,7 @@ xfs_bulkstat_single(
(*lastinop)--;
count = 1;
if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
- NULL, sizeof(xfs_bstat_t), buffer,
- BULKSTAT_FG_IGET, done))
+ sizeof(xfs_bstat_t), buffer, done))
return error;
if (count == 0 || (xfs_ino_t)*lastinop != ino)
return error == EFSCORRUPTED ?
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 20792bf45946..97295d91d170 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
xfs_ino_t ino,
void __user *buffer,
int ubsize,
- void *private_data,
- xfs_daddr_t bno,
int *ubused,
- void *dip,
int *stat);
/*
@@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
#define BULKSTAT_RV_GIVEUP 2
/*
- * Values for bulkstat flag argument.
- */
-#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
-#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
-#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
-
-/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
int /* error status */
@@ -56,10 +46,8 @@ xfs_bulkstat(
xfs_ino_t *lastino, /* last inode returned */
int *count, /* size of buffer/count returned */
bulkstat_one_pf formatter, /* func that'd fill a single buf */
- void *private_data, /* private data for formatter */
size_t statstruct_size,/* sizeof struct that we're filling */
char __user *ubuffer,/* buffer with inode stats */
- int flags, /* flag to control access method */
int *done); /* 1 if there are more stats to get */
int
@@ -82,9 +70,7 @@ xfs_bulkstat_one_int(
void __user *buffer,
int ubsize,
bulkstat_one_fmt_pf formatter,
- xfs_daddr_t bno,
int *ubused,
- void *dibuff,
int *stat);
int
@@ -93,10 +79,7 @@ xfs_bulkstat_one(
xfs_ino_t ino,
void __user *buffer,
int ubsize,
- void *private_data,
- xfs_daddr_t bno,
int *ubused,
- void *dibuff,
int *stat);
typedef int (*inumbers_fmt_pf)(
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ed0684cc50ee..9ac5cfab27b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3198,7 +3198,7 @@ xlog_recover_process_one_iunlink(
int error;
ino = XFS_AGINO_TO_INO(mp, agno, agino);
- error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
+ error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
if (error)
goto fail;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d59f4e8bedcf..69f62d8b2816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1300,7 +1300,7 @@ xfs_mountfs(
* Get and sanity-check the root inode.
* Save the pointer to it in the mount structure.
*/
- error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
if (error) {
cmn_err(CE_WARN, "XFS: failed to read root inode");
goto out_log_dealloc;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 16445518506d..a2d32ce335aa 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2277,12 +2277,12 @@ xfs_rtmount_inodes(
sbp = &mp->m_sb;
if (sbp->sb_rbmino == NULLFSINO)
return 0;
- error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
if (error)
return error;
ASSERT(mp->m_rbmip != NULL);
ASSERT(sbp->sb_rsumino != NULLFSINO);
- error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
if (error) {
IRELE(mp->m_rbmip);
return error;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 785ff101da0a..2559dfec946b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -62,7 +62,7 @@ xfs_trans_iget(
{
int error;
- error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
+ error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
if (!error && tp)
xfs_trans_ijoin(tp, *ipp, lock_flags);
return error;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index a06bd62504fc..c1646838898f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1269,7 +1269,7 @@ xfs_lookup(
if (error)
goto out;
- error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
+ error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
if (error)
goto out_free_name;