75 files changed, 1095 insertions, 925 deletions
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 475f9c597cb7..326dc08d3e3f 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -39,27 +39,17 @@
 
 /* #define DEBUG */
 
-#ifdef DEBUG
-#define DPRINTK(fmt, args...)				\
-do {							\
-	printk(KERN_DEBUG "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
-#else
-#define DPRINTK(fmt, args...) do {} while (0)
-#endif
-
-#define AUTOFS_WARN(fmt, args...)			\
-do {							\
+#define DPRINTK(fmt, ...)				\
+	pr_debug("pid %d: %s: " fmt "\n",		\
+		current->pid, __func__, ##__VA_ARGS__)
+
+#define AUTOFS_WARN(fmt, ...)				\
 	printk(KERN_WARNING "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
+		current->pid, __func__, ##__VA_ARGS__)
 
-#define AUTOFS_ERROR(fmt, args...)			\
-do {							\
+#define AUTOFS_ERROR(fmt, ...)				\
 	printk(KERN_ERR "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
+		current->pid, __func__, ##__VA_ARGS__)
 
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 25435987d6ae..e1fbdeef85db 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
-		wq->wait_queue_token, wq->name.len, wq->name.name, type);
+		(unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
 
 	memset(&pkt,0,sizeof pkt); /* For security reasons */
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 54b8c28bebc8..720d885e8dca 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 		befs_data_stream *data = &befs_ino->i_data.ds;
 		befs_off_t len = data->size;
 
-		befs_debug(sb, "Follow long symlink");
-
-		link = kmalloc(len, GFP_NOFS);
-		if (!link) {
-			link = ERR_PTR(-ENOMEM);
-		} else if (befs_read_lsymlink(sb, data, link, len) != len) {
-			kfree(link);
-			befs_error(sb, "Failed to read entire long symlink");
+		if (len == 0) {
+			befs_error(sb, "Long symlink with illegal length");
 			link = ERR_PTR(-EIO);
 		} else {
-			link[len - 1] = '\0';
+			befs_debug(sb, "Follow long symlink");
+
+			link = kmalloc(len, GFP_NOFS);
+			if (!link) {
+				link = ERR_PTR(-ENOMEM);
+			} else if (befs_read_lsymlink(sb, data, link, len) != len) {
+				kfree(link);
+				befs_error(sb, "Failed to read entire long symlink");
+				link = ERR_PTR(-EIO);
+			} else {
+				link[len - 1] = '\0';
+			}
 		}
 	} else {
 		link = befs_ino->i_data.symlink;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f28680553288..ff77262e887c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -387,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	struct inode *bd_inode = filp->f_mapping->host;
 	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
+	
+	error = filemap_write_and_wait_range(filp->f_mapping, start, end);
+	if (error)
+		return error;
 
 	/*
 	 * There is no need to serialise calls to blkdev_issue_flush with
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0469263e327e..03912c5c6f49 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
 #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
 static inline u##bits btrfs_##name(struct extent_buffer *eb)		\
 {									\
-	type *p = kmap_atomic(eb->first_page, KM_USER0);		\
+	type *p = page_address(eb->first_page);				\
 	u##bits res = le##bits##_to_cpu(p->member);			\
-	kunmap_atomic(p, KM_USER0);					\
 	return res;							\
 }									\
 static inline void btrfs_set_##name(struct extent_buffer *eb,		\
 				    u##bits val)			\
 {									\
-	type *p = kmap_atomic(eb->first_page, KM_USER0);		\
+	type *p = page_address(eb->first_page);				\
 	p->member = cpu_to_le##bits(val);				\
-	kunmap_atomic(p, KM_USER0);					\
 }
 
 #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits)		\
@@ -2367,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_root *root,
-			struct btrfs_block_rsv *block_rsv, int update_ref);
+void btrfs_drop_snapshot(struct btrfs_root *root,
+			 struct btrfs_block_rsv *block_rsv, int update_ref);
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 66bac226944e..80d6148f60ac 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 
 
 		for (i = 0; i < multi->num_stripes; i++, stripe++) {
+			if (!stripe->dev->can_discard)
+				continue;
+
 			ret = btrfs_issue_discard(stripe->dev->bdev,
 						  stripe->physical,
 						  stripe->length);
@@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 				discarded_bytes += stripe->length;
 			else if (ret != -EOPNOTSUPP)
 				break;
+
+			/*
+			 * Just in case we get back EOPNOTSUPP for some reason,
+			 * just ignore the return value so we don't screw up
+			 * people calling discard_extent.
+			 */
+			ret = 0;
 		}
 		kfree(multi);
 	}
-	if (discarded_bytes && ret == -EOPNOTSUPP)
-		ret = 0;
 
 	if (actual_bytes)
 		*actual_bytes = discarded_bytes;
@@ -6269,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
  * also make sure backrefs for the shared block and all lower level
  * blocks are properly updated.
  */
-int btrfs_drop_snapshot(struct btrfs_root *root,
-			struct btrfs_block_rsv *block_rsv, int update_ref)
+void btrfs_drop_snapshot(struct btrfs_root *root,
+			 struct btrfs_block_rsv *block_rsv, int update_ref)
 {
 	struct btrfs_path *path;
 	struct btrfs_trans_handle *trans;
@@ -6283,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	int level;
 
 	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
+	if (!path) {
+		err = -ENOMEM;
+		goto out;
+	}
 
 	wc = kzalloc(sizeof(*wc), GFP_NOFS);
 	if (!wc) {
 		btrfs_free_path(path);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto out;
 	}
 
 	trans = btrfs_start_transaction(tree_root, 0);
@@ -6318,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 		path->lowest_level = 0;
 		if (ret < 0) {
 			err = ret;
-			goto out;
+			goto out_free;
 		}
 		WARN_ON(ret > 0);
 
@@ -6425,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 		free_extent_buffer(root->commit_root);
 		kfree(root);
 	}
-out:
+out_free:
 	btrfs_end_transaction_throttle(trans, tree_root);
 	kfree(wc);
 	btrfs_free_path(path);
-	return err;
+out:
+	if (err)
+		btrfs_std_error(root->fs_info, err);
+	return;
 }
 
 /*
@@ -6720,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 	struct btrfs_device *device;
+	u64 min_free;
+	int index;
+	int dev_nr = 0;
+	int dev_min = 1;
 	int full = 0;
 	int ret = 0;
 
@@ -6729,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	if (!block_group)
 		return -1;
 
+	min_free = btrfs_block_group_used(&block_group->item);
+
 	/* no bytes used, we're good */
-	if (!btrfs_block_group_used(&block_group->item))
+	if (!min_free)
 		goto out;
 
 	space_info = block_group->space_info;
@@ -6746,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	 * all of the extents from this block group.  If we can, we're good
 	 */
 	if ((space_info->total_bytes != block_group->key.offset) &&
-	   (space_info->bytes_used + space_info->bytes_reserved +
-	    space_info->bytes_pinned + space_info->bytes_readonly +
-	    btrfs_block_group_used(&block_group->item) <
-	    space_info->total_bytes)) {
+	    (space_info->bytes_used + space_info->bytes_reserved +
+	     space_info->bytes_pinned + space_info->bytes_readonly +
+	     min_free < space_info->total_bytes)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
@@ -6766,9 +6785,29 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	if (full)
 		goto out;
 
+	/*
+	 * index:
+	 *      0: raid10
+	 *      1: raid1
+	 *      2: dup
+	 *      3: raid0
+	 *      4: single
+	 */
+	index = get_block_group_index(block_group);
+	if (index == 0) {
+		dev_min = 4;
+		min_free /= 2;
+	} else if (index == 1) {
+		dev_min = 2;
+	} else if (index == 2) {
+		min_free *= 2;
+	} else if (index == 3) {
+		dev_min = fs_devices->rw_devices;
+		min_free /= dev_min;
+	}
+
 	mutex_lock(&root->fs_info->chunk_mutex);
 	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
-		u64 min_free = btrfs_block_group_used(&block_group->item);
 		u64 dev_offset;
 
 		/*
@@ -6779,7 +6818,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 			ret = find_free_dev_extent(NULL, device, min_free,
 						   &dev_offset, NULL);
 			if (!ret)
+				dev_nr++;
+
+			if (dev_nr >= dev_min)
 				break;
+
 			ret = -1;
 		}
 	}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 658d66959abe..e7872e485f13 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	spin_lock(&root->fs_info->defrag_inodes_lock);
 	if (!BTRFS_I(inode)->in_defrag)
 		__btrfs_add_inode_defrag(inode, defrag);
+	else
+		kfree(defrag);
 	spin_unlock(&root->fs_info->defrag_inodes_lock);
 	return 0;
 }
@@ -1638,11 +1640,15 @@ static long btrfs_fallocate(struct file *file, int mode,
 
 	cur_offset = alloc_start;
 	while (1) {
+		u64 actual_end;
+
 		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
 				      alloc_end - cur_offset, 0);
 		BUG_ON(IS_ERR_OR_NULL(em));
 		last_byte = min(extent_map_end(em), alloc_end);
+		actual_end = min_t(u64, extent_map_end(em), offset + len);
 		last_byte = (last_byte + mask) & ~mask;
+
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
 		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
@@ -1655,6 +1661,16 @@ static long btrfs_fallocate(struct file *file, int mode,
 				free_extent_map(em);
 				break;
 			}
+		} else if (actual_end > inode->i_size &&
+			   !(mode & FALLOC_FL_KEEP_SIZE)) {
+			/*
+			 * We didn't need to allocate any more space, but we
+			 * still extended the size of the file so we need to
+			 * update i_size.
+			 */
+			inode->i_ctime = CURRENT_TIME;
+			i_size_write(inode, actual_end);
+			btrfs_ordered_update_i_size(inode, actual_end, NULL);
 		}
 		free_extent_map(em);
 
@@ -1804,10 +1820,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
 		}
 	}
 
-	if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
-		return -EINVAL;
-	if (offset > inode->i_sb->s_maxbytes)
-		return -EINVAL;
+	if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (offset > inode->i_sb->s_maxbytes) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	/* Special lock needed here? */
 	if (offset != file->f_pos) {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6377713f639c..6a265b9f85f2 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1168,9 +1168,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
 		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
 }
 
-static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
-			      struct btrfs_free_space *info, u64 offset,
-			      u64 bytes)
+static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
+				       struct btrfs_free_space *info,
+				       u64 offset, u64 bytes)
 {
 	unsigned long start, count;
 
@@ -1181,6 +1181,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
 	bitmap_clear(info->bitmap, start, count);
 
 	info->bytes -= bytes;
+}
+
+static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
+			      struct btrfs_free_space *info, u64 offset,
+			      u64 bytes)
+{
+	__bitmap_clear_bits(ctl, info, offset, bytes);
 	ctl->free_space -= bytes;
 }
 
@@ -1984,7 +1991,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
 		return 0;
 
 	ret = search_start;
-	bitmap_clear_bits(ctl, entry, ret, bytes);
+	__bitmap_clear_bits(ctl, entry, ret, bytes);
 
 	return ret;
 }
@@ -2039,7 +2046,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 				continue;
 			}
 		} else {
-
 			ret = entry->offset;
 
 			entry->offset += bytes;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 15fceefbca0a..0ccc7438ad34 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7354,11 +7354,15 @@ static int btrfs_set_page_dirty(struct page *page)
 static int btrfs_permission(struct inode *inode, int mask)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	umode_t mode = inode->i_mode;
 
-	if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
-		return -EROFS;
-	if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
-		return -EACCES;
+	if (mask & MAY_WRITE &&
+	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+		if (btrfs_root_readonly(root))
+			return -EROFS;
+		if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
+			return -EACCES;
+	}
 	return generic_permission(inode, mask);
 }
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7cf013349941..970977aab224 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2236,6 +2236,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		btrfs_wait_ordered_range(src, off, len);
 	}
 
+	/* truncate page cache pages from target inode range */
+	truncate_inode_pages_range(&inode->i_data, off,
+				   ALIGN(off + len, PAGE_CACHE_SIZE) - 1);
+
 	/* clone data */
 	key.objectid = btrfs_ino(src);
 	key.type = BTRFS_EXTENT_DATA_KEY;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index babee65f8eda..786639fca067 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 				  struct extent_buffer *eb, int slot,
 				  struct btrfs_key *key)
 {
-	struct inode *dir;
-	int ret;
 	struct btrfs_inode_ref *ref;
+	struct btrfs_dir_item *di;
+	struct inode *dir;
 	struct inode *inode;
-	char *name;
-	int namelen;
 	unsigned long ref_ptr;
 	unsigned long ref_end;
+	char *name;
+	int namelen;
+	int ret;
 	int search_done = 0;
 
 	/*
@@ -909,6 +910,25 @@ again:
 	}
 	btrfs_release_path(path);
 
+	/* look for a conflicting sequence number */
+	di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
+					 btrfs_inode_ref_index(eb, ref),
+					 name, namelen, 0);
+	if (di && !IS_ERR(di)) {
+		ret = drop_one_dir_item(trans, root, path, dir, di);
+		BUG_ON(ret);
+	}
+	btrfs_release_path(path);
+
+	/* look for a conflicing name */
+	di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
+				   name, namelen, 0);
+	if (di && !IS_ERR(di)) {
+		ret = drop_one_dir_item(trans, root, path, dir, di);
+		BUG_ON(ret);
+	}
+	btrfs_release_path(path);
+
 insert:
 	/* insert our name */
 	ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 53875ae73ad4..f2a4cc79da61 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	unsigned long limit;
 	unsigned long last_waited = 0;
 	int force_reg = 0;
+	int sync_pending = 0;
 	struct blk_plug plug;
 
 	/*
@@ -229,6 +230,22 @@ loop_lock:
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
+		/*
+		 * if we're doing the sync list, record that our
+		 * plug has some sync requests on it
+		 *
+		 * If we're doing the regular list and there are
+		 * sync requests sitting around, unplug before
+		 * we add more
+		 */
+		if (pending_bios == &device->pending_sync_bios) {
+			sync_pending = 1;
+		} else if (sync_pending) {
+			blk_finish_plug(&plug);
+			blk_start_plug(&plug);
+			sync_pending = 0;
+		}
+
 		submit_bio(cur->bi_rw, cur);
 		num_run++;
 		batch_run++;
@@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 			fs_devices->rw_devices--;
 		}
 
+		if (device->can_discard)
+			fs_devices->num_can_discard--;
+
 		new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
 		BUG_ON(!new_device);
 		memcpy(new_device, device, sizeof(*new_device));
@@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 		new_device->bdev = NULL;
 		new_device->writeable = 0;
 		new_device->in_fs_metadata = 0;
+		new_device->can_discard = 0;
 		list_replace_rcu(&device->dev_list, &new_device->dev_list);
 
 		call_rcu(&device->rcu, free_device);
@@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 				fmode_t flags, void *holder)
 {
+	struct request_queue *q;
 	struct block_device *bdev;
 	struct list_head *head = &fs_devices->devices;
 	struct btrfs_device *device;
@@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 			seeding = 0;
 		}
 
+		q = bdev_get_queue(bdev);
+		if (blk_queue_discard(q)) {
+			device->can_discard = 1;
+			fs_devices->num_can_discard++;
+		}
+
 		device->bdev = bdev;
 		device->in_fs_metadata = 0;
 		device->mode = flags;
@@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
 
 	max_hole_start = search_start;
 	max_hole_size = 0;
+	hole_size = 0;
 
 	if (search_start >= search_end) {
 		ret = -ENOSPC;
@@ -917,7 +946,14 @@ next:
 		cond_resched();
 	}
 
-	hole_size = search_end- search_start;
+	/*
+	 * At this point, search_start should be the end of
+	 * allocated dev extents, and when shrinking the device,
+	 * search_end may be smaller than search_start.
+	 */
+	if (search_end > search_start)
+		hole_size = search_end - search_start;
+
 	if (hole_size > max_hole_size) {
 		max_hole_start = search_start;
 		max_hole_size = hole_size;
@@ -1543,6 +1579,7 @@ error:
 
 int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 {
+	struct request_queue *q;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
 	struct block_device *bdev;
@@ -1612,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
 	lock_chunks(root);
 
+	q = bdev_get_queue(bdev);
+	if (blk_queue_discard(q))
+		device->can_discard = 1;
 	device->writeable = 1;
 	device->work.func = pending_bios_fn;
 	generate_random_uuid(device->uuid);
@@ -1647,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	root->fs_info->fs_devices->num_devices++;
 	root->fs_info->fs_devices->open_devices++;
 	root->fs_info->fs_devices->rw_devices++;
+	if (device->can_discard)
+		root->fs_info->fs_devices->num_can_discard++;
 	root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
 
 	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
@@ -2413,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			total_avail = device->total_bytes - device->bytes_used;
 		else
 			total_avail = 0;
-		/* avail is off by max(alloc_start, 1MB), but that is the same
-		 * for all devices, so it doesn't hurt the sorting later on
-		 */
+
+		/* If there is no space on this device, skip it. */
+		if (total_avail == 0)
+			continue;
 
 		ret = find_free_dev_extent(trans, device,
 					   max_stripe_size * dev_stripes,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7c12d61ae7ae..6d866db4e177 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -48,6 +48,7 @@ struct btrfs_device {
 	int writeable;
 	int in_fs_metadata;
 	int missing;
+	int can_discard;
 
 	spinlock_t io_lock;
 
@@ -104,6 +105,7 @@ struct btrfs_fs_devices {
 	u64 rw_devices;
 	u64 missing_devices;
 	u64 total_rw_bytes;
+	u64 num_can_discard;
 	struct block_device *latest_bdev;
 
 	/* all of the devices in the FS, protected by a mutex
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 2fe3cf13b2e9..6d40656e1e29 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 
 #ifdef CONFIG_CIFS_STATS2
 			seq_printf(m, " In Send: %d In MaxReq Wait: %d",
-				atomic_read(&server->inSend),
+				atomic_read(&server->in_send),
 				atomic_read(&server->num_waiters));
 #endif
 
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8d8f28c94c0f..6873bb634a97 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -141,10 +141,11 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
 
 	rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
 	if (rc < 0) {
-		cERROR(1, "%s: Failed to resolve server part of %s to IP: %d",
-			  __func__, *devname, rc);
+		cFYI(1, "%s: Failed to resolve server part of %s to IP: %d",
+			__func__, *devname, rc);
 		goto compose_mount_options_err;
 	}
+
 	/* md_len = strlen(...) + 12 for 'sep+prefixpath='
 	 * assuming that we have 'unc=' and 'ip=' in
 	 * the original sb_mountdata
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 21de1d6d5849..d0f59faefb78 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -991,24 +991,6 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 	return pntsd;
 }
 
-static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
-		struct cifs_ntsd *pnntsd, u32 acllen)
-{
-	int xid, rc;
-	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
-
-	if (IS_ERR(tlink))
-		return PTR_ERR(tlink);
-
-	xid = GetXid();
-	rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen);
-	FreeXid(xid);
-	cifs_put_tlink(tlink);
-
-	cFYI(DBG2, "SetCIFSACL rc = %d", rc);
-	return rc;
-}
-
 static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
 		struct cifs_ntsd *pnntsd, u32 acllen)
 {
@@ -1047,18 +1029,10 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 				struct inode *inode, const char *path)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-	struct cifsFileInfo *open_file;
-	int rc;
 
 	cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode);
 
-	open_file = find_readable_file(CIFS_I(inode), true);
-	if (!open_file)
-		return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
-
-	rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
-	cifsFileInfo_put(open_file);
-	return rc;
+	return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
 }
 
 /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 212e5629cc1d..f93eb948d071 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -563,6 +563,10 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
 		mutex_unlock(&dir->i_mutex);
 		dput(dentry);
 		dentry = child;
+		if (!dentry->d_inode) {
+			dput(dentry);
+			dentry = ERR_PTR(-ENOENT);
+		}
 	} while (!IS_ERR(dentry));
 	_FreeXid(xid);
 	kfree(full_path);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index cb71dc1f94d1..95da8027983d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "1.74"
+#define CIFS_VERSION   "1.75"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 38ce6d44b145..95dad9d14cf1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -291,7 +291,7 @@ struct TCP_Server_Info {
 	struct fscache_cookie   *fscache; /* client index cache cookie */
 #endif
 #ifdef CONFIG_CIFS_STATS2
-	atomic_t inSend; /* requests trying to send */
+	atomic_t in_send; /* requests trying to send */
 	atomic_t num_waiters;   /* blocked waiting to get in sendrecv */
 #endif
 };
@@ -672,12 +672,54 @@ struct mid_q_entry {
 	bool multiEnd:1;	/* both received */
 };
 
-struct oplock_q_entry {
-	struct list_head qhead;
-	struct inode *pinode;
-	struct cifs_tcon *tcon;
-	__u16 netfid;
-};
+/*	Make code in transport.c a little cleaner by moving
+	update of optional stats into function below */
+#ifdef CONFIG_CIFS_STATS2
+
+static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
+{
+	atomic_inc(&server->in_send);
+}
+
+static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
+{
+	atomic_dec(&server->in_send);
+}
+
+static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
+{
+	atomic_inc(&server->num_waiters);
+}
+
+static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
+{
+	atomic_dec(&server->num_waiters);
+}
+
+static inline void cifs_save_when_sent(struct mid_q_entry *mid)
+{
+	mid->when_sent = jiffies;
+}
+#else
+static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
+{
+}
+static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
+{
+}
+
+static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
+{
+}
+
+static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
+{
+}
+
+static inline void cifs_save_when_sent(struct mid_q_entry *mid)
+{
+}
+#endif
 
 /* for pending dnotify requests */
 struct dir_notify_req {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 80c2e3add3a2..633c246b6775 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2878,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info)
 	kfree(volume_info->username);
 	kzfree(volume_info->password);
 	kfree(volume_info->UNC);
-	kfree(volume_info->UNCip);
+	if (volume_info->UNCip != volume_info->UNC + 2)
+		kfree(volume_info->UNCip);
 	kfree(volume_info->domainname);
 	kfree(volume_info->iocharset);
 	kfree(volume_info->prepath);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ae576fbb5142..72d448bf96ce 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -105,8 +105,8 @@ cifs_bp_rename_retry:
 	}
 	rcu_read_unlock();
 	if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
-		cERROR(1, "did not end path lookup where expected namelen is %d",
-			namelen);
+		cFYI(1, "did not end path lookup where expected. namelen=%d "
+			"dfsplen=%d", namelen, dfsplen);
 		/* presumably this is only possible if racing with a rename
 		of one of the parent directories  (we can not lock the dentries
 		above us to prevent this, but retrying should be harmless) */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 548f06230a6d..1d2d91d9bf65 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -79,8 +79,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
 	/* Perform the upcall */
 	rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
 	if (rc < 0)
-		cERROR(1, "%s: unable to resolve: %*.*s",
-		       __func__, len, len, hostname);
+		cFYI(1, "%s: unable to resolve: %*.*s",
+			__func__, len, len, hostname);
 	else
 		cFYI(1, "%s: resolved: %*.*s to %s",
 		     __func__, len, len, hostname, *ip_addr);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9b018c8334fa..a7b2dcd4a53e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 	if (full_path == NULL)
 		return full_path;
 
-	if (dfsplen) {
+	if (dfsplen)
 		strncpy(full_path, tcon->treeName, dfsplen);
-		/* switch slash direction in prepath depending on whether
-		 * windows or posix style path names
-		 */
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
-			int i;
-			for (i = 0; i < dfsplen; i++) {
-				if (full_path[i] == '\\')
-					full_path[i] = '/';
-			}
-		}
-	}
 	strncpy(full_path + dfsplen, vol->prepath, pplen);
+	convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
 	full_path[dfsplen + pplen] = 0; /* add trailing null */
 	return full_path;
 }
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 243d58720513..d3e619692ee0 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -124,8 +124,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 	/*	that we use in next few lines                               */
 	/* Note that header is initialized to zero in header_assemble */
 	pSMB->req.AndXCommand = 0xFF;
-	pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32, CIFSMaxBufSize - 4,
-						USHRT_MAX));
+	pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
 	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
 	pSMB->req.VcNumber = get_next_vcnum(ses);
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 147aa22c3c3a..10ca6b2c26b7 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -266,15 +266,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server,
 	while (1) {
 		if (atomic_read(&server->inFlight) >= cifs_max_pending) {
 			spin_unlock(&GlobalMid_Lock);
-#ifdef CONFIG_CIFS_STATS2
-			atomic_inc(&server->num_waiters);
-#endif
+			cifs_num_waiters_inc(server);
 			wait_event(server->request_q,
 				   atomic_read(&server->inFlight)
 				     < cifs_max_pending);
-#ifdef CONFIG_CIFS_STATS2
-			atomic_dec(&server->num_waiters);
-#endif
+			cifs_num_waiters_dec(server);
 			spin_lock(&GlobalMid_Lock);
 		} else {
 			if (server->tcpStatus == CifsExiting) {
@@ -362,6 +358,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 	mid = AllocMidQEntry(hdr, server);
 	if (mid == NULL) {
 		mutex_unlock(&server->srv_mutex);
+		atomic_dec(&server->inFlight);
+		wake_up(&server->request_q);
 		return -ENOMEM;
 	}
 
@@ -379,15 +377,13 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 	mid->callback = callback;
 	mid->callback_data = cbdata;
 	mid->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-	atomic_inc(&server->inSend);
-#endif
+
+	cifs_in_send_inc(server);
 	rc = smb_sendv(server, iov, nvec);
-#ifdef CONFIG_CIFS_STATS2
-	atomic_dec(&server->inSend);
-	mid->when_sent = jiffies;
-#endif
+	cifs_in_send_dec(server);
+	cifs_save_when_sent(mid);
 	mutex_unlock(&server->srv_mutex);
+
 	if (rc)
 		goto out_err;
 
@@ -573,14 +569,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
 	}
 
 	midQ->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-	atomic_inc(&ses->server->inSend);
-#endif
+	cifs_in_send_inc(ses->server);
 	rc = smb_sendv(ses->server, iov, n_vec);
-#ifdef CONFIG_CIFS_STATS2
-	atomic_dec(&ses->server->inSend);
-	midQ->when_sent = jiffies;
-#endif
+	cifs_in_send_dec(ses->server);
+	cifs_save_when_sent(midQ);
 
 	mutex_unlock(&ses->server->srv_mutex);
 
@@ -701,14 +693,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
 	}
 
 	midQ->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-	atomic_inc(&ses->server->inSend);
-#endif
+
+	cifs_in_send_inc(ses->server);
 	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
-#ifdef CONFIG_CIFS_STATS2
-	atomic_dec(&ses->server->inSend);
-	midQ->when_sent = jiffies;
-#endif
+	cifs_in_send_dec(ses->server);
+	cifs_save_when_sent(midQ);
 	mutex_unlock(&ses->server->srv_mutex);
 
 	if (rc < 0)
@@ -841,14 +830,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
 	}
 
 	midQ->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-	atomic_inc(&ses->server->inSend);
-#endif
+	cifs_in_send_inc(ses->server);
 	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
-#ifdef CONFIG_CIFS_STATS2
-	atomic_dec(&ses->server->inSend);
-	midQ->when_sent = jiffies;
-#endif
+	cifs_in_send_dec(ses->server);
+	cifs_save_when_sent(midQ);
 	mutex_unlock(&ses->server->srv_mutex);
 
 	if (rc < 0) {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 8be086e9abe4..51352de88ef1 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1003,6 +1003,7 @@ COMPATIBLE_IOCTL(PPPIOCCONNECT)
 COMPATIBLE_IOCTL(PPPIOCDISCONN)
 COMPATIBLE_IOCTL(PPPIOCATTCHAN)
 COMPATIBLE_IOCTL(PPPIOCGCHAN)
+COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS)
 /* PPPOX */
 COMPATIBLE_IOCTL(PPPOEIOCSFWD)
 COMPATIBLE_IOCTL(PPPOEIOCDFWD)
diff --git a/fs/dcache.c b/fs/dcache.c
index c83cae19161e..a88948b8bd17 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1729,7 +1729,7 @@ seqretry:
 		 */
 		if (read_seqcount_retry(&dentry->d_seq, *seq))
 			goto seqretry;
-		if (parent->d_flags & DCACHE_OP_COMPARE) {
+		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
 			if (parent->d_op->d_compare(parent, *inode,
 						dentry, i,
 						tlen, tname, name))
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index 1cd6d9d3e29a..cc16562654de 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@
 config ECRYPT_FS
 	tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && KEYS && CRYPTO
+	depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
 	select CRYPTO_ECB
 	select CRYPTO_CBC
 	select CRYPTO_MD5
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 08a2b52bf565..ac1ad48c2376 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1973,7 +1973,7 @@ pki_encrypt_session_key(struct key *auth_tok_key,
 {
 	struct ecryptfs_msg_ctx *msg_ctx = NULL;
 	char *payload = NULL;
-	size_t payload_len;
+	size_t payload_len = 0;
 	struct ecryptfs_message *msg;
 	int rc;
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 9f1bb747d77d..b4a6befb1216 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
        ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
        ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
        ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+       ecryptfs_opt_check_dev_ruid,
        ecryptfs_opt_err };
 
 static const match_table_t tokens = {
@@ -191,6 +192,7 @@ static const match_table_t tokens = {
 	{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
 	{ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
 	{ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
+	{ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"},
 	{ecryptfs_opt_err, NULL}
 };
 
@@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat(
  * ecryptfs_parse_options
  * @sb: The ecryptfs super block
  * @options: The options passed to the kernel
+ * @check_ruid: set to 1 if device uid should be checked against the ruid
  *
  * Parse mount options:
  * debug=N 	   - ecryptfs_verbosity level for debug output
@@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat(
  *
  * Returns zero on success; non-zero on error
  */
-static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
+static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
+				  uid_t *check_ruid)
 {
 	char *p;
 	int rc = 0;
@@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
 	char *cipher_key_bytes_src;
 	char *fn_cipher_key_bytes_src;
 
+	*check_ruid = 0;
+
 	if (!options) {
 		rc = -EINVAL;
 		goto out;
@@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
 			mount_crypt_stat->flags |=
 				ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
 			break;
+		case ecryptfs_opt_check_dev_ruid:
+			*check_ruid = 1;
+			break;
 		case ecryptfs_opt_err:
 		default:
 			printk(KERN_WARNING
@@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 	const char *err = "Getting sb failed";
 	struct inode *inode;
 	struct path path;
+	uid_t check_ruid;
 	int rc;
 
 	sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL);
@@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 		goto out;
 	}
 
-	rc = ecryptfs_parse_options(sbi, raw_data);
+	rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
 	if (rc) {
 		err = "Error parsing options";
 		goto out;
@@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 			"known incompatibilities\n");
 		goto out_free;
 	}
+
+	if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) {
+		rc = -EPERM;
+		printk(KERN_ERR "Mount of device (uid: %d) not owned by "
+		       "requested user (uid: %d)\n",
+		       path.dentry->d_inode->i_uid, current_uid());
+		goto out_free;
+	}
+
 	ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 85d430963116..3745f7c2b9c2 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -39,15 +39,16 @@
 int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
 			 loff_t offset, size_t size)
 {
-	struct ecryptfs_inode_info *inode_info;
+	struct file *lower_file;
 	mm_segment_t fs_save;
 	ssize_t rc;
 
-	inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
-	BUG_ON(!inode_info->lower_file);
+	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+	if (!lower_file)
+		return -EIO;
 	fs_save = get_fs();
 	set_fs(get_ds());
-	rc = vfs_write(inode_info->lower_file, data, size, &offset);
+	rc = vfs_write(lower_file, data, size, &offset);
 	set_fs(fs_save);
 	mark_inode_dirty_sync(ecryptfs_inode);
 	return rc;
@@ -225,15 +226,16 @@ out:
 int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
 			struct inode *ecryptfs_inode)
 {
-	struct ecryptfs_inode_info *inode_info =
-		ecryptfs_inode_to_private(ecryptfs_inode);
+	struct file *lower_file;
 	mm_segment_t fs_save;
 	ssize_t rc;
 
-	BUG_ON(!inode_info->lower_file);
+	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+	if (!lower_file)
+		return -EIO;
 	fs_save = get_fs();
 	set_fs(get_ds());
-	rc = vfs_read(inode_info->lower_file, data, size, &offset);
+	rc = vfs_read(lower_file, data, size, &offset);
 	set_fs(fs_save);
 	return rc;
 }
diff --git a/fs/exec.c b/fs/exec.c
index da80612a35f4..25dcbe5fc356 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1459,6 +1459,23 @@ static int do_execve_common(const char *filename,
 	struct files_struct *displaced;
 	bool clear_in_exec;
 	int retval;
+	const struct cred *cred = current_cred();
+
+	/*
+	 * We move the actual failure in case of RLIMIT_NPROC excess from
+	 * set*uid() to execve() because too many poorly written programs
+	 * don't check setuid() return code.  Here we additionally recheck
+	 * whether NPROC limit is still exceeded.
+	 */
+	if ((current->flags & PF_NPROC_EXCEEDED) &&
+	    atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
+		retval = -EAGAIN;
+		goto out_ret;
+	}
+
+	/* We're below the limit (still or again), so we don't want to make
+	 * further execve() calls fail. */
+	current->flags &= ~PF_NPROC_EXCEEDED;
 
 	retval = unshare_files(&displaced);
 	if (retval)
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 2d0f757fda3e..c5a5855a6c44 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,8 @@
 # Kbuild - Gets included from the Kernels Makefile and build system
 #
 
-exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
+# ore module library
+obj-$(CONFIG_ORE) += ore.o
+
+exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
 obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index 86194b2f799d..70bae4149291 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,6 +1,10 @@
+config ORE
+	tristate
+
 config EXOFS_FS
 	tristate "exofs: OSD based file system support"
 	depends on SCSI_OSD_ULD
+	select ORE
 	help
 	  EXOFS is a file system that uses an OSD storage device,
 	  as its backing storage.
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c965806c2821..f4e442ec7445 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -36,12 +36,9 @@
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/backing-dev.h>
-#include "common.h"
+#include <scsi/osd_ore.h>
 
-/* FIXME: Remove once pnfs hits mainline
- * #include <linux/exportfs/pnfs_osd_xdr.h>
- */
-#include "pnfs.h"
+#include "common.h"
 
 #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
 
@@ -56,27 +53,11 @@
 /* u64 has problems with printk this will cast it to unsigned long long */
 #define _LLU(x) (unsigned long long)(x)
 
-struct exofs_layout {
-	osd_id		s_pid;			/* partition ID of file system*/
-
-	/* Our way of looking at the data_map */
-	unsigned stripe_unit;
-	unsigned mirrors_p1;
-
-	unsigned group_width;
-	u64	 group_depth;
-	unsigned group_count;
-
-	enum exofs_inode_layout_gen_functions lay_func;
-
-	unsigned	s_numdevs;		/* Num of devices in array    */
-	struct osd_dev	*s_ods[0];		/* Variable length            */
-};
-
 /*
  * our extension to the in-memory superblock
  */
 struct exofs_sb_info {
+	struct backing_dev_info bdi;		/* register our bdi with VFS  */
 	struct exofs_sb_stats s_ess;		/* Written often, pre-allocate*/
 	int		s_timeout;		/* timeout for OSD operations */
 	uint64_t	s_nextid;		/* highest object ID used     */
@@ -84,16 +65,13 @@ struct exofs_sb_info {
 	spinlock_t	s_next_gen_lock;	/* spinlock for gen # update  */
 	u32		s_next_generation;	/* next gen # to use          */
 	atomic_t	s_curr_pending;		/* number of pending commands */
-	uint8_t		s_cred[OSD_CAP_LEN];	/* credential for the fscb    */
-	struct 		backing_dev_info bdi;	/* register our bdi with VFS  */
 
 	struct pnfs_osd_data_map data_map;	/* Default raid to use
 						 * FIXME: Needed ?
 						 */
-/*	struct exofs_layout	dir_layout;*/	/* Default dir layout */
-	struct exofs_layout	layout;		/* Default files layout,
-						 * contains the variable osd_dev
-						 * array. Keep last */
+	struct ore_layout	layout;		/* Default files layout       */
+	struct ore_comp one_comp;		/* id & cred of partition id=0*/
+	struct ore_components comps;		/* comps for the partition    */
 	struct osd_dev	*_min_one_dev[1];	/* Place holder for one dev   */
 };
 
@@ -107,7 +85,8 @@ struct exofs_i_info {
 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
 	uint64_t       i_commit_size;      /* the object's written length     */
-	uint8_t        i_cred[OSD_CAP_LEN];/* all-powerful credential         */
+	struct ore_comp one_comp;	   /* same component for all devices  */
+	struct ore_components comps;	   /* inode view of the device table  */
 };
 
 static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
 	return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
 }
 
-struct exofs_io_state;
-typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
-
-struct exofs_io_state {
-	struct kref		kref;
-
-	void			*private;
-	exofs_io_done_fn	done;
-
-	struct exofs_layout	*layout;
-	struct osd_obj_id	obj;
-	u8			*cred;
-
-	/* Global read/write IO*/
-	loff_t			offset;
-	unsigned long		length;
-	void			*kern_buff;
-
-	struct page		**pages;
-	unsigned		nr_pages;
-	unsigned		pgbase;
-	unsigned		pages_consumed;
-
-	/* Attributes */
-	unsigned		in_attr_len;
-	struct osd_attr 	*in_attr;
-	unsigned		out_attr_len;
-	struct osd_attr 	*out_attr;
-
-	/* Variable array of size numdevs */
-	unsigned numdevs;
-	struct exofs_per_dev_state {
-		struct osd_request *or;
-		struct bio *bio;
-		loff_t offset;
-		unsigned length;
-		unsigned dev;
-	} per_dev[];
-};
-
-static inline unsigned exofs_io_state_size(unsigned numdevs)
-{
-	return sizeof(struct exofs_io_state) +
-		sizeof(struct exofs_per_dev_state) * numdevs;
-}
-
 /*
  * our inode flags
  */
@@ -205,12 +138,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
 }
 
 /*
- * Given a layout, object_number and stripe_index return the associated global
- * dev_index
- */
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
-			    osd_id obj_no, unsigned layout_index);
-/*
  * Maximum count of links to a file
  */
 #define EXOFS_LINK_MAX           32000
@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout,
  * function declarations *
  *************************/
 
-/* ios.c */
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
-			   const struct osd_obj_id *obj);
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
-		    u64 offset, void *p, unsigned length);
-
-int  exofs_get_io_state(struct exofs_layout *layout,
-			struct exofs_io_state **ios);
-void exofs_put_io_state(struct exofs_io_state *ios);
-
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
-
-int exofs_sbi_create(struct exofs_io_state *ios);
-int exofs_sbi_remove(struct exofs_io_state *ios);
-int exofs_sbi_write(struct exofs_io_state *ios);
-int exofs_sbi_read(struct exofs_io_state *ios);
-
-int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
-
-int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
-static inline int exofs_oi_write(struct exofs_i_info *oi,
-				 struct exofs_io_state *ios)
-{
-	ios->obj.id = exofs_oi_objno(oi);
-	ios->cred = oi->i_cred;
-	return exofs_sbi_write(ios);
-}
-
-static inline int exofs_oi_read(struct exofs_i_info *oi,
-				struct exofs_io_state *ios)
-{
-	ios->obj.id = exofs_oi_objno(oi);
-	ios->cred = oi->i_cred;
-	return exofs_sbi_read(ios);
-}
-
 /* inode.c               */
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
 			    unsigned expected_pages);
 int exofs_setattr(struct dentry *, struct iattr *);
 int exofs_write_begin(struct file *file, struct address_space *mapping,
@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
 		    struct inode *);
 
 /* super.c               */
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
+			   const struct osd_obj_id *obj);
 int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
 
 /*********************
@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations;
 
 /* inode.c           */
 extern const struct address_space_operations exofs_aops;
-extern const struct osd_attr g_attr_logical_length;
 
 /* namei.c           */
 extern const struct inode_operations exofs_dir_inode_operations;
@@ -305,4 +197,33 @@ extern const struct inode_operations exofs_special_inode_operations;
 extern const struct inode_operations exofs_symlink_inode_operations;
 extern const struct inode_operations exofs_fast_symlink_inode_operations;
 
+/* exofs_init_comps will initialize an ore_components device array
+ * pointing to a single ore_comp struct, and a round-robin view
+ * of the device table.
+ * The first device of each inode is the [inode->ino % num_devices]
+ * and the rest of the devices sequentially following where the
+ * first device is after the last device.
+ * It is assumed that the global device array at @sbi is twice
+ * bigger and that the device table repeats twice.
+ * See: exofs_read_lookup_dev_table()
+ */
+static inline void exofs_init_comps(struct ore_components *comps,
+				    struct ore_comp *one_comp,
+				    struct exofs_sb_info *sbi, osd_id oid)
+{
+	unsigned dev_mod = (unsigned)oid, first_dev;
+
+	one_comp->obj.partition = sbi->one_comp.obj.partition;
+	one_comp->obj.id = oid;
+	exofs_make_credential(one_comp->cred, &one_comp->obj);
+
+	comps->numdevs = sbi->comps.numdevs;
+	comps->single_comp = EC_SINGLE_COMP;
+	comps->comps = one_comp;
+
+	/* Round robin device view of the table */
+	first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs;
+	comps->ods = sbi->comps.ods + first_dev;
+}
+
 #endif
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 8472c098445d..f39a38fc2349 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC =
 		PAGE_SIZE / sizeof(struct page *),
 };
 
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
 			    unsigned expected_pages)
 {
 	unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
@@ -58,7 +58,7 @@ struct page_collect {
 	struct exofs_sb_info *sbi;
 	struct inode *inode;
 	unsigned expected_pages;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 
 	struct page **pages;
 	unsigned alloc_pages;
@@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol)
 {
 	unsigned pages;
 
-	if (!pcol->ios) { /* First time allocate io_state */
-		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
-
-		if (ret)
-			return ret;
-	}
-
 	/* TODO: easily support bio chaining */
 	pages =  exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
 
@@ -140,7 +133,7 @@ static void pcol_free(struct page_collect *pcol)
 	pcol->pages = NULL;
 
 	if (pcol->ios) {
-		exofs_put_io_state(pcol->ios);
+		ore_put_io_state(pcol->ios);
 		pcol->ios = NULL;
 	}
 }
@@ -200,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol)
 	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
-	int ret = exofs_check_io(pcol->ios, &resid);
+	int ret = ore_check_io(pcol->ios, &resid);
 
 	if (likely(!ret))
 		good_bytes = pcol->length;
@@ -241,7 +234,7 @@ static int __readpages_done(struct page_collect *pcol)
 }
 
 /* callback of async reads */
-static void readpages_done(struct exofs_io_state *ios, void *p)
+static void readpages_done(struct ore_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 
@@ -269,20 +262,28 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 static int read_exec(struct page_collect *pcol)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct exofs_io_state *ios = pcol->ios;
+	struct ore_io_state *ios;
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
 	if (!pcol->pages)
 		return 0;
 
+	if (!pcol->ios) {
+		int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true,
+					     pcol->pg_first << PAGE_CACHE_SHIFT,
+					     pcol->length, &pcol->ios);
+
+		if (ret)
+			return ret;
+	}
+
+	ios = pcol->ios;
 	ios->pages = pcol->pages;
 	ios->nr_pages = pcol->nr_pages;
-	ios->length = pcol->length;
-	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
 
 	if (pcol->read_4_write) {
-		exofs_oi_read(oi, pcol->ios);
+		ore_read(pcol->ios);
 		return __readpages_done(pcol);
 	}
 
@@ -295,14 +296,14 @@ static int read_exec(struct page_collect *pcol)
 	*pcol_copy = *pcol;
 	ios->done = readpages_done;
 	ios->private = pcol_copy;
-	ret = exofs_oi_read(oi, ios);
+	ret = ore_read(ios);
 	if (unlikely(ret))
 		goto err;
 
 	atomic_inc(&pcol->sbi->s_curr_pending);
 
 	EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
-		  ios->obj.id, _LLU(ios->offset), pcol->length);
+		  oi->one_comp.obj.id, _LLU(ios->offset), pcol->length);
 
 	/* pages ownership was passed to pcol_copy */
 	_pcol_reset(pcol);
@@ -457,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page)
 }
 
 /* Callback for osd_write. All writes are asynchronous */
-static void writepages_done(struct exofs_io_state *ios, void *p)
+static void writepages_done(struct ore_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 	int i;
 	u64 resid;
 	u64  good_bytes;
 	u64  length = 0;
-	int ret = exofs_check_io(ios, &resid);
+	int ret = ore_check_io(ios, &resid);
 
 	atomic_dec(&pcol->sbi->s_curr_pending);
 
@@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
 static int write_exec(struct page_collect *pcol)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct exofs_io_state *ios = pcol->ios;
+	struct ore_io_state *ios;
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
 	if (!pcol->pages)
 		return 0;
 
+	BUG_ON(pcol->ios);
+	ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false,
+				 pcol->pg_first << PAGE_CACHE_SHIFT,
+				 pcol->length, &pcol->ios);
+
+	if (unlikely(ret))
+		goto err;
+
 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
 	if (!pcol_copy) {
 		EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
@@ -523,16 +532,15 @@ static int write_exec(struct page_collect *pcol)
 
 	*pcol_copy = *pcol;
 
+	ios = pcol->ios;
 	ios->pages = pcol_copy->pages;
 	ios->nr_pages = pcol_copy->nr_pages;
-	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
-	ios->length = pcol_copy->length;
 	ios->done = writepages_done;
 	ios->private = pcol_copy;
 
-	ret = exofs_oi_write(oi, ios);
+	ret = ore_write(ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
+		EXOFS_ERR("write_exec: ore_write() Failed\n");
 		goto err;
 	}
 
@@ -844,17 +852,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
 	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
 }
 
-const struct osd_attr g_attr_logical_length = ATTR_DEF(
-	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-
 static int _do_truncate(struct inode *inode, loff_t newsize)
 {
 	struct exofs_i_info *oi = exofs_i(inode);
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	int ret;
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
-	ret = exofs_oi_truncate(oi, (u64)newsize);
+	ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize);
 	if (likely(!ret))
 		truncate_setsize(inode, newsize);
 
@@ -917,30 +923,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
 		[1] = g_attr_inode_file_layout,
 		[2] = g_attr_inode_dir_layout,
 	};
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct exofs_on_disk_inode_layout *layout;
 	int ret;
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		return ret;
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
-	exofs_make_credential(oi->i_cred, &ios->obj);
-	ios->cred = oi->i_cred;
-
-	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
-	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
+	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
 
 	ios->in_attr = attrs;
 	ios->in_attr_len = ARRAY_SIZE(attrs);
 
-	ret = exofs_sbi_read(ios);
+	ret = ore_read(ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
-			  _LLU(ios->obj.id), ret);
+			  _LLU(oi->one_comp.obj.id), ret);
 		memset(inode, 0, sizeof(*inode));
 		inode->i_mode = 0040000 | (0777 & ~022);
 		/* If object is lost on target we might as well enable it's
@@ -990,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
 	}
 
 out:
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	return ret;
 }
 
@@ -1016,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 	oi = exofs_i(inode);
 	__oi_init(oi);
+	exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
+			 exofs_oi_objno(oi));
 
 	/* read the inode from the osd */
 	ret = exofs_get_inode(sb, oi, &fcb);
@@ -1107,21 +1111,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
  * set the obj_created flag so that other methods know that the object exists on
  * the OSD.
  */
-static void create_done(struct exofs_io_state *ios, void *p)
+static void create_done(struct ore_io_state *ios, void *p)
 {
 	struct inode *inode = p;
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	int ret;
 
-	ret = exofs_check_io(ios, NULL);
-	exofs_put_io_state(ios);
+	ret = ore_check_io(ios, NULL);
+	ore_put_io_state(ios);
 
 	atomic_dec(&sbi->s_curr_pending);
 
 	if (unlikely(ret)) {
 		EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
-			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
+			  _LLU(exofs_oi_objno(oi)),
+			  _LLU(oi->one_comp.obj.partition));
 		/*TODO: When FS is corrupted creation can fail, object already
 		 * exist. Get rid of this asynchronous creation, if exist
 		 * increment the obj counter and try the next object. Until we
@@ -1140,14 +1145,13 @@ static void create_done(struct exofs_io_state *ios, void *p)
  */
 struct inode *exofs_new_inode(struct inode *dir, int mode)
 {
-	struct super_block *sb;
+	struct super_block *sb = dir->i_sb;
+	struct exofs_sb_info *sbi = sb->s_fs_info;
 	struct inode *inode;
 	struct exofs_i_info *oi;
-	struct exofs_sb_info *sbi;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
-	sb = dir->i_sb;
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
@@ -1157,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 
 	set_obj_2bcreated(oi);
 
-	sbi = sb->s_fs_info;
-
 	inode->i_mapping->backing_dev_info = sb->s_bdi;
 	inode_init_owner(inode, dir, mode);
 	inode->i_ino = sbi->s_nextid++;
@@ -1170,25 +1172,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 	spin_unlock(&sbi->s_next_gen_lock);
 	insert_inode_hash(inode);
 
+	exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
+			 exofs_oi_objno(oi));
 	exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
 
 	mark_inode_dirty(inode);
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
+		EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
 		return ERR_PTR(ret);
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
-	exofs_make_credential(oi->i_cred, &ios->obj);
-
 	ios->done = create_done;
 	ios->private = inode;
-	ios->cred = oi->i_cred;
-	ret = exofs_sbi_create(ios);
+
+	ret = ore_create(ios);
 	if (ret) {
-		exofs_put_io_state(ios);
+		ore_put_io_state(ios);
 		return ERR_PTR(ret);
 	}
 	atomic_inc(&sbi->s_curr_pending);
@@ -1207,11 +1208,11 @@ struct updatei_args {
 /*
  * Callback function from exofs_update_inode().
  */
-static void updatei_done(struct exofs_io_state *ios, void *p)
+static void updatei_done(struct ore_io_state *ios, void *p)
 {
 	struct updatei_args *args = p;
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 
 	atomic_dec(&args->sbi->s_curr_pending);
 
@@ -1227,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct osd_attr attr;
 	struct exofs_fcb *fcb;
 	struct updatei_args *args;
@@ -1266,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	} else
 		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		goto free_args;
 	}
 
@@ -1285,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 		ios->private = args;
 	}
 
-	ret = exofs_oi_write(oi, ios);
+	ret = ore_write(ios);
 	if (!do_sync && !ret) {
 		atomic_inc(&sbi->s_curr_pending);
 		goto out; /* deallocation in updatei_done */
 	}
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 free_args:
 	kfree(args);
 out:
@@ -1310,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
  * Callback function from exofs_delete_inode() - don't have much cleaning up to
  * do.
  */
-static void delete_done(struct exofs_io_state *ios, void *p)
+static void delete_done(struct ore_io_state *ios, void *p)
 {
 	struct exofs_sb_info *sbi = p;
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 
 	atomic_dec(&sbi->s_curr_pending);
 }
@@ -1329,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
 	truncate_inode_pages(&inode->i_data, 0);
@@ -1349,20 +1350,19 @@ void exofs_evict_inode(struct inode *inode)
 	/* ignore the error, attempt a remove anyway */
 
 	/* Now Remove the OSD objects */
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
 		return;
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
 	ios->done = delete_done;
 	ios->private = sbi;
-	ios->cred = oi->i_cred;
-	ret = exofs_sbi_remove(ios);
+
+	ret = ore_remove(ios);
 	if (ret) {
-		EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
-		exofs_put_io_state(ios);
+		EXOFS_ERR("%s: ore_remove failed\n", __func__);
+		ore_put_io_state(ios);
 		return;
 	}
 	atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ore.c
index f74a2ec027a6..25305af88198 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ore.c
@@ -23,81 +23,87 @@
  */
 
 #include <linux/slab.h>
-#include <scsi/scsi_device.h>
 #include <asm/div64.h>
 
-#include "exofs.h"
+#include <scsi/osd_ore.h>
 
-#define EXOFS_DBGMSG2(M...) do {} while (0)
-/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
+#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)
 
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
-{
-	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
-}
+#ifdef CONFIG_EXOFS_DEBUG
+#define ORE_DBGMSG(fmt, a...) \
+	printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
+#else
+#define ORE_DBGMSG(fmt, a...) \
+	do { if (0) printk(fmt, ##a); } while (0)
+#endif
 
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
-		    u64 offset, void *p, unsigned length)
-{
-	struct osd_request *or = osd_start_request(od, GFP_KERNEL);
-/*	struct osd_sense_info osi = {.key = 0};*/
-	int ret;
+/* u64 has problems with printk this will cast it to unsigned long long */
+#define _LLU(x) (unsigned long long)(x)
 
-	if (unlikely(!or)) {
-		EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
-		return -ENOMEM;
-	}
-	ret = osd_req_read_kern(or, obj, offset, p, length);
-	if (unlikely(ret)) {
-		EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
-		goto out;
-	}
+#define ORE_DBGMSG2(M...) do {} while (0)
+/* #define ORE_DBGMSG2 ORE_DBGMSG */
 
-	ret = osd_finalize_request(or, 0, cred, NULL);
-	if (unlikely(ret)) {
-		EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
-		goto out;
-	}
+MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
+MODULE_LICENSE("GPL");
 
-	ret = osd_execute_request(or);
-	if (unlikely(ret))
-		EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
-	/* osd_req_decode_sense(or, ret); */
+static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
+{
+	return ios->comps->comps[index & ios->comps->single_comp].cred;
+}
 
-out:
-	osd_end_request(or);
-	return ret;
+static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
+{
+	return &ios->comps->comps[index & ios->comps->single_comp].obj;
 }
 
-int exofs_get_io_state(struct exofs_layout *layout,
-		       struct exofs_io_state **pios)
+static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
 {
-	struct exofs_io_state *ios;
+	return ios->comps->ods[index];
+}
+
+int  ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
+		      bool is_reading, u64 offset, u64 length,
+		      struct ore_io_state **pios)
+{
+	struct ore_io_state *ios;
 
 	/*TODO: Maybe use kmem_cach per sbi of size
 	 * exofs_io_state_size(layout->s_numdevs)
 	 */
-	ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
+	ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL);
 	if (unlikely(!ios)) {
-		EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
-			     exofs_io_state_size(layout->s_numdevs));
+		ORE_DBGMSG("Failed kzalloc bytes=%d\n",
+			     ore_io_state_size(comps->numdevs));
 		*pios = NULL;
 		return -ENOMEM;
 	}
 
 	ios->layout = layout;
-	ios->obj.partition = layout->s_pid;
+	ios->comps = comps;
+	ios->offset = offset;
+	ios->length = length;
+	ios->reading = is_reading;
+
 	*pios = ios;
 	return 0;
 }
+EXPORT_SYMBOL(ore_get_rw_state);
+
+int  ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
+		      struct ore_io_state **ios)
+{
+	return ore_get_rw_state(layout, comps, true, 0, 0, ios);
+}
+EXPORT_SYMBOL(ore_get_io_state);
 
-void exofs_put_io_state(struct exofs_io_state *ios)
+void ore_put_io_state(struct ore_io_state *ios)
 {
 	if (ios) {
 		unsigned i;
 
 		for (i = 0; i < ios->numdevs; i++) {
-			struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
+			struct ore_per_dev_state *per_dev = &ios->per_dev[i];
 
 			if (per_dev->or)
 				osd_end_request(per_dev->or);
@@ -108,31 +114,9 @@ void exofs_put_io_state(struct exofs_io_state *ios)
 		kfree(ios);
 	}
 }
+EXPORT_SYMBOL(ore_put_io_state);
 
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
-			    osd_id obj_no, unsigned layout_index)
-{
-/*	switch (layout->lay_func) {
-	case LAYOUT_MOVING_WINDOW:
-	{*/
-		unsigned dev_mod = obj_no;
-
-		return (layout_index + dev_mod * layout->mirrors_p1) %
-							      layout->s_numdevs;
-/*	}
-	case LAYOUT_FUNC_IMPLICT:
-		return layout->devs[layout_index];
-	}*/
-}
-
-static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
-					   unsigned layout_index)
-{
-	return ios->layout->s_ods[
-		exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
-}
-
-static void _sync_done(struct exofs_io_state *ios, void *p)
+static void _sync_done(struct ore_io_state *ios, void *p)
 {
 	struct completion *waiting = p;
 
@@ -141,20 +125,20 @@ static void _sync_done(struct exofs_io_state *ios, void *p)
 
 static void _last_io(struct kref *kref)
 {
-	struct exofs_io_state *ios = container_of(
-					kref, struct exofs_io_state, kref);
+	struct ore_io_state *ios = container_of(
+					kref, struct ore_io_state, kref);
 
 	ios->done(ios, ios->private);
 }
 
 static void _done_io(struct osd_request *or, void *p)
 {
-	struct exofs_io_state *ios = p;
+	struct ore_io_state *ios = p;
 
 	kref_put(&ios->kref, _last_io);
 }
 
-static int exofs_io_execute(struct exofs_io_state *ios)
+static int ore_io_execute(struct ore_io_state *ios)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	bool sync = (ios->done == NULL);
@@ -170,9 +154,9 @@ static int exofs_io_execute(struct exofs_io_state *ios)
 		if (unlikely(!or))
 			continue;
 
-		ret = osd_finalize_request(or, 0, ios->cred, NULL);
+		ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
 		if (unlikely(ret)) {
-			EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n",
+			ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
 				     ret);
 			return ret;
 		}
@@ -194,7 +178,7 @@ static int exofs_io_execute(struct exofs_io_state *ios)
 
 	if (sync) {
 		wait_for_completion(&wait);
-		ret = exofs_check_io(ios, NULL);
+		ret = ore_check_io(ios, NULL);
 	}
 	return ret;
 }
@@ -214,7 +198,7 @@ static void _clear_bio(struct bio *bio)
 	}
 }
 
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
+int ore_check_io(struct ore_io_state *ios, u64 *resid)
 {
 	enum osd_err_priority acumulated_osd_err = 0;
 	int acumulated_lin_err = 0;
@@ -235,7 +219,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
 			/* start read offset passed endof file */
 			_clear_bio(ios->per_dev[i].bio);
-			EXOFS_DBGMSG("start read offset passed end of file "
+			ORE_DBGMSG("start read offset passed end of file "
 				"offset=0x%llx, length=0x%llx\n",
 				_LLU(ios->per_dev[i].offset),
 				_LLU(ios->per_dev[i].length));
@@ -259,6 +243,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 
 	return acumulated_lin_err;
 }
+EXPORT_SYMBOL(ore_check_io);
 
 /*
  * L - logical offset into the file
@@ -305,20 +290,21 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 struct _striping_info {
 	u64 obj_offset;
 	u64 group_length;
+	u64 M; /* for truncate */
 	unsigned dev;
 	unsigned unit_off;
 };
 
-static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
+static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset,
 			      struct _striping_info *si)
 {
-	u32	stripe_unit = ios->layout->stripe_unit;
-	u32	group_width = ios->layout->group_width;
-	u64	group_depth = ios->layout->group_depth;
+	u32	stripe_unit = layout->stripe_unit;
+	u32	group_width = layout->group_width;
+	u64	group_depth = layout->group_depth;
 
 	u32	U = stripe_unit * group_width;
 	u64	T = U * group_depth;
-	u64	S = T * ios->layout->group_count;
+	u64	S = T * layout->group_count;
 	u64	M = div64_u64(file_offset, S);
 
 	/*
@@ -333,7 +319,7 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
 
 	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
 	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
-	si->dev *= ios->layout->mirrors_p1;
+	si->dev *= layout->mirrors_p1;
 
 	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
 
@@ -341,15 +327,16 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
 				  (M * group_depth * stripe_unit);
 
 	si->group_length = T - H;
+	si->M = M;
 }
 
-static int _add_stripe_unit(struct exofs_io_state *ios,  unsigned *cur_pg,
-		unsigned pgbase, struct exofs_per_dev_state *per_dev,
+static int _add_stripe_unit(struct ore_io_state *ios,  unsigned *cur_pg,
+		unsigned pgbase, struct ore_per_dev_state *per_dev,
 		int cur_len)
 {
 	unsigned pg = *cur_pg;
 	struct request_queue *q =
-			osd_request_queue(exofs_ios_od(ios, per_dev->dev));
+			osd_request_queue(_ios_od(ios, per_dev->dev));
 
 	per_dev->length += cur_len;
 
@@ -361,7 +348,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios,  unsigned *cur_pg,
 
 		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
 		if (unlikely(!per_dev->bio)) {
-			EXOFS_DBGMSG("Failed to allocate BIO size=%u\n",
+			ORE_DBGMSG("Failed to allocate BIO size=%u\n",
 				     bio_size);
 			return -ENOMEM;
 		}
@@ -387,7 +374,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios,  unsigned *cur_pg,
 	return 0;
 }
 
-static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
+static int _prepare_one_group(struct ore_io_state *ios, u64 length,
 			      struct _striping_info *si)
 {
 	unsigned stripe_unit = ios->layout->stripe_unit;
@@ -400,7 +387,7 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
 	int ret = 0;
 
 	while (length) {
-		struct exofs_per_dev_state *per_dev = &ios->per_dev[dev];
+		struct ore_per_dev_state *per_dev = &ios->per_dev[dev];
 		unsigned cur_len, page_off = 0;
 
 		if (!per_dev->length) {
@@ -443,7 +430,7 @@ out:
 	return ret;
 }
 
-static int _prepare_for_striping(struct exofs_io_state *ios)
+static int _prepare_for_striping(struct ore_io_state *ios)
 {
 	u64 length = ios->length;
 	u64 offset = ios->offset;
@@ -452,9 +439,9 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
 
 	if (!ios->pages) {
 		if (ios->kern_buff) {
-			struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
+			struct ore_per_dev_state *per_dev = &ios->per_dev[0];
 
-			_calc_stripe_info(ios, ios->offset, &si);
+			_calc_stripe_info(ios->layout, ios->offset, &si);
 			per_dev->offset = si.obj_offset;
 			per_dev->dev = si.dev;
 
@@ -468,7 +455,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
 	}
 
 	while (length) {
-		_calc_stripe_info(ios, offset, &si);
+		_calc_stripe_info(ios->layout, offset, &si);
 
 		if (length < si.group_length)
 			si.group_length = length;
@@ -485,57 +472,59 @@ out:
 	return ret;
 }
 
-int exofs_sbi_create(struct exofs_io_state *ios)
+int ore_create(struct ore_io_state *ios)
 {
 	int i, ret;
 
-	for (i = 0; i < ios->layout->s_numdevs; i++) {
+	for (i = 0; i < ios->comps->numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
 		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
 			goto out;
 		}
 		ios->per_dev[i].or = or;
 		ios->numdevs++;
 
-		osd_req_create_object(or, &ios->obj);
+		osd_req_create_object(or, _ios_obj(ios, i));
 	}
-	ret = exofs_io_execute(ios);
+	ret = ore_io_execute(ios);
 
 out:
 	return ret;
 }
+EXPORT_SYMBOL(ore_create);
 
-int exofs_sbi_remove(struct exofs_io_state *ios)
+int ore_remove(struct ore_io_state *ios)
 {
 	int i, ret;
 
-	for (i = 0; i < ios->layout->s_numdevs; i++) {
+	for (i = 0; i < ios->comps->numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
 		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
 			goto out;
 		}
 		ios->per_dev[i].or = or;
 		ios->numdevs++;
 
-		osd_req_remove_object(or, &ios->obj);
+		osd_req_remove_object(or, _ios_obj(ios, i));
 	}
-	ret = exofs_io_execute(ios);
+	ret = ore_io_execute(ios);
 
 out:
 	return ret;
 }
+EXPORT_SYMBOL(ore_remove);
 
-static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
+static int _write_mirror(struct ore_io_state *ios, int cur_comp)
 {
-	struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
+	struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
 	unsigned dev = ios->per_dev[cur_comp].dev;
 	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
 	int ret = 0;
@@ -544,12 +533,12 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 		return 0; /* Just an empty slot */
 
 	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
-		struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
+		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
 		struct osd_request *or;
 
-		or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
 		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
 			goto out;
 		}
@@ -563,7 +552,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 				bio = bio_kmalloc(GFP_KERNEL,
 						  master_dev->bio->bi_max_vecs);
 				if (unlikely(!bio)) {
-					EXOFS_DBGMSG(
+					ORE_DBGMSG(
 					      "Failed to allocate BIO size=%u\n",
 					      master_dev->bio->bi_max_vecs);
 					ret = -ENOMEM;
@@ -582,25 +571,29 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 				bio->bi_rw |= REQ_WRITE;
 			}
 
-			osd_req_write(or, &ios->obj, per_dev->offset, bio,
-				      per_dev->length);
-			EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
+			osd_req_write(or, _ios_obj(ios, dev), per_dev->offset,
+				      bio, per_dev->length);
+			ORE_DBGMSG("write(0x%llx) offset=0x%llx "
 				      "length=0x%llx dev=%d\n",
-				     _LLU(ios->obj.id), _LLU(per_dev->offset),
+				     _LLU(_ios_obj(ios, dev)->id),
+				     _LLU(per_dev->offset),
 				     _LLU(per_dev->length), dev);
 		} else if (ios->kern_buff) {
-			ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
-					   ios->kern_buff, ios->length);
+			ret = osd_req_write_kern(or, _ios_obj(ios, dev),
+						 per_dev->offset,
+						 ios->kern_buff, ios->length);
 			if (unlikely(ret))
 				goto out;
-			EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
+			ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
 				      "length=0x%llx dev=%d\n",
-				     _LLU(ios->obj.id), _LLU(per_dev->offset),
+				     _LLU(_ios_obj(ios, dev)->id),
+				     _LLU(per_dev->offset),
 				     _LLU(ios->length), dev);
 		} else {
-			osd_req_set_attributes(or, &ios->obj);
-			EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
-				     _LLU(ios->obj.id), ios->out_attr_len, dev);
+			osd_req_set_attributes(or, _ios_obj(ios, dev));
+			ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
+				     _LLU(_ios_obj(ios, dev)->id),
+				     ios->out_attr_len, dev);
 		}
 
 		if (ios->out_attr)
@@ -616,7 +609,7 @@ out:
 	return ret;
 }
 
-int exofs_sbi_write(struct exofs_io_state *ios)
+int ore_write(struct ore_io_state *ios)
 {
 	int i;
 	int ret;
@@ -626,52 +619,55 @@ int exofs_sbi_write(struct exofs_io_state *ios)
 		return ret;
 
 	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
-		ret = _sbi_write_mirror(ios, i);
+		ret = _write_mirror(ios, i);
 		if (unlikely(ret))
 			return ret;
 	}
 
-	ret = exofs_io_execute(ios);
+	ret = ore_io_execute(ios);
 	return ret;
 }
+EXPORT_SYMBOL(ore_write);
 
-static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
+static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp)
 {
 	struct osd_request *or;
-	struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
-	unsigned first_dev = (unsigned)ios->obj.id;
+	struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
+	struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
+	unsigned first_dev = (unsigned)obj->id;
 
 	if (ios->pages && !per_dev->length)
 		return 0; /* Just an empty slot */
 
 	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
-	or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
+	or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
 	if (unlikely(!or)) {
-		EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+		ORE_ERR("%s: osd_start_request failed\n", __func__);
 		return -ENOMEM;
 	}
 	per_dev->or = or;
 
 	if (ios->pages) {
-		osd_req_read(or, &ios->obj, per_dev->offset,
+		osd_req_read(or, obj, per_dev->offset,
 				per_dev->bio, per_dev->length);
-		EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
-			     " dev=%d\n", _LLU(ios->obj.id),
+		ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
+			     " dev=%d\n", _LLU(obj->id),
 			     _LLU(per_dev->offset), _LLU(per_dev->length),
 			     first_dev);
 	} else if (ios->kern_buff) {
-		int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
+		int ret = osd_req_read_kern(or, obj, per_dev->offset,
 					    ios->kern_buff, ios->length);
-		EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
+		ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
 			      "length=0x%llx dev=%d ret=>%d\n",
-			      _LLU(ios->obj.id), _LLU(per_dev->offset),
+			      _LLU(obj->id), _LLU(per_dev->offset),
 			      _LLU(ios->length), first_dev, ret);
 		if (unlikely(ret))
 			return ret;
 	} else {
-		osd_req_get_attributes(or, &ios->obj);
-		EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
-			      _LLU(ios->obj.id), ios->in_attr_len, first_dev);
+		osd_req_get_attributes(or, obj);
+		ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
+			      _LLU(obj->id),
+			      ios->in_attr_len, first_dev);
 	}
 	if (ios->out_attr)
 		osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
@@ -682,7 +678,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
 	return 0;
 }
 
-int exofs_sbi_read(struct exofs_io_state *ios)
+int ore_read(struct ore_io_state *ios)
 {
 	int i;
 	int ret;
@@ -692,16 +688,17 @@ int exofs_sbi_read(struct exofs_io_state *ios)
 		return ret;
 
 	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
-		ret = _sbi_read_mirror(ios, i);
+		ret = _read_mirror(ios, i);
 		if (unlikely(ret))
 			return ret;
 	}
 
-	ret = exofs_io_execute(ios);
+	ret = ore_io_execute(ios);
 	return ret;
 }
+EXPORT_SYMBOL(ore_read);
 
-int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
+int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
 {
 	struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
 	void *iter = NULL;
@@ -721,83 +718,118 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
 
 	return -EIO;
 }
+EXPORT_SYMBOL(extract_attr_from_ios);
 
-static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
+static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
 			     struct osd_attr *attr)
 {
 	int last_comp = cur_comp + ios->layout->mirrors_p1;
 
 	for (; cur_comp < last_comp; ++cur_comp) {
-		struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
+		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
 		struct osd_request *or;
 
-		or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
 		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			return -ENOMEM;
 		}
 		per_dev->or = or;
 
-		osd_req_set_attributes(or, &ios->obj);
+		osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
 		osd_req_add_set_attr_list(or, attr, 1);
 	}
 
 	return 0;
 }
 
-int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
+struct _trunc_info {
+	struct _striping_info si;
+	u64 prev_group_obj_off;
+	u64 next_group_obj_off;
+
+	unsigned first_group_dev;
+	unsigned nex_group_dev;
+	unsigned max_devs;
+};
+
+void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
+		       struct _trunc_info *ti)
+{
+	unsigned stripe_unit = layout->stripe_unit;
+
+	_calc_stripe_info(layout, file_offset, &ti->si);
+
+	ti->prev_group_obj_off = ti->si.M * stripe_unit;
+	ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;
+
+	ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
+	ti->nex_group_dev = ti->first_group_dev + layout->group_width;
+	ti->max_devs = layout->group_width * layout->group_count;
+}
+
+int ore_truncate(struct ore_layout *layout, struct ore_components *comps,
+		   u64 size)
 {
-	struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct exofs_trunc_attr {
 		struct osd_attr attr;
 		__be64 newsize;
 	} *size_attrs;
-	struct _striping_info si;
+	struct _trunc_info ti;
 	int i, ret;
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(layout, comps, &ios);
 	if (unlikely(ret))
 		return ret;
 
-	size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs),
+	_calc_trunk_info(ios->layout, size, &ti);
+
+	size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs),
 			     GFP_KERNEL);
 	if (unlikely(!size_attrs)) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
-	ios->cred = oi->i_cred;
+	ios->numdevs = ios->comps->numdevs;
 
-	ios->numdevs = ios->layout->s_numdevs;
-	_calc_stripe_info(ios, size, &si);
-
-	for (i = 0; i < ios->layout->group_width; ++i) {
+	for (i = 0; i < ti.max_devs; ++i) {
 		struct exofs_trunc_attr *size_attr = &size_attrs[i];
 		u64 obj_size;
 
-		if (i < si.dev)
-			obj_size = si.obj_offset +
-					ios->layout->stripe_unit - si.unit_off;
-		else if (i == si.dev)
-			obj_size = si.obj_offset;
-		else /* i > si.dev */
-			obj_size = si.obj_offset - si.unit_off;
+		if (i < ti.first_group_dev)
+			obj_size = ti.prev_group_obj_off;
+		else if (i >= ti.nex_group_dev)
+			obj_size = ti.next_group_obj_off;
+		else if (i < ti.si.dev) /* dev within this group */
+			obj_size = ti.si.obj_offset +
+				      ios->layout->stripe_unit - ti.si.unit_off;
+		else if (i == ti.si.dev)
+			obj_size = ti.si.obj_offset;
+		else /* i > ti.dev */
+			obj_size = ti.si.obj_offset - ti.si.unit_off;
 
 		size_attr->newsize = cpu_to_be64(obj_size);
 		size_attr->attr = g_attr_logical_length;
 		size_attr->attr.val_ptr = &size_attr->newsize;
 
+		ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
+			     _LLU(comps->comps->obj.id), _LLU(obj_size), i);
 		ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
 					&size_attr->attr);
 		if (unlikely(ret))
 			goto out;
 	}
-	ret = exofs_io_execute(ios);
+	ret = ore_io_execute(ios);
 
 out:
 	kfree(size_attrs);
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	return ret;
 }
+EXPORT_SYMBOL(ore_truncate);
+
+const struct osd_attr g_attr_logical_length = ATTR_DEF(
+	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
+EXPORT_SYMBOL(g_attr_logical_length);
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
deleted file mode 100644
index c52e9888b8ab..000000000000
--- a/fs/exofs/pnfs.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
- *
- * This file is part of exofs.
- *
- * exofs is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License  version 2 as published by the Free
- * Software Foundation.
- *
- */
-
-/* FIXME: Remove this file once pnfs hits mainline */
-
-#ifndef __EXOFS_PNFS_H__
-#define __EXOFS_PNFS_H__
-
-#if ! defined(__PNFS_OSD_XDR_H__)
-
-enum pnfs_iomode {
-	IOMODE_READ = 1,
-	IOMODE_RW = 2,
-	IOMODE_ANY = 3,
-};
-
-/* Layout Structure */
-enum pnfs_osd_raid_algorithm4 {
-	PNFS_OSD_RAID_0		= 1,
-	PNFS_OSD_RAID_4		= 2,
-	PNFS_OSD_RAID_5		= 3,
-	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
-};
-
-struct pnfs_osd_data_map {
-	u32	odm_num_comps;
-	u64	odm_stripe_unit;
-	u32	odm_group_width;
-	u32	odm_group_depth;
-	u32	odm_mirror_cnt;
-	u32	odm_raid_algorithm;
-};
-
-#endif /* ! defined(__PNFS_OSD_XDR_H__) */
-
-#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index c57beddcc217..274894053b02 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -40,6 +40,8 @@
 
 #include "exofs.h"
 
+#define EXOFS_DBGMSG2(M...) do {} while (0)
+
 /******************************************************************************
  * MOUNT OPTIONS
  *****************************************************************************/
@@ -208,10 +210,48 @@ static void destroy_inodecache(void)
 }
 
 /******************************************************************************
- * SUPERBLOCK FUNCTIONS
+ * Some osd helpers
  *****************************************************************************/
-static const struct super_operations exofs_sops;
-static const struct export_operations exofs_export_ops;
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
+{
+	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
+}
+
+static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
+		    u64 offset, void *p, unsigned length)
+{
+	struct osd_request *or = osd_start_request(od, GFP_KERNEL);
+/*	struct osd_sense_info osi = {.key = 0};*/
+	int ret;
+
+	if (unlikely(!or)) {
+		EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
+		return -ENOMEM;
+	}
+	ret = osd_req_read_kern(or, obj, offset, p, length);
+	if (unlikely(ret)) {
+		EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
+		goto out;
+	}
+
+	ret = osd_finalize_request(or, 0, cred, NULL);
+	if (unlikely(ret)) {
+		EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
+		goto out;
+	}
+
+	ret = osd_execute_request(or);
+	if (unlikely(ret))
+		EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
+	/* osd_req_decode_sense(or, ret); */
+
+out:
+	osd_end_request(or);
+	EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
+		      "length=0x%llx dev=%p ret=>%d\n",
+		      _LLU(obj->id), _LLU(offset), _LLU(length), od, ret);
+	return ret;
+}
 
 static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
 	EXOFS_APAGE_SB_DATA,
@@ -223,21 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
 	struct osd_attr attrs[] = {
 		[0] = g_attr_sb_stats,
 	};
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		return ret;
 	}
 
-	ios->cred = sbi->s_cred;
-
 	ios->in_attr = attrs;
 	ios->in_attr_len = ARRAY_SIZE(attrs);
 
-	ret = exofs_sbi_read(ios);
+	ret = ore_read(ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("Error reading super_block stats => %d\n", ret);
 		goto out;
@@ -264,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
 	}
 
 out:
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	return ret;
 }
 
-static void stats_done(struct exofs_io_state *ios, void *p)
+static void stats_done(struct ore_io_state *ios, void *p)
 {
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	/* Good thanks nothing to do anymore */
 }
 
@@ -280,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
 	struct osd_attr attrs[] = {
 		[0] = g_attr_sb_stats,
 	};
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		return ret;
 	}
 
@@ -293,21 +331,27 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
 	sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
 	attrs[0].val_ptr = &sbi->s_ess;
 
-	ios->cred = sbi->s_cred;
+
 	ios->done = stats_done;
 	ios->private = sbi;
 	ios->out_attr = attrs;
 	ios->out_attr_len = ARRAY_SIZE(attrs);
 
-	ret = exofs_sbi_write(ios);
+	ret = ore_write(ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
-		exofs_put_io_state(ios);
+		EXOFS_ERR("%s: ore_write failed.\n", __func__);
+		ore_put_io_state(ios);
 	}
 
 	return ret;
 }
 
+/******************************************************************************
+ * SUPERBLOCK FUNCTIONS
+ *****************************************************************************/
+static const struct super_operations exofs_sops;
+static const struct export_operations exofs_export_ops;
+
 /*
  * Write the superblock to the OSD
  */
@@ -315,7 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait)
 {
 	struct exofs_sb_info *sbi;
 	struct exofs_fscb *fscb;
-	struct exofs_io_state *ios;
+	struct ore_comp one_comp;
+	struct ore_components comps;
+	struct ore_io_state *ios;
 	int ret = -ENOMEM;
 
 	fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
@@ -331,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait)
 	 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
 	 * the writeable info is set in exofs_sbi_write_stats() above.
 	 */
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+
+	exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID);
+
+	ret = ore_get_io_state(&sbi->layout, &comps, &ios);
 	if (unlikely(ret))
 		goto out;
 
@@ -345,14 +394,12 @@ int exofs_sync_fs(struct super_block *sb, int wait)
 	fscb->s_newfs = 0;
 	fscb->s_version = EXOFS_FSCB_VER;
 
-	ios->obj.id = EXOFS_SUPER_ID;
 	ios->offset = 0;
 	ios->kern_buff = fscb;
-	ios->cred = sbi->s_cred;
 
-	ret = exofs_sbi_write(ios);
+	ret = ore_write(ios);
 	if (unlikely(ret))
-		EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
+		EXOFS_ERR("%s: ore_write failed.\n", __func__);
 	else
 		sb->s_dirt = 0;
 
@@ -360,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait)
 	unlock_super(sb);
 out:
 	EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	kfree(fscb);
 	return ret;
 }
@@ -384,15 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
 
 void exofs_free_sbi(struct exofs_sb_info *sbi)
 {
-	while (sbi->layout.s_numdevs) {
-		int i = --sbi->layout.s_numdevs;
-		struct osd_dev *od = sbi->layout.s_ods[i];
+	while (sbi->comps.numdevs) {
+		int i = --sbi->comps.numdevs;
+		struct osd_dev *od = sbi->comps.ods[i];
 
 		if (od) {
-			sbi->layout.s_ods[i] = NULL;
+			sbi->comps.ods[i] = NULL;
 			osduld_put_device(od);
 		}
 	}
+	if (sbi->comps.ods != sbi->_min_one_dev)
+		kfree(sbi->comps.ods);
 	kfree(sbi);
 }
 
@@ -419,8 +468,8 @@ static void exofs_put_super(struct super_block *sb)
 				  msecs_to_jiffies(100));
 	}
 
-	_exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
-			    sbi->layout.s_pid);
+	_exofs_print_device("Unmounting", NULL, sbi->comps.ods[0],
+			    sbi->one_comp.obj.partition);
 
 	bdi_destroy(&sbi->bdi);
 	exofs_free_sbi(sbi);
@@ -501,10 +550,19 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
 		return -EINVAL;
 	}
 
+	EXOFS_DBGMSG("exofs: layout: "
+		"num_comps=%u stripe_unit=0x%x group_width=%u "
+		"group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n",
+		numdevs,
+		sbi->layout.stripe_unit,
+		sbi->layout.group_width,
+		_LLU(sbi->layout.group_depth),
+		sbi->layout.mirrors_p1,
+		sbi->data_map.odm_raid_algorithm);
 	return 0;
 }
 
-static unsigned __ra_pages(struct exofs_layout *layout)
+static unsigned __ra_pages(struct ore_layout *layout)
 {
 	const unsigned _MIN_RA = 32; /* min 128K read-ahead */
 	unsigned ra_pages = layout->group_width * layout->stripe_unit /
@@ -547,13 +605,11 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
 	return !(odi->systemid_len || odi->osdname_len);
 }
 
-static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
+static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
+				       struct osd_dev *fscb_od,
 				       unsigned table_count)
 {
-	struct exofs_sb_info *sbi = *psbi;
-	struct osd_dev *fscb_od;
-	struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
-				 .id = EXOFS_DEVTABLE_ID};
+	struct ore_comp comp;
 	struct exofs_device_table *dt;
 	unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
 					     sizeof(*dt);
@@ -567,10 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
 		return -ENOMEM;
 	}
 
-	fscb_od = sbi->layout.s_ods[0];
-	sbi->layout.s_ods[0] = NULL;
-	sbi->layout.s_numdevs = 0;
-	ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
+	sbi->comps.numdevs = 0;
+
+	comp.obj.partition = sbi->one_comp.obj.partition;
+	comp.obj.id = EXOFS_DEVTABLE_ID;
+	exofs_make_credential(comp.cred, &comp.obj);
+
+	ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt,
+			      table_bytes);
 	if (unlikely(ret)) {
 		EXOFS_ERR("ERROR: reading device table\n");
 		goto out;
@@ -588,16 +648,18 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
 		goto out;
 
 	if (likely(numdevs > 1)) {
-		unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
+		unsigned size = numdevs * sizeof(sbi->comps.ods[0]);
 
-		sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
-		if (unlikely(!sbi)) {
+		/* Twice bigger table: See exofs_init_comps() and below
+		 * comment
+		 */
+		sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL);
+		if (unlikely(!sbi->comps.ods)) {
+			EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
+				  numdevs);
 			ret = -ENOMEM;
 			goto out;
 		}
-		memset(&sbi->layout.s_ods[1], 0,
-		       size - sizeof(sbi->layout.s_ods[0]));
-		*psbi = sbi;
 	}
 
 	for (i = 0; i < numdevs; i++) {
@@ -619,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
 		 * line. We always keep them in device-table order.
 		 */
 		if (fscb_od && osduld_device_same(fscb_od, &odi)) {
-			sbi->layout.s_ods[i] = fscb_od;
-			++sbi->layout.s_numdevs;
+			sbi->comps.ods[i] = fscb_od;
+			++sbi->comps.numdevs;
 			fscb_od = NULL;
 			continue;
 		}
@@ -633,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
 			goto out;
 		}
 
-		sbi->layout.s_ods[i] = od;
-		++sbi->layout.s_numdevs;
+		sbi->comps.ods[i] = od;
+		++sbi->comps.numdevs;
 
 		/* Read the fscb of the other devices to make sure the FS
 		 * partition is there.
 		 */
-		ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
+		ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,
 				      sizeof(fscb));
 		if (unlikely(ret)) {
 			EXOFS_ERR("ERROR: Malformed participating device "
@@ -656,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
 
 out:
 	kfree(dt);
-	if (unlikely(!ret && fscb_od)) {
-		EXOFS_ERR(
-		      "ERROR: Bad device-table container device not present\n");
-		osduld_put_device(fscb_od);
-		ret = -EINVAL;
-	}
+	if (likely(!ret)) {
+		unsigned numdevs = sbi->comps.numdevs;
 
+		if (unlikely(fscb_od)) {
+			EXOFS_ERR("ERROR: Bad device-table container device not present\n");
+			osduld_put_device(fscb_od);
+			return -EINVAL;
+		}
+		/* exofs round-robins the device table view according to inode
+		 * number. We hold a: twice bigger table hence inodes can point
+		 * to any device and have a sequential view of the table
+		 * starting at this device. See exofs_init_comps()
+		 */
+		for (i = 0; i < numdevs - 1; ++i)
+			sbi->comps.ods[i + numdevs] = sbi->comps.ods[i];
+	}
 	return ret;
 }
 
@@ -676,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	struct exofs_sb_info *sbi;	/*extended info                  */
 	struct osd_dev *od;		/* Master device                 */
 	struct exofs_fscb fscb;		/*on-disk superblock info        */
-	struct osd_obj_id obj;
+	struct ore_comp comp;
 	unsigned table_count;
 	int ret;
 
@@ -684,10 +755,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbi)
 		return -ENOMEM;
 
-	ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
-	if (ret)
-		goto free_bdi;
-
 	/* use mount options to fill superblock */
 	if (opts->is_osdname) {
 		struct osd_dev_info odi = {.systemid_len = 0};
@@ -695,6 +762,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 		odi.osdname_len = strlen(opts->dev_name);
 		odi.osdname = (u8 *)opts->dev_name;
 		od = osduld_info_lookup(&odi);
+		kfree(opts->dev_name);
+		opts->dev_name = NULL;
 	} else {
 		od = osduld_path_lookup(opts->dev_name);
 	}
@@ -709,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->layout.group_width = 1;
 	sbi->layout.group_depth = -1;
 	sbi->layout.group_count = 1;
-	sbi->layout.s_ods[0] = od;
-	sbi->layout.s_numdevs = 1;
-	sbi->layout.s_pid = opts->pid;
 	sbi->s_timeout = opts->timeout;
 
+	sbi->one_comp.obj.partition = opts->pid;
+	sbi->one_comp.obj.id = 0;
+	exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
+	sbi->comps.numdevs = 1;
+	sbi->comps.single_comp = EC_SINGLE_COMP;
+	sbi->comps.comps = &sbi->one_comp;
+	sbi->comps.ods = sbi->_min_one_dev;
+
 	/* fill in some other data by hand */
 	memset(sb->s_id, 0, sizeof(sb->s_id));
 	strcpy(sb->s_id, "exofs");
@@ -724,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_bdev = NULL;
 	sb->s_dev = 0;
 
-	obj.partition = sbi->layout.s_pid;
-	obj.id = EXOFS_SUPER_ID;
-	exofs_make_credential(sbi->s_cred, &obj);
+	comp.obj.partition = sbi->one_comp.obj.partition;
+	comp.obj.id = EXOFS_SUPER_ID;
+	exofs_make_credential(comp.cred, &comp.obj);
 
-	ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
+	ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));
 	if (unlikely(ret))
 		goto free_sbi;
 
@@ -757,9 +831,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 
 	table_count = le64_to_cpu(fscb.s_dev_table_count);
 	if (table_count) {
-		ret = exofs_read_lookup_dev_table(&sbi, table_count);
+		ret = exofs_read_lookup_dev_table(sbi, od, table_count);
 		if (unlikely(ret))
 			goto free_sbi;
+	} else {
+		sbi->comps.ods[0] = od;
 	}
 
 	__sbi_read_stats(sbi);
@@ -793,20 +869,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_sbi;
 	}
 
-	_exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
-			    sbi->layout.s_pid);
-	if (opts->is_osdname)
-		kfree(opts->dev_name);
+	ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
+	if (ret) {
+		EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
+		goto free_sbi;
+	}
+
+	_exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0],
+			    sbi->one_comp.obj.partition);
 	return 0;
 
 free_sbi:
-	bdi_destroy(&sbi->bdi);
-free_bdi:
 	EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
-		  opts->dev_name, sbi->layout.s_pid, ret);
+		  opts->dev_name, sbi->one_comp.obj.partition, ret);
 	exofs_free_sbi(sbi);
-	if (opts->is_osdname)
-		kfree(opts->dev_name);
 	return ret;
 }
 
@@ -837,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct osd_attr attrs[] = {
 		ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
 			OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -846,21 +922,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	};
 	uint64_t capacity = ULLONG_MAX;
 	uint64_t used = ULLONG_MAX;
-	uint8_t cred_a[OSD_CAP_LEN];
 	int ret;
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
 	if (ret) {
-		EXOFS_DBGMSG("exofs_get_io_state failed.\n");
+		EXOFS_DBGMSG("ore_get_io_state failed.\n");
 		return ret;
 	}
 
-	exofs_make_credential(cred_a, &ios->obj);
-	ios->cred = sbi->s_cred;
 	ios->in_attr = attrs;
 	ios->in_attr_len = ARRAY_SIZE(attrs);
 
-	ret = exofs_sbi_read(ios);
+	ret = ore_read(ios);
 	if (unlikely(ret))
 		goto out;
 
@@ -889,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_namelen = EXOFS_NAME_LEN;
 
 out:
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	return ret;
 }
 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 6e18a0b7750d..5571708b6a58 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2209,9 +2209,11 @@ static int ext3_symlink (struct inode * dir,
 		/*
 		 * For non-fast symlinks, we just allocate inode and put it on
 		 * orphan list in the first transaction => we need bitmap,
-		 * group descriptor, sb, inode block, quota blocks.
+		 * group descriptor, sb, inode block, quota blocks, and
+		 * possibly selinux xattr blocks.
 		 */
-		credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+		credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+			  EXT3_XATTR_TRANS_BLOCKS;
 	} else {
 		/*
 		 * Fast symlink. We have to add entry to directory
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 565a154e22d4..f8068c7bae9f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2253,9 +2253,11 @@ static int ext4_symlink(struct inode *dir,
 		/*
 		 * For non-fast symlinks, we just allocate inode and put it on
 		 * orphan list in the first transaction => we need bitmap,
-		 * group descriptor, sb, inode block, quota blocks.
+		 * group descriptor, sb, inode block, quota blocks, and
+		 * possibly selinux xattr blocks.
 		 */
-		credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+		credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+			  EXT4_XATTR_TRANS_BLOCKS;
 	} else {
 		/*
 		 * Fast symlink. We have to add entry to directory
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4ad64732cbce..5efbd5d7701a 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
 	struct super_block *sb = dir->i_sb;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */
-	struct msdos_dir_entry *de;
+	struct msdos_dir_entry *uninitialized_var(de);
 	int err, free_slots, i, nr_bhs;
 	loff_t pos, i_pos;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5942fec22c65..1726d7303047 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1188,9 +1188,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 out:
 	/* UTF-8 doesn't provide FAT semantics */
 	if (!strcmp(opts->iocharset, "utf8")) {
-		fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
+		fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
 		       " for FAT filesystems, filesystem will be "
-		       "case sensitive!\n");
+		       "case sensitive!");
 	}
 
 	/* If user doesn't specify allow_utime, it's initialized from dmask. */
@@ -1367,6 +1367,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 	sbi->free_clusters = -1;	/* Don't know yet */
 	sbi->free_clus_valid = 0;
 	sbi->prev_free = FAT_START_ENT;
+	sb->s_maxbytes = 0xffffffff;
 
 	if (!sbi->fat_length && b->fat32_length) {
 		struct fat_boot_fsinfo *fsinfo;
@@ -1377,8 +1378,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 		sbi->fat_length = le32_to_cpu(b->fat32_length);
 		sbi->root_cluster = le32_to_cpu(b->root_cluster);
 
-		sb->s_maxbytes = 0xffffffff;
-
 		/* MC - if info_sector is 0, don't multiply by 0 */
 		sbi->fsinfo_sector = le16_to_cpu(b->info_sector);
 		if (sbi->fsinfo_sector == 0)
diff --git a/fs/inode.c b/fs/inode.c
index 5aab80dc008c..73920d555c88 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_op = &empty_iops;
 	inode->i_fop = &empty_fops;
 	inode->i_nlink = 1;
+	inode->i_opflags = 0;
 	inode->i_uid = 0;
 	inode->i_gid = 0;
 	atomic_set(&inode->i_writecount, 0);
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index adcf92d3b603..7971f37534a3 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb)
 		/*
 		 * Wait for outstanding transactions to be written to log:
 		 */
-		jfs_flush_journal(log, 1);
+		jfs_flush_journal(log, 2);
 
 	/*
 	 * close fileset inode allocation map (aka fileset inode)
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb)
 	 *
 	 * remove file system from log active file system list.
 	 */
-	jfs_flush_journal(log, 1);
+	jfs_flush_journal(log, 2);
 
 	/*
 	 * Make sure all metadata makes it to disk
diff --git a/fs/namei.c b/fs/namei.c
index 445fd5da11fa..2826db35dc25 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -179,19 +179,14 @@ static int check_acl(struct inode *inode, int mask)
 #ifdef CONFIG_FS_POSIX_ACL
 	struct posix_acl *acl;
 
-	/*
-	 * Under RCU walk, we cannot even do a "get_cached_acl()",
-	 * because that involves locking and getting a refcount on
-	 * a cached ACL.
-	 *
-	 * So the only case we handle during RCU walking is the
-	 * case of a cached "no ACL at all", which needs no locks
-	 * or refcounts.
-	 */
 	if (mask & MAY_NOT_BLOCK) {
-	        if (negative_cached_acl(inode, ACL_TYPE_ACCESS))
+		acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
+	        if (!acl)
 	                return -EAGAIN;
-	        return -ECHILD;
+		/* no ->get_acl() calls in RCU mode... */
+		if (acl == ACL_NOT_CACHED)
+			return -ECHILD;
+	        return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
 	}
 
 	acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
@@ -313,6 +308,26 @@ int generic_permission(struct inode *inode, int mask)
 	return -EACCES;
 }
 
+/*
+ * We _really_ want to just do "generic_permission()" without
+ * even looking at the inode->i_op values. So we keep a cache
+ * flag in inode->i_opflags, that says "this has not special
+ * permission function, use the fast case".
+ */
+static inline int do_inode_permission(struct inode *inode, int mask)
+{
+	if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
+		if (likely(inode->i_op->permission))
+			return inode->i_op->permission(inode, mask);
+
+		/* This gets set once for the inode lifetime */
+		spin_lock(&inode->i_lock);
+		inode->i_opflags |= IOP_FASTPERM;
+		spin_unlock(&inode->i_lock);
+	}
+	return generic_permission(inode, mask);
+}
+
 /**
  * inode_permission  -  check for access rights to a given inode
  * @inode:	inode to check permission on
@@ -327,7 +342,7 @@ int inode_permission(struct inode *inode, int mask)
 {
 	int retval;
 
-	if (mask & MAY_WRITE) {
+	if (unlikely(mask & MAY_WRITE)) {
 		umode_t mode = inode->i_mode;
 
 		/*
@@ -344,11 +359,7 @@ int inode_permission(struct inode *inode, int mask)
 			return -EACCES;
 	}
 
-	if (inode->i_op->permission)
-		retval = inode->i_op->permission(inode, mask);
-	else
-		retval = generic_permission(inode, mask);
-
+	retval = do_inode_permission(inode, mask);
 	if (retval)
 		return retval;
 
@@ -1250,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd)
 	}
 }
 
+/*
+ * Do we need to follow links? We _really_ want to be able
+ * to do this check without having to look at inode->i_op,
+ * so we keep a cache of "no, this doesn't need follow_link"
+ * for the common case.
+ */
+static inline int should_follow_link(struct inode *inode, int follow)
+{
+	if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
+		if (likely(inode->i_op->follow_link))
+			return follow;
+
+		/* This gets set once for the inode lifetime */
+		spin_lock(&inode->i_lock);
+		inode->i_opflags |= IOP_NOFOLLOW;
+		spin_unlock(&inode->i_lock);
+	}
+	return 0;
+}
+
 static inline int walk_component(struct nameidata *nd, struct path *path,
 		struct qstr *name, int type, int follow)
 {
@@ -1272,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
 		terminate_walk(nd);
 		return -ENOENT;
 	}
-	if (unlikely(inode->i_op->follow_link) && follow) {
+	if (should_follow_link(inode, follow)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
 				terminate_walk(nd);
@@ -1325,6 +1356,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
 }
 
 /*
+ * We really don't want to look at inode->i_op->lookup
+ * when we don't have to. So we keep a cache bit in
+ * the inode ->i_opflags field that says "yes, we can
+ * do lookup on this inode".
+ */
+static inline int can_lookup(struct inode *inode)
+{
+	if (likely(inode->i_opflags & IOP_LOOKUP))
+		return 1;
+	if (likely(!inode->i_op->lookup))
+		return 0;
+
+	/* We do this once for the lifetime of the inode */
+	spin_lock(&inode->i_lock);
+	inode->i_opflags |= IOP_LOOKUP;
+	spin_unlock(&inode->i_lock);
+	return 1;
+}
+
+/*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
  * the final dentry. We expect 'base' to be positive and a directory.
@@ -1403,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 			if (err)
 				return err;
 		}
+		if (can_lookup(nd->inode))
+			continue;
 		err = -ENOTDIR; 
-		if (!nd->inode->i_op->lookup)
-			break;
-		continue;
+		break;
 		/* here ends the main loop */
 
 last_component:
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index be020771c6b4..dbcd82126aed 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -79,12 +79,9 @@ config NFS_V4_1
 	depends on NFS_FS && NFS_V4 && EXPERIMENTAL
 	select SUNRPC_BACKCHANNEL
 	select PNFS_FILE_LAYOUT
-	select PNFS_BLOCK
-	select MD
-	select BLK_DEV_DM
 	help
 	  This option enables support for minor version 1 of the NFSv4 protocol
-	  (RFC 5661 and RFC 5663) in the kernel's NFS client.
+	  (RFC 5661) in the kernel's NFS client.
 
 	  If unsure, say N.
 
@@ -93,16 +90,13 @@ config PNFS_FILE_LAYOUT
 
 config PNFS_BLOCK
 	tristate
+	depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM
+	default m
 
 config PNFS_OBJLAYOUT
-	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
+	tristate
 	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
-	help
-	  Say M here if you want your pNFS client to support the Objects Layout Driver.
-	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
-	  upper level driver (SCSI_OSD_ULD).
-
-	  If unsure, say N.
+	default m
 
 config ROOT_NFS
 	bool "Root file system on NFS"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 08e3eccf9a12..5eb02069e1b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1118,7 +1118,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	 * Warn that /proc/pid/oom_adj is deprecated, see
 	 * Documentation/feature-removal-schedule.txt.
 	 */
-	WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+	printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
 		  current->comm, task_pid_nr(current), task_pid_nr(task),
 		  task_pid_nr(task));
 	task->signal->oom_adj = oom_adjust;
@@ -1919,6 +1919,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
+			unsigned int f_flags;
+			struct fdtable *fdt;
+
+			fdt = files_fdtable(files);
+			f_flags = file->f_flags & ~O_CLOEXEC;
+			if (FD_ISSET(fd, fdt->close_on_exec))
+				f_flags |= O_CLOEXEC;
+
 			if (path) {
 				*path = file->f_path;
 				path_get(&file->f_path);
@@ -1928,7 +1936,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 					 "pos:\t%lli\n"
 					 "flags:\t0%o\n",
 					 (long long) file->f_pos,
-					 file->f_flags);
+					 f_flags);
 			spin_unlock(&files->file_lock);
 			put_files_struct(files);
 			return 0;
diff --git a/fs/stat.c b/fs/stat.c
index 961039121cb8..ba5316ffac61 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
 	stat->uid = inode->i_uid;
 	stat->gid = inode->i_gid;
 	stat->rdev = inode->i_rdev;
+	stat->size = i_size_read(inode);
 	stat->atime = inode->i_atime;
 	stat->mtime = inode->i_mtime;
 	stat->ctime = inode->i_ctime;
-	stat->size = i_size_read(inode);
-	stat->blocks = inode->i_blocks;
 	stat->blksize = (1 << inode->i_blkbits);
+	stat->blocks = inode->i_blocks;
 }
 
 EXPORT_SYMBOL(generic_fillattr);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d1fe74506c4c..c57836dc778f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -596,7 +596,7 @@ _xfs_buf_read(
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
 	status = xfs_buf_iorequest(bp);
-	if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
+	if (status || bp->b_error || (flags & XBF_ASYNC))
 		return status;
 	return xfs_buf_iowait(bp);
 }
@@ -679,7 +679,6 @@ xfs_buf_read_uncached(
 	/* set up the buffer for a read IO */
 	XFS_BUF_SET_ADDR(bp, daddr);
 	XFS_BUF_READ(bp);
-	XFS_BUF_BUSY(bp);
 
 	xfsbdstrat(mp, bp);
 	error = xfs_buf_iowait(bp);
@@ -1069,7 +1068,7 @@ xfs_bioerror(
 	/*
 	 * No need to wait until the buffer is unpinned, we aren't flushing it.
 	 */
-	XFS_BUF_ERROR(bp, EIO);
+	xfs_buf_ioerror(bp, EIO);
 
 	/*
 	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
@@ -1094,7 +1093,7 @@ STATIC int
 xfs_bioerror_relse(
 	struct xfs_buf	*bp)
 {
-	int64_t		fl = XFS_BUF_BFLAGS(bp);
+	int64_t		fl = bp->b_flags;
 	/*
 	 * No need to wait until the buffer is unpinned.
 	 * We aren't flushing it.
@@ -1115,7 +1114,7 @@ xfs_bioerror_relse(
 		 * There's no reason to mark error for
 		 * ASYNC buffers.
 		 */
-		XFS_BUF_ERROR(bp, EIO);
+		xfs_buf_ioerror(bp, EIO);
 		XFS_BUF_FINISH_IOWAIT(bp);
 	} else {
 		xfs_buf_relse(bp);
@@ -1324,7 +1323,7 @@ xfs_buf_offset(
 	struct page		*page;
 
 	if (bp->b_flags & XBF_MAPPED)
-		return XFS_BUF_PTR(bp) + offset;
+		return bp->b_addr + offset;
 
 	offset += bp->b_offset;
 	page = bp->b_pages[offset >> PAGE_SHIFT];
@@ -1484,7 +1483,7 @@ xfs_setsize_buftarg_flags(
 	if (set_blocksize(btp->bt_bdev, sectorsize)) {
 		xfs_warn(btp->bt_mount,
 			"Cannot set_blocksize to %u on device %s\n",
-			sectorsize, XFS_BUFTARG_NAME(btp));
+			sectorsize, xfs_buf_target_name(btp));
 		return EINVAL;
 	}
 
@@ -1681,7 +1680,7 @@ xfs_buf_delwri_split(
 	list_for_each_entry_safe(bp, n, dwq, b_list) {
 		ASSERT(bp->b_flags & XBF_DELWRI);
 
-		if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) {
+		if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
 			if (!force &&
 			    time_before(jiffies, bp->b_queuetime + age)) {
 				xfs_buf_unlock(bp);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 6a83b46b4bcf..620972b8094d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -228,11 +228,15 @@ extern void xfs_buf_delwri_promote(xfs_buf_t *);
 extern int xfs_buf_init(void);
 extern void xfs_buf_terminate(void);
 
-#define xfs_buf_target_name(target)	\
-	({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
+static inline const char *
+xfs_buf_target_name(struct xfs_buftarg *target)
+{
+	static char __b[BDEVNAME_SIZE];
+
+	return bdevname(target->bt_bdev, __b);
+}
 
 
-#define XFS_BUF_BFLAGS(bp)	((bp)->b_flags)
 #define XFS_BUF_ZEROFLAGS(bp) \
 	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
 			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
@@ -251,23 +255,14 @@ void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_UNDELAYWRITE(bp)	xfs_buf_delwri_dequeue(bp)
 #define XFS_BUF_ISDELAYWRITE(bp)	((bp)->b_flags & XBF_DELWRI)
 
-#define XFS_BUF_ERROR(bp,no)	xfs_buf_ioerror(bp,no)
-#define XFS_BUF_GETERROR(bp)	xfs_buf_geterror(bp)
-#define XFS_BUF_ISERROR(bp)	(xfs_buf_geterror(bp) ? 1 : 0)
-
 #define XFS_BUF_DONE(bp)	((bp)->b_flags |= XBF_DONE)
 #define XFS_BUF_UNDONE(bp)	((bp)->b_flags &= ~XBF_DONE)
 #define XFS_BUF_ISDONE(bp)	((bp)->b_flags & XBF_DONE)
 
-#define XFS_BUF_BUSY(bp)	do { } while (0)
-#define XFS_BUF_UNBUSY(bp)	do { } while (0)
-#define XFS_BUF_ISBUSY(bp)	(1)
-
 #define XFS_BUF_ASYNC(bp)	((bp)->b_flags |= XBF_ASYNC)
 #define XFS_BUF_UNASYNC(bp)	((bp)->b_flags &= ~XBF_ASYNC)
 #define XFS_BUF_ISASYNC(bp)	((bp)->b_flags & XBF_ASYNC)
 
-#define XFS_BUF_HOLD(bp)	xfs_buf_hold(bp)
 #define XFS_BUF_READ(bp)	((bp)->b_flags |= XBF_READ)
 #define XFS_BUF_UNREAD(bp)	((bp)->b_flags &= ~XBF_READ)
 #define XFS_BUF_ISREAD(bp)	((bp)->b_flags & XBF_READ)
@@ -276,10 +271,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_UNWRITE(bp)	((bp)->b_flags &= ~XBF_WRITE)
 #define XFS_BUF_ISWRITE(bp)	((bp)->b_flags & XBF_WRITE)
 
-#define XFS_BUF_SET_START(bp)			do { } while (0)
-
-#define XFS_BUF_PTR(bp)			(xfs_caddr_t)((bp)->b_addr)
-#define XFS_BUF_SET_PTR(bp, val, cnt)	xfs_buf_associate_memory(bp, val, cnt)
 #define XFS_BUF_ADDR(bp)		((bp)->b_bn)
 #define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_bn = (xfs_daddr_t)(bno))
 #define XFS_BUF_OFFSET(bp)		((bp)->b_file_offset)
@@ -299,14 +290,13 @@ xfs_buf_set_ref(
 #define XFS_BUF_SET_VTYPE_REF(bp, type, ref)	xfs_buf_set_ref(bp, ref)
 #define XFS_BUF_SET_VTYPE(bp, type)		do { } while (0)
 
-#define XFS_BUF_ISPINNED(bp)	atomic_read(&((bp)->b_pin_count))
+static inline int xfs_buf_ispinned(struct xfs_buf *bp)
+{
+	return atomic_read(&bp->b_pin_count);
+}
 
 #define XFS_BUF_FINISH_IOWAIT(bp)	complete(&bp->b_iowait);
 
-#define XFS_BUF_SET_TARGET(bp, target)	((bp)->b_target = (target))
-#define XFS_BUF_TARGET(bp)		((bp)->b_target)
-#define XFS_BUFTARG_NAME(target)	xfs_buf_target_name(target)
-
 static inline void xfs_buf_relse(xfs_buf_t *bp)
 {
 	xfs_buf_unlock(bp);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e4c938afb910..4604f90f86a3 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -332,7 +332,7 @@ xfs_sync_fsdata(
 	 * between there and here.
 	 */
 	bp = xfs_getsb(mp, 0);
-	if (XFS_BUF_ISPINNED(bp))
+	if (xfs_buf_ispinned(bp))
 		xfs_log_force(mp, 0);
 
 	return xfs_bwrite(mp, bp);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 837f31158d43..db62959bed13 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -318,10 +318,9 @@ xfs_qm_init_dquot_blk(
 	int		curid, i;
 
 	ASSERT(tp);
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(xfs_buf_islocked(bp));
 
-	d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
+	d = bp->b_addr;
 
 	/*
 	 * ID of the first dquot in the block - id's are zero based.
@@ -403,7 +402,7 @@ xfs_qm_dqalloc(
 			       dqp->q_blkno,
 			       mp->m_quotainfo->qi_dqchunklen,
 			       0);
-	if (!bp || (error = XFS_BUF_GETERROR(bp)))
+	if (!bp || (error = xfs_buf_geterror(bp)))
 		goto error1;
 	/*
 	 * Make a chunk of dquots out of this buffer and log
@@ -534,13 +533,12 @@ xfs_qm_dqtobp(
 			return XFS_ERROR(error);
 	}
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(xfs_buf_islocked(bp));
 
 	/*
 	 * calculate the location of the dquot inside the buffer.
 	 */
-	ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+	ddq = bp->b_addr + dqp->q_bufoffset;
 
 	/*
 	 * A simple sanity check in case we got a corrupted dquot...
@@ -553,7 +551,6 @@ xfs_qm_dqtobp(
 			xfs_trans_brelse(tp, bp);
 			return XFS_ERROR(EIO);
 		}
-		XFS_BUF_BUSY(bp); /* We dirtied this */
 	}
 
 	*O_bpp = bp;
@@ -622,7 +619,6 @@ xfs_qm_dqread(
 	 * this particular dquot was repaired. We still aren't afraid to
 	 * brelse it because we have the changes incore.
 	 */
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(xfs_buf_islocked(bp));
 	xfs_trans_brelse(tp, bp);
 
@@ -1204,7 +1200,7 @@ xfs_qm_dqflush(
 	/*
 	 * Calculate the location of the dquot inside the buffer.
 	 */
-	ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+	ddqp = bp->b_addr + dqp->q_bufoffset;
 
 	/*
 	 * A simple sanity check in case we got a corrupted dquot..
@@ -1240,7 +1236,7 @@ xfs_qm_dqflush(
 	 * If the buffer is pinned then push on the log so we won't
 	 * get stuck waiting in the write for too long.
 	 */
-	if (XFS_BUF_ISPINNED(bp)) {
+	if (xfs_buf_ispinned(bp)) {
 		trace_xfs_dqflush_force(dqp);
 		xfs_log_force(mp, 0);
 	}
@@ -1447,7 +1443,7 @@ xfs_qm_dqflock_pushbuf_wait(
 		goto out_lock;
 
 	if (XFS_BUF_ISDELAYWRITE(bp)) {
-		if (XFS_BUF_ISPINNED(bp))
+		if (xfs_buf_ispinned(bp))
 			xfs_log_force(mp, 0);
 		xfs_buf_delwri_promote(bp);
 		wake_up_process(bp->b_target->bt_task);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 46e54ad9a2dc..9a0aa76facdf 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1240,7 +1240,7 @@ xfs_qm_reset_dqcounts(
 	do_div(j, sizeof(xfs_dqblk_t));
 	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
 #endif
-	ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
+	ddq = bp->b_addr;
 	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
 		/*
 		 * Do a sanity check, and if needed, repair the dqblk. Don't
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 6530769a999b..4805f009f923 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -103,7 +103,7 @@ typedef struct xfs_agf {
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGF_DADDR(mp)	((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
 #define	XFS_AGF_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
-#define	XFS_BUF_TO_AGF(bp)	((xfs_agf_t *)XFS_BUF_PTR(bp))
+#define	XFS_BUF_TO_AGF(bp)	((xfs_agf_t *)((bp)->b_addr))
 
 extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
 			xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
@@ -156,7 +156,7 @@ typedef struct xfs_agi {
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGI_DADDR(mp)	((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
 #define	XFS_AGI_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
-#define	XFS_BUF_TO_AGI(bp)	((xfs_agi_t *)XFS_BUF_PTR(bp))
+#define	XFS_BUF_TO_AGI(bp)	((xfs_agi_t *)((bp)->b_addr))
 
 extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
 				xfs_agnumber_t agno, struct xfs_buf **bpp);
@@ -168,7 +168,7 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
 #define XFS_AGFL_DADDR(mp)	((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
 #define	XFS_AGFL_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
 #define XFS_AGFL_SIZE(mp)	((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
-#define	XFS_BUF_TO_AGFL(bp)	((xfs_agfl_t *)XFS_BUF_PTR(bp))
+#define	XFS_BUF_TO_AGFL(bp)	((xfs_agfl_t *)((bp)->b_addr))
 
 typedef struct xfs_agfl {
 	__be32		agfl_bno[1];	/* actually XFS_AGFL_SIZE(mp) */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 1e00b3ef6274..bdd9cb54d63b 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -451,8 +451,7 @@ xfs_alloc_read_agfl(
 			XFS_FSS_TO_BB(mp, 1), 0, &bp);
 	if (error)
 		return error;
-	ASSERT(bp);
-	ASSERT(!XFS_BUF_GETERROR(bp));
+	ASSERT(!xfs_buf_geterror(bp));
 	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
 	*bpp = bp;
 	return 0;
@@ -2116,7 +2115,7 @@ xfs_read_agf(
 	if (!*bpp)
 		return 0;
 
-	ASSERT(!XFS_BUF_GETERROR(*bpp));
+	ASSERT(!(*bpp)->b_error);
 	agf = XFS_BUF_TO_AGF(*bpp);
 
 	/*
@@ -2168,7 +2167,7 @@ xfs_alloc_read_agf(
 		return error;
 	if (!*bpp)
 		return 0;
-	ASSERT(!XFS_BUF_GETERROR(*bpp));
+	ASSERT(!(*bpp)->b_error);
 
 	agf = XFS_BUF_TO_AGF(*bpp);
 	pag = xfs_perag_get(mp, agno);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index cbae424fe1ba..160bcdc34a6e 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2121,8 +2121,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 
 		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
 				 XBF_LOCK | XBF_DONT_BLOCK);
-		ASSERT(bp);
-		ASSERT(!XFS_BUF_GETERROR(bp));
+		ASSERT(!xfs_buf_geterror(bp));
 
 		tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
 							XFS_BUF_SIZE(bp);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ab3e5c6c4642..452a291383ab 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3383,8 +3383,7 @@ xfs_bmap_local_to_extents(
 		ASSERT(args.len == 1);
 		*firstblock = args.fsbno;
 		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
-		memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
-			ifp->if_bytes);
+		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
 		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
 		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cabf4b5604aa..2b9fd385e27d 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -275,8 +275,7 @@ xfs_btree_dup_cursor(
 				return error;
 			}
 			new->bc_bufs[i] = bp;
-			ASSERT(bp);
-			ASSERT(!XFS_BUF_GETERROR(bp));
+			ASSERT(!xfs_buf_geterror(bp));
 		} else
 			new->bc_bufs[i] = NULL;
 	}
@@ -467,8 +466,7 @@ xfs_btree_get_bufl(
 	ASSERT(fsbno != NULLFSBLOCK);
 	d = XFS_FSB_TO_DADDR(mp, fsbno);
 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-	ASSERT(bp);
-	ASSERT(!XFS_BUF_GETERROR(bp));
+	ASSERT(!xfs_buf_geterror(bp));
 	return bp;
 }
 
@@ -491,8 +489,7 @@ xfs_btree_get_bufs(
 	ASSERT(agbno != NULLAGBLOCK);
 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-	ASSERT(bp);
-	ASSERT(!XFS_BUF_GETERROR(bp));
+	ASSERT(!xfs_buf_geterror(bp));
 	return bp;
 }
 
@@ -632,7 +629,7 @@ xfs_btree_read_bufl(
 			mp->m_bsize, lock, &bp))) {
 		return error;
 	}
-	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+	ASSERT(!xfs_buf_geterror(bp));
 	if (bp)
 		XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
 	*bpp = bp;
@@ -973,8 +970,7 @@ xfs_btree_get_buf_block(
 	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
 				 mp->m_bsize, flags);
 
-	ASSERT(*bpp);
-	ASSERT(!XFS_BUF_GETERROR(*bpp));
+	ASSERT(!xfs_buf_geterror(*bpp));
 
 	*block = XFS_BUF_TO_BLOCK(*bpp);
 	return 0;
@@ -1006,8 +1002,7 @@ xfs_btree_read_buf_block(
 	if (error)
 		return error;
 
-	ASSERT(*bpp != NULL);
-	ASSERT(!XFS_BUF_GETERROR(*bpp));
+	ASSERT(!xfs_buf_geterror(*bpp));
 
 	xfs_btree_set_refs(cur, *bpp);
 	*block = XFS_BUF_TO_BLOCK(*bpp);
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 8d05a6a46ce3..5b240de104c0 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -262,7 +262,7 @@ typedef struct xfs_btree_cur
 /*
  * Convert from buffer to btree block header.
  */
-#define	XFS_BUF_TO_BLOCK(bp)	((struct xfs_btree_block *)XFS_BUF_PTR(bp))
+#define	XFS_BUF_TO_BLOCK(bp)	((struct xfs_btree_block *)((bp)->b_addr))
 
 
 /*
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 88492916c3dc..cac2ecfa6746 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -124,9 +124,9 @@ xfs_buf_item_log_check(
 
 	bp = bip->bli_buf;
 	ASSERT(XFS_BUF_COUNT(bp) > 0);
-	ASSERT(XFS_BUF_PTR(bp) != NULL);
+	ASSERT(bp->b_addr != NULL);
 	orig = bip->bli_orig;
-	buffer = XFS_BUF_PTR(bp);
+	buffer = bp->b_addr;
 	for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
 		if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
 			xfs_emerg(bp->b_mount,
@@ -371,7 +371,6 @@ xfs_buf_item_pin(
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 
-	ASSERT(XFS_BUF_ISBUSY(bip->bli_buf));
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 	ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
 	       (bip->bli_flags & XFS_BLI_STALE));
@@ -479,13 +478,13 @@ xfs_buf_item_trylock(
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 	struct xfs_buf		*bp = bip->bli_buf;
 
-	if (XFS_BUF_ISPINNED(bp))
+	if (xfs_buf_ispinned(bp))
 		return XFS_ITEM_PINNED;
 	if (!xfs_buf_trylock(bp))
 		return XFS_ITEM_LOCKED;
 
 	/* take a reference to the buffer.  */
-	XFS_BUF_HOLD(bp);
+	xfs_buf_hold(bp);
 
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 	trace_xfs_buf_item_trylock(bip);
@@ -726,7 +725,7 @@ xfs_buf_item_init(
 	 * to have logged.
 	 */
 	bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
-	memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp));
+	memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp));
 	bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
 #endif
 
@@ -895,7 +894,6 @@ xfs_buf_attach_iodone(
 {
 	xfs_log_item_t	*head_lip;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(xfs_buf_islocked(bp));
 
 	lip->li_cb = cb;
@@ -960,7 +958,7 @@ xfs_buf_iodone_callbacks(
 	static ulong		lasttime;
 	static xfs_buftarg_t	*lasttarg;
 
-	if (likely(!XFS_BUF_GETERROR(bp)))
+	if (likely(!xfs_buf_geterror(bp)))
 		goto do_callbacks;
 
 	/*
@@ -973,14 +971,14 @@ xfs_buf_iodone_callbacks(
 		goto do_callbacks;
 	}
 
-	if (XFS_BUF_TARGET(bp) != lasttarg ||
+	if (bp->b_target != lasttarg ||
 	    time_after(jiffies, (lasttime + 5*HZ))) {
 		lasttime = jiffies;
 		xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
-			XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+			xfs_buf_target_name(bp->b_target),
 		      (__uint64_t)XFS_BUF_ADDR(bp));
 	}
-	lasttarg = XFS_BUF_TARGET(bp);
+	lasttarg = bp->b_target;
 
 	/*
 	 * If the write was asynchronous then no one will be looking for the
@@ -991,12 +989,11 @@ xfs_buf_iodone_callbacks(
 	 * around.
 	 */
 	if (XFS_BUF_ISASYNC(bp)) {
-		XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */
+		xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
 
 		if (!XFS_BUF_ISSTALE(bp)) {
 			XFS_BUF_DELAYWRITE(bp);
 			XFS_BUF_DONE(bp);
-			XFS_BUF_SET_START(bp);
 		}
 		ASSERT(bp->b_iodone != NULL);
 		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
@@ -1013,7 +1010,6 @@ xfs_buf_iodone_callbacks(
 	XFS_BUF_UNDELAYWRITE(bp);
 
 	trace_xfs_buf_error_relse(bp, _RET_IP_);
-	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
 
 do_callbacks:
 	xfs_buf_do_callbacks(bp);
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 5bfcb8779f9f..ee9d5427fcd4 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2050,7 +2050,7 @@ xfs_da_do_buf(
 		case 0:
 			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
 				mappedbno, nmapped, 0);
-			error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
+			error = bp ? bp->b_error : XFS_ERROR(EIO);
 			break;
 		case 1:
 		case 2:
@@ -2268,7 +2268,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
 		dabuf->nbuf = 1;
 		bp = bps[0];
 		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
-		dabuf->data = XFS_BUF_PTR(bp);
+		dabuf->data = bp->b_addr;
 		dabuf->bps[0] = bp;
 	} else {
 		dabuf->nbuf = nbuf;
@@ -2279,7 +2279,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
 		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
 		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
 			bp = bps[i];
-			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
+			memcpy((char *)dabuf->data + off, bp->b_addr,
 				XFS_BUF_COUNT(bp));
 		}
 	}
@@ -2302,8 +2302,8 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf)
 		for (i = off = 0; i < dabuf->nbuf;
 				i++, off += XFS_BUF_COUNT(bp)) {
 			bp = dabuf->bps[i];
-			memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
-				XFS_BUF_COUNT(bp));
+			memcpy(bp->b_addr, dabuf->data + off,
+						XFS_BUF_COUNT(bp));
 		}
 	}
 }
@@ -2340,7 +2340,7 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
 
 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
 	if (dabuf->nbuf == 1) {
-		ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
+		ASSERT(dabuf->data == dabuf->bps[0]->b_addr);
 		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
 		return;
 	}
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index dffba9ba0db6..a3721633abc8 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt {
 		be32_to_cpu((dip)->di_nextents) : \
 		be16_to_cpu((dip)->di_anextents))
 
-#define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)XFS_BUF_PTR(bp))
+#define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)((bp)->b_addr))
 
 /*
  * For block and character special files the 32bit dev_t is stored at the
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index dd5628bd8d0b..9f24ec28283b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -202,8 +202,7 @@ xfs_ialloc_inode_init(
 		fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 					 mp->m_bsize * blks_per_cluster,
 					 XBF_LOCK);
-		ASSERT(fbuf);
-		ASSERT(!XFS_BUF_GETERROR(fbuf));
+		ASSERT(!xfs_buf_geterror(fbuf));
 
 		/*
 		 * Initialize all inodes in this buffer and then log them.
@@ -1486,7 +1485,7 @@ xfs_read_agi(
 	if (error)
 		return error;
 
-	ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
+	ASSERT(!xfs_buf_geterror(*bpp));
 	agi = XFS_BUF_TO_AGI(*bpp);
 
 	/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2fcca4b03ed3..0239a7c7c886 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2473,7 +2473,7 @@ cluster_corrupt_out:
 		if (bp->b_iodone) {
 			XFS_BUF_UNDONE(bp);
 			XFS_BUF_STALE(bp);
-			XFS_BUF_ERROR(bp,EIO);
+			xfs_buf_ioerror(bp, EIO);
 			xfs_buf_ioend(bp, 0);
 		} else {
 			XFS_BUF_STALE(bp);
@@ -2585,7 +2585,7 @@ xfs_iflush(
 	 * If the buffer is pinned then push on the log now so we won't
 	 * get stuck waiting in the write for too long.
 	 */
-	if (XFS_BUF_ISPINNED(bp))
+	if (xfs_buf_ispinned(bp))
 		xfs_log_force(mp, 0);
 
 	/*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 06ff8437ed8e..3a8d4f66d702 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -878,7 +878,7 @@ xlog_iodone(xfs_buf_t *bp)
 	/*
 	 * Race to shutdown the filesystem if we see an error.
 	 */
-	if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
+	if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp,
 			XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
 		xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp));
 		XFS_BUF_STALE(bp);
@@ -1051,7 +1051,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
 	if (!bp)
 		goto out_free_log;
 	bp->b_iodone = xlog_iodone;
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(xfs_buf_islocked(bp));
 	log->l_xbuf = bp;
 
@@ -1108,7 +1107,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		iclog->ic_callback_tail = &(iclog->ic_callback);
 		iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
 
-		ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
 		ASSERT(xfs_buf_islocked(iclog->ic_bp));
 		init_waitqueue_head(&iclog->ic_force_wait);
 		init_waitqueue_head(&iclog->ic_write_wait);
@@ -1248,7 +1246,7 @@ xlog_bdstrat(
 	struct xlog_in_core	*iclog = bp->b_fspriv;
 
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
-		XFS_BUF_ERROR(bp, EIO);
+		xfs_buf_ioerror(bp, EIO);
 		XFS_BUF_STALE(bp);
 		xfs_buf_ioend(bp, 0);
 		/*
@@ -1355,7 +1353,6 @@ xlog_sync(xlog_t		*log,
 	XFS_BUF_SET_COUNT(bp, count);
 	bp->b_fspriv = iclog;
 	XFS_BUF_ZEROFLAGS(bp);
-	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
 	bp->b_flags |= XBF_SYNCIO;
 
@@ -1398,16 +1395,15 @@ xlog_sync(xlog_t		*log,
 	if (split) {
 		bp = iclog->ic_log->l_xbuf;
 		XFS_BUF_SET_ADDR(bp, 0);	     /* logical 0 */
-		XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
-					    (__psint_t)count), split);
+		xfs_buf_associate_memory(bp,
+				(char *)&iclog->ic_header + count, split);
 		bp->b_fspriv = iclog;
 		XFS_BUF_ZEROFLAGS(bp);
-		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
 		bp->b_flags |= XBF_SYNCIO;
 		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
 			bp->b_flags |= XBF_FUA;
-		dptr = XFS_BUF_PTR(bp);
+		dptr = bp->b_addr;
 		/*
 		 * Bump the cycle numbers at the start of each block
 		 * since this part of the buffer is at the start of
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 052a2c0ec5fb..a199dbcee7d8 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -147,7 +147,7 @@ xlog_align(
 	xfs_daddr_t	offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
 
 	ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
-	return XFS_BUF_PTR(bp) + BBTOB(offset);
+	return bp->b_addr + BBTOB(offset);
 }
 
 
@@ -178,9 +178,7 @@ xlog_bread_noalign(
 
 	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
 	XFS_BUF_READ(bp);
-	XFS_BUF_BUSY(bp);
 	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
-	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
 
 	xfsbdstrat(log->l_mp, bp);
 	error = xfs_buf_iowait(bp);
@@ -220,18 +218,18 @@ xlog_bread_offset(
 	xfs_buf_t	*bp,
 	xfs_caddr_t	offset)
 {
-	xfs_caddr_t	orig_offset = XFS_BUF_PTR(bp);
+	xfs_caddr_t	orig_offset = bp->b_addr;
 	int		orig_len = bp->b_buffer_length;
 	int		error, error2;
 
-	error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
+	error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
 	if (error)
 		return error;
 
 	error = xlog_bread_noalign(log, blk_no, nbblks, bp);
 
 	/* must reset buffer pointer even on error */
-	error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
+	error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
 	if (error)
 		return error;
 	return error2;
@@ -266,11 +264,9 @@ xlog_bwrite(
 
 	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
 	XFS_BUF_ZEROFLAGS(bp);
-	XFS_BUF_BUSY(bp);
-	XFS_BUF_HOLD(bp);
+	xfs_buf_hold(bp);
 	xfs_buf_lock(bp);
 	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
-	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
 
 	if ((error = xfs_bwrite(log->l_mp, bp)))
 		xfs_ioerror_alert("xlog_bwrite", log->l_mp,
@@ -360,7 +356,7 @@ STATIC void
 xlog_recover_iodone(
 	struct xfs_buf	*bp)
 {
-	if (XFS_BUF_GETERROR(bp)) {
+	if (bp->b_error) {
 		/*
 		 * We're not going to bother about retrying
 		 * this during recovery. One strike!
@@ -1262,7 +1258,7 @@ xlog_write_log_records(
 		 */
 		ealign = round_down(end_block, sectbb);
 		if (j == 0 && (start_block + endcount > ealign)) {
-			offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
+			offset = bp->b_addr + BBTOB(ealign - start_block);
 			error = xlog_bread_offset(log, ealign, sectbb,
 							bp, offset);
 			if (error)
@@ -2135,15 +2131,16 @@ xlog_recover_buffer_pass2(
 
 	bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
 			  buf_flags);
-	if (XFS_BUF_ISERROR(bp)) {
+	if (!bp)
+		return XFS_ERROR(ENOMEM);
+	error = bp->b_error;
+	if (error) {
 		xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
 				  bp, buf_f->blf_blkno);
-		error = XFS_BUF_GETERROR(bp);
 		xfs_buf_relse(bp);
 		return error;
 	}
 
-	error = 0;
 	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
 		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
 	} else if (buf_f->blf_flags &
@@ -2227,14 +2224,17 @@ xlog_recover_inode_pass2(
 
 	bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
 			  XBF_LOCK);
-	if (XFS_BUF_ISERROR(bp)) {
+	if (!bp) {
+		error = ENOMEM;
+		goto error;
+	}
+	error = bp->b_error;
+	if (error) {
 		xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
 				  bp, in_f->ilf_blkno);
-		error = XFS_BUF_GETERROR(bp);
 		xfs_buf_relse(bp);
 		goto error;
 	}
-	error = 0;
 	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
 
@@ -3437,7 +3437,7 @@ xlog_do_recovery_pass(
 			/*
 			 * Check for header wrapping around physical end-of-log
 			 */
-			offset = XFS_BUF_PTR(hbp);
+			offset = hbp->b_addr;
 			split_hblks = 0;
 			wrapped_hblks = 0;
 			if (blk_no + hblks <= log->l_logBBsize) {
@@ -3497,7 +3497,7 @@ xlog_do_recovery_pass(
 			} else {
 				/* This log record is split across the
 				 * physical end of log */
-				offset = XFS_BUF_PTR(dbp);
+				offset = dbp->b_addr;
 				split_bblks = 0;
 				if (blk_no != log->l_logBBsize) {
 					/* some data is before the physical
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 092e16ae4d9d..0081657ad985 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1615,7 +1615,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 		XFS_BUF_UNDELAYWRITE(sbp);
 		XFS_BUF_WRITE(sbp);
 		XFS_BUF_UNASYNC(sbp);
-		ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
+		ASSERT(sbp->b_target == mp->m_ddev_targp);
 		xfsbdstrat(mp, sbp);
 		error = xfs_buf_iowait(sbp);
 		if (error)
@@ -1938,7 +1938,7 @@ xfs_getsb(
 		xfs_buf_lock(bp);
 	}
 
-	XFS_BUF_HOLD(bp);
+	xfs_buf_hold(bp);
 	ASSERT(XFS_BUF_ISDONE(bp));
 	return bp;
 }
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 8f76fdff4f46..35561a511b57 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -168,7 +168,7 @@ error_cancel:
 				xfs_trans_cancel(tp, cancelflags);
 				goto error;
 			}
-			memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
+			memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
 			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 			/*
 			 * Commit the transaction.
@@ -883,7 +883,7 @@ xfs_rtbuf_get(
 	if (error) {
 		return error;
 	}
-	ASSERT(bp && !XFS_BUF_GETERROR(bp));
+	ASSERT(!xfs_buf_geterror(bp));
 	*bpp = bp;
 	return 0;
 }
@@ -943,7 +943,7 @@ xfs_rtcheck_range(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Compute the starting word's address, and starting bit.
 	 */
@@ -994,7 +994,7 @@ xfs_rtcheck_range(
 			if (error) {
 				return error;
 			}
-			b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			b = bufp = bp->b_addr;
 			word = 0;
 		} else {
 			/*
@@ -1040,7 +1040,7 @@ xfs_rtcheck_range(
 			if (error) {
 				return error;
 			}
-			b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			b = bufp = bp->b_addr;
 			word = 0;
 		} else {
 			/*
@@ -1158,7 +1158,7 @@ xfs_rtfind_back(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Get the first word's index & point to it.
 	 */
@@ -1210,7 +1210,7 @@ xfs_rtfind_back(
 			if (error) {
 				return error;
 			}
-			bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			bufp = bp->b_addr;
 			word = XFS_BLOCKWMASK(mp);
 			b = &bufp[word];
 		} else {
@@ -1256,7 +1256,7 @@ xfs_rtfind_back(
 			if (error) {
 				return error;
 			}
-			bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			bufp = bp->b_addr;
 			word = XFS_BLOCKWMASK(mp);
 			b = &bufp[word];
 		} else {
@@ -1333,7 +1333,7 @@ xfs_rtfind_forw(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Get the first word's index & point to it.
 	 */
@@ -1384,7 +1384,7 @@ xfs_rtfind_forw(
 			if (error) {
 				return error;
 			}
-			b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			b = bufp = bp->b_addr;
 			word = 0;
 		} else {
 			/*
@@ -1429,7 +1429,7 @@ xfs_rtfind_forw(
 			if (error) {
 				return error;
 			}
-			b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			b = bufp = bp->b_addr;
 			word = 0;
 		} else {
 			/*
@@ -1649,7 +1649,7 @@ xfs_rtmodify_range(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Compute the starting word's address, and starting bit.
 	 */
@@ -1694,7 +1694,7 @@ xfs_rtmodify_range(
 			if (error) {
 				return error;
 			}
-			first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			first = b = bufp = bp->b_addr;
 			word = 0;
 		} else {
 			/*
@@ -1734,7 +1734,7 @@ xfs_rtmodify_range(
 			if (error) {
 				return error;
 			}
-			first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			first = b = bufp = bp->b_addr;
 			word = 0;
 		} else {
 			/*
@@ -1832,8 +1832,8 @@ xfs_rtmodify_summary(
 	 */
 	sp = XFS_SUMPTR(mp, bp, so);
 	*sp += delta;
-	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)),
-		(uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1));
+	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
+		(uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 09e1f4f35e97..f7f3a359c1c5 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -47,7 +47,7 @@ struct xfs_trans;
 #define	XFS_SUMOFFSTOBLOCK(mp,s)	\
 	(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
 #define	XFS_SUMPTR(mp,bp,so)	\
-	((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \
+	((xfs_suminfo_t *)((bp)->b_addr + \
 		(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
 
 #define	XFS_BITTOBLOCK(mp,bi)	((bi) >> (mp)->m_blkbit_log)
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index d6d6fdfe9422..c96a8a05ac03 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -104,9 +104,9 @@ xfs_ioerror_alert(
 	xfs_alert(mp,
 		 "I/O error occurred: meta-data dev %s block 0x%llx"
 		 "       (\"%s\") error %d buf count %zd",
-		XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+		xfs_buf_target_name(bp->b_target),
 		(__uint64_t)blkno, func,
-		XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
+		bp->b_error, XFS_BUF_COUNT(bp));
 }
 
 /*
@@ -137,8 +137,8 @@ xfs_read_buf(
 	bp = xfs_buf_read(target, blkno, len, flags);
 	if (!bp)
 		return XFS_ERROR(EIO);
-	error = XFS_BUF_GETERROR(bp);
-	if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
+	error = bp->b_error;
+	if (!error && !XFS_FORCED_SHUTDOWN(mp)) {
 		*bpp = bp;
 	} else {
 		*bpp = NULL;
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 1eb2ba586814..cb6ae715814a 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -509,7 +509,7 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
 
 #define XFS_SB_DADDR		((xfs_daddr_t)0) /* daddr in filesystem/ag */
 #define	XFS_SB_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
-#define XFS_BUF_TO_SBP(bp)	((xfs_dsb_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_SBP(bp)	((xfs_dsb_t *)((bp)->b_addr))
 
 #define	XFS_HDR_BLOCK(mp,d)	((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
 #define	XFS_DADDR_TO_FSB(mp,d)	XFS_AGB_TO_FSB(mp, \
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 43233e92f0f6..c15aa29fa169 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -299,7 +299,7 @@ xfs_trans_ail_cursor_last(
  * Splice the log item list into the AIL at the given LSN. We splice to the
  * tail of the given LSN to maintain insert order for push traversals. The
  * cursor is optional, allowing repeated updates to the same LSN to avoid
- * repeated traversals.
+ * repeated traversals.  This should not be called with an empty list.
  */
 static void
 xfs_ail_splice(
@@ -308,50 +308,39 @@ xfs_ail_splice(
 	struct list_head	*list,
 	xfs_lsn_t		lsn)
 {
-	struct xfs_log_item	*lip = cur ? cur->item : NULL;
-	struct xfs_log_item	*next_lip;
+	struct xfs_log_item	*lip;
+
+	ASSERT(!list_empty(list));
 
 	/*
-	 * Get a new cursor if we don't have a placeholder or the existing one
-	 * has been invalidated.
+	 * Use the cursor to determine the insertion point if one is
+	 * provided.  If not, or if the one we got is not valid,
+	 * find the place in the AIL where the items belong.
 	 */
-	if (!lip || (__psint_t)lip & 1) {
+	lip = cur ? cur->item : NULL;
+	if (!lip || (__psint_t) lip & 1)
 		lip = __xfs_trans_ail_cursor_last(ailp, lsn);
 
-		if (!lip) {
-			/* The list is empty, so just splice and return.  */
-			if (cur)
-				cur->item = NULL;
-			list_splice(list, &ailp->xa_ail);
-			return;
-		}
-	}
+	/*
+	 * If a cursor is provided, we know we're processing the AIL
+	 * in lsn order, and future items to be spliced in will
+	 * follow the last one being inserted now.  Update the
+	 * cursor to point to that last item, now while we have a
+	 * reliable pointer to it.
+	 */
+	if (cur)
+		cur->item = list_entry(list->prev, struct xfs_log_item, li_ail);
 
 	/*
-	 * Our cursor points to the item we want to insert _after_, so we have
-	 * to update the cursor to point to the end of the list we are splicing
-	 * in so that it points to the correct location for the next splice.
-	 * i.e. before the splice
-	 *
-	 *  lsn -> lsn -> lsn + x -> lsn + x ...
-	 *          ^
-	 *          | cursor points here
-	 *
-	 * After the splice we have:
-	 *
-	 *  lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
-	 *          ^                            ^
-	 *          | cursor points here         | needs to move here
-	 *
-	 * So we set the cursor to the last item in the list to be spliced
-	 * before we execute the splice, resulting in the cursor pointing to
-	 * the correct item after the splice occurs.
+	 * Finally perform the splice.  Unless the AIL was empty,
+	 * lip points to the item in the AIL _after_ which the new
+	 * items should go.  If lip is null the AIL was empty, so
+	 * the new items go at the head of the AIL.
 	 */
-	if (cur) {
-		next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
-		cur->item = next_lip;
-	}
-	list_splice(list, &lip->li_ail);
+	if (lip)
+		list_splice(list, &lip->li_ail);
+	else
+		list_splice(list, &ailp->xa_ail);
 }
 
 /*
@@ -682,6 +671,7 @@ xfs_trans_ail_update_bulk(
 	int			i;
 	LIST_HEAD(tmp);
 
+	ASSERT(nr_items > 0);		/* Not required, but true. */
 	mlip = xfs_ail_min(ailp);
 
 	for (i = 0; i < nr_items; i++) {
@@ -701,7 +691,8 @@ xfs_trans_ail_update_bulk(
 		list_add(&lip->li_ail, &tmp);
 	}
 
-	xfs_ail_splice(ailp, cur, &tmp, lsn);
+	if (!list_empty(&tmp))
+		xfs_ail_splice(ailp, cur, &tmp, lsn);
 
 	if (!mlip_changed) {
 		spin_unlock(&ailp->xa_lock);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 15584fc3ed7d..137e2b9e2948 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -54,7 +54,7 @@ xfs_trans_buf_item_match(
 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
 		blip = (struct xfs_buf_log_item *)lidp->lid_item;
 		if (blip->bli_item.li_type == XFS_LI_BUF &&
-		    XFS_BUF_TARGET(blip->bli_buf) == target &&
+		    blip->bli_buf->b_target == target &&
 		    XFS_BUF_ADDR(blip->bli_buf) == blkno &&
 		    XFS_BUF_COUNT(blip->bli_buf) == len)
 			return blip->bli_buf;
@@ -80,7 +80,6 @@ _xfs_trans_bjoin(
 {
 	struct xfs_buf_log_item	*bip;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == NULL);
 
 	/*
@@ -194,7 +193,7 @@ xfs_trans_get_buf(xfs_trans_t	*tp,
 		return NULL;
 	}
 
-	ASSERT(!XFS_BUF_GETERROR(bp));
+	ASSERT(!bp->b_error);
 
 	_xfs_trans_bjoin(tp, bp, 1);
 	trace_xfs_trans_get_buf(bp->b_fspriv);
@@ -293,10 +292,10 @@ xfs_trans_read_buf(
 			return (flags & XBF_TRYLOCK) ?
 					EAGAIN : XFS_ERROR(ENOMEM);
 
-		if (XFS_BUF_GETERROR(bp) != 0) {
+		if (bp->b_error) {
+			error = bp->b_error;
 			xfs_ioerror_alert("xfs_trans_read_buf", mp,
 					  bp, blkno);
-			error = XFS_BUF_GETERROR(bp);
 			xfs_buf_relse(bp);
 			return error;
 		}
@@ -330,7 +329,7 @@ xfs_trans_read_buf(
 		ASSERT(xfs_buf_islocked(bp));
 		ASSERT(bp->b_transp == tp);
 		ASSERT(bp->b_fspriv != NULL);
-		ASSERT((XFS_BUF_ISERROR(bp)) == 0);
+		ASSERT(!bp->b_error);
 		if (!(XFS_BUF_ISDONE(bp))) {
 			trace_xfs_trans_read_buf_io(bp, _RET_IP_);
 			ASSERT(!XFS_BUF_ISASYNC(bp));
@@ -386,10 +385,9 @@ xfs_trans_read_buf(
 		return (flags & XBF_TRYLOCK) ?
 					0 : XFS_ERROR(ENOMEM);
 	}
-	if (XFS_BUF_GETERROR(bp) != 0) {
-	    XFS_BUF_SUPER_STALE(bp);
-		error = XFS_BUF_GETERROR(bp);
-
+	if (bp->b_error) {
+		error = bp->b_error;
+		XFS_BUF_SUPER_STALE(bp);
 		xfs_ioerror_alert("xfs_trans_read_buf", mp,
 				  bp, blkno);
 		if (tp->t_flags & XFS_TRANS_DIRTY)
@@ -430,7 +428,7 @@ shutdown_abort:
 	if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
 		xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
 #endif
-	ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
+	ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) !=
 				     (XBF_STALE|XBF_DELWRI));
 
 	trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
@@ -581,7 +579,6 @@ xfs_trans_bhold(xfs_trans_t	*tp,
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
@@ -602,7 +599,6 @@ xfs_trans_bhold_release(xfs_trans_t	*tp,
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
@@ -631,7 +627,6 @@ xfs_trans_log_buf(xfs_trans_t	*tp,
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
@@ -702,7 +697,6 @@ xfs_trans_binval(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -774,7 +768,6 @@ xfs_trans_inode_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -798,7 +791,6 @@ xfs_trans_stale_inode_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -823,7 +815,6 @@ xfs_trans_inode_alloc_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -851,7 +842,6 @@ xfs_trans_dquot_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(type == XFS_BLF_UDQUOT_BUF ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9322e13f0c63..51fc429527bc 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -83,7 +83,9 @@ xfs_readlink_bmap(
 
 		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
 				  XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
-		error = XFS_BUF_GETERROR(bp);
+		if (!bp)
+			return XFS_ERROR(ENOMEM);
+		error = bp->b_error;
 		if (error) {
 			xfs_ioerror_alert("xfs_readlink",
 				  ip->i_mount, bp, XFS_BUF_ADDR(bp));
@@ -94,7 +96,7 @@ xfs_readlink_bmap(
 			byte_cnt = pathlen;
 		pathlen -= byte_cnt;
 
-		memcpy(link, XFS_BUF_PTR(bp), byte_cnt);
+		memcpy(link, bp->b_addr, byte_cnt);
 		xfs_buf_relse(bp);
 	}
 
@@ -1648,13 +1650,13 @@ xfs_symlink(
 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
 			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 					       BTOBB(byte_cnt), 0);
-			ASSERT(bp && !XFS_BUF_GETERROR(bp));
+			ASSERT(!xfs_buf_geterror(bp));
 			if (pathlen < byte_cnt) {
 				byte_cnt = pathlen;
 			}
 			pathlen -= byte_cnt;
 
-			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
+			memcpy(bp->b_addr, cur_chunk, byte_cnt);
 			cur_chunk += byte_cnt;
 
 			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
@@ -1999,7 +2001,7 @@ xfs_zero_remaining_bytes(
 					  mp, bp, XFS_BUF_ADDR(bp));
 			break;
 		}
-		memset(XFS_BUF_PTR(bp) +
+		memset(bp->b_addr +
 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
 		      0, lastoffset - offset + 1);
 		XFS_BUF_UNDONE(bp);