Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason: "This includes a fairly large change from Josef around data writeback completion. Before, the writeback wasn't completed until the metadata insertions for the extent were done, and this made for fairly large latency spikes on the last page of each ordered extent. We already had a separate mechanism for tracking pending metadata insertions, so Josef just needed to tweak things a little to end writeback earlier on the page. Overall it makes us much friendly to memory reclaim and lowers latencies quite a lot for synchronous IO. Jan Schmidt has finished some background work required to track btree blocks as they go through changes in ownership. It's the missing piece he needed for both btrfs send/receive and subvolume quotas. Neither of those are ready yet, but the new tracking code is included here. Most of the time, the new code is off. It is only used by scrub and other backref walkers. Stefan Behrens has added io failure tracking. This includes counters for which drives are causing the most trouble so the admin (or an automated tool) can choose to kick them out. We're tracking IO errors, crc errors, and generation checks we do on each metadata block. RAID5/6 did miss the cut this time because I'm having trouble with corruptions. I'll nail it down next week and post as a beta testing before 3.6" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (58 commits) Btrfs: fix tree mod log rewinded level and rewinding of moved keys Btrfs: fix tree mod log del_ptr Btrfs: add tree_mod_dont_log helper Btrfs: add missing spin_lock for insertion into tree mod log Btrfs: add inodes before dropping the extent lock in find_all_leafs Btrfs: use delayed ref sequence numbers for all fs-tree updates Btrfs: fix false positive in check-integrity on unmount Btrfs: fix runtime warning in check-integrity check data mode Btrfs: set ioprio of scrub readahead to idle Btrfs: fix return code in drop_objectid_items Btrfs: check to see if the inode is in the log before fsyncing Btrfs: return value of btrfs_read_buffer is checked correctly Btrfs: read device stats on mount, write modified ones during commit Btrfs: add ioctl to get and reset the device stats Btrfs: add device counters for detected IO and checksum errors btrfs: Drop unused function btrfs_abort_devices() Btrfs: fix the same inode id problem when doing auto defragment Btrfs: fall back to non-inline if we don't have enough space Btrfs: fix how we deal with the orphan block rsv Btrfs: convert the inode bit field to use the actual bit operations ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-06-01 08:37:31 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-06-01 08:37:31 -0700
commit: 51eab603f5c86dd1eae4c525df3e7f7eeab401d6 (patch)
tree: e7a8c6214b072db126cca62d39008b3620134798 /fs/btrfs/file.c
parent: 419f4319495043a9507ac3e616be9ca60af09744 (diff)
parent: 1e20932a23578bb1ec59107843574e259b96193f (diff)
1 files changed, 53 insertions, 23 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 53bf2d764bbc..876cddd6b2f0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -65,6 +65,21 @@ struct inode_defrag {
 	int cycled;
 };
 
+static int __compare_inode_defrag(struct inode_defrag *defrag1,
+				  struct inode_defrag *defrag2)
+{
+	if (defrag1->root > defrag2->root)
+		return 1;
+	else if (defrag1->root < defrag2->root)
+		return -1;
+	else if (defrag1->ino > defrag2->ino)
+		return 1;
+	else if (defrag1->ino < defrag2->ino)
+		return -1;
+	else
+		return 0;
+}
+
 /* pop a record for an inode into the defrag tree.  The lock
  * must be held already
  *
@@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
 	struct inode_defrag *entry;
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
+	int ret;
 
 	p = &root->fs_info->defrag_inodes.rb_node;
 	while (*p) {
 		parent = *p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
 
-		if (defrag->ino < entry->ino)
+		ret = __compare_inode_defrag(defrag, entry);
+		if (ret < 0)
 			p = &parent->rb_left;
-		else if (defrag->ino > entry->ino)
+		else if (ret > 0)
 			p = &parent->rb_right;
 		else {
 			/* if we're reinserting an entry for
@@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
 			goto exists;
 		}
 	}
-	BTRFS_I(inode)->in_defrag = 1;
+	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	rb_link_node(&defrag->rb_node, parent, p);
 	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
 	return;
@@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	if (btrfs_fs_closing(root->fs_info))
 		return 0;
 
-	if (BTRFS_I(inode)->in_defrag)
+	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
 		return 0;
 
 	if (trans)
@@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	defrag->root = root->root_key.objectid;
 
 	spin_lock(&root->fs_info->defrag_inodes_lock);
-	if (!BTRFS_I(inode)->in_defrag)
+	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
 		__btrfs_add_inode_defrag(inode, defrag);
 	else
 		kfree(defrag);
@@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 /*
  * must be called with the defrag_inodes lock held
  */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
+struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
+					     u64 root, u64 ino,
 					     struct rb_node **next)
 {
 	struct inode_defrag *entry = NULL;
+	struct inode_defrag tmp;
 	struct rb_node *p;
 	struct rb_node *parent = NULL;
+	int ret;
+
+	tmp.ino = ino;
+	tmp.root = root;
 
 	p = info->defrag_inodes.rb_node;
 	while (p) {
 		parent = p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
 
-		if (ino < entry->ino)
+		ret = __compare_inode_defrag(&tmp, entry);
+		if (ret < 0)
 			p = parent->rb_left;
-		else if (ino > entry->ino)
+		else if (ret > 0)
 			p = parent->rb_right;
 		else
 			return entry;
 	}
 
 	if (next) {
-		while (parent && ino > entry->ino) {
+		while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
 			parent = rb_next(parent);
 			entry = rb_entry(parent, struct inode_defrag, rb_node);
 		}
@@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 	struct btrfs_key key;
 	struct btrfs_ioctl_defrag_range_args range;
 	u64 first_ino = 0;
+	u64 root_objectid = 0;
 	int num_defrag;
 	int defrag_batch = 1024;
 
@@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 		n = NULL;
 
 		/* find an inode to defrag */
-		defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
+		defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
+						 first_ino, &n);
 		if (!defrag) {
-			if (n)
-				defrag = rb_entry(n, struct inode_defrag, rb_node);
-			else if (first_ino) {
+			if (n) {
+				defrag = rb_entry(n, struct inode_defrag,
+						  rb_node);
+			} else if (root_objectid || first_ino) {
+				root_objectid = 0;
 				first_ino = 0;
 				continue;
 			} else {
@@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 
 		/* remove it from the rbtree */
 		first_ino = defrag->ino + 1;
+		root_objectid = defrag->root;
 		rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
 
 		if (btrfs_fs_closing(fs_info))
@@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 			goto next;
 
 		/* do a chunk of defrag */
-		BTRFS_I(inode)->in_defrag = 0;
+		clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 		range.start = defrag->last_offset;
 		num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
 					       defrag_batch);
@@ -1409,7 +1438,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		mutex_unlock(&inode->i_mutex);
 		goto out;
 	}
-	BTRFS_I(inode)->sequence++;
 
 	start_pos = round_down(pos, root->sectorsize);
 	if (start_pos > i_size_read(inode)) {
@@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
 	 * flush down new bytes that may have been written if the
 	 * application were using truncate to replace a file in place.
 	 */
-	if (BTRFS_I(inode)->ordered_data_close) {
-		BTRFS_I(inode)->ordered_data_close = 0;
+	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+			       &BTRFS_I(inode)->runtime_flags)) {
 		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
 		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
 			filemap_flush(inode->i_mapping);
@@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	trace_btrfs_sync_file(file, datasync);
 
-	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (ret)
-		return ret;
 	mutex_lock(&inode->i_mutex);
 
-	/* we wait first, since the writeback may change the inode */
+	/*
+	 * we wait first, since the writeback may change the inode, also wait
+	 * ordered range does a filemape_write_and_wait_range which is why we
+	 * don't do it above like other file systems.
+	 */
 	root->log_batch++;
-	btrfs_wait_ordered_range(inode, 0, (u64)-1);
+	btrfs_wait_ordered_range(inode, start, end);
 	root->log_batch++;
 
 	/*
@@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * syncing
 	 */
 	smp_mb();
-	if (BTRFS_I(inode)->last_trans <=
+	if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
+	    BTRFS_I(inode)->last_trans <=
 	    root->fs_info->last_trans_committed) {
 		BTRFS_I(inode)->last_trans = 0;
 		mutex_unlock(&inode->i_mutex);
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-06-01 08:37:31 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-06-01 08:37:31 -0700
commit	51eab603f5c86dd1eae4c525df3e7f7eeab401d6 (patch)
tree	e7a8c6214b072db126cca62d39008b3620134798 /fs/btrfs/file.c
parent	419f4319495043a9507ac3e616be9ca60af09744 (diff)
parent	1e20932a23578bb1ec59107843574e259b96193f (diff)