From 8461a3de770477a9a7b8eeaebcc4804dbc26ca38 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 13 Mar 2015 15:12:08 -0400
Subject: Btrfs: fix merge delalloc logic

My patch to properly count outstanding extents wrt MAX_EXTENT_SIZE introduced a
regression when re-dirtying already dirty areas.  We have logic in split to make
sure we are taking the largest space into account but didn't have it for merge,
so it was sometimes making us think we were turning a tiny extent into a huge
extent, when in reality we already had a huge extent and needed to use the other
side in our logic.  This fixes the regression that was reported by a user on
list.  Thanks,

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 91a87f53be3c..97b601bec326 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1592,7 +1592,12 @@ static void btrfs_merge_extent_hook(struct inode *inode,
 		return;
 
 	old_size = other->end - other->start + 1;
-	new_size = old_size + (new->end - new->start + 1);
+	if (old_size < (new->end - new->start + 1))
+		old_size = (new->end - new->start + 1);
+	if (new->start > other->start)
+		new_size = new->end - other->start + 1;
+	else
+		new_size = other->end - new->start + 1;
 
 	/* we're not bigger than the max, unreserve the space and go */
 	if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
-- 
cgit v1.2.3


From ba117213554bc747561c5b7bf274d60ac93b8598 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 13 Mar 2015 15:01:24 -0400
Subject: Btrfs: account merges/splits properly

My fix

Btrfs: fix merge delalloc logic

only fixed half of the problems, it didn't fix the case where we have two large
extents on either side and then join them together with a new small extent.  We
need to instead keep track of how many extents we have accounted for with each
side of the new extent, and then see how many extents we need for the new large
extent.  If they match then we know we need to keep our reservation, otherwise
we need to drop our reservation.  This shows up with a case like this

[BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]

Previously the logic would have said that the number extents required for the
new size (3) is larger than the number of extents required for the largest side
(2) therefore we need to keep our reservation.  But this isn't the case, since
both sides require a reservation of 2 which leads to 4 for the whole range
currently reserved, but we only need 3, so we need to drop one of the
reservations.  The same problem existed for splits, we'd think we only need 3
extents when creating the hole but in reality we need 4.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/inode.c | 57 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 97b601bec326..bb74a4181075 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1542,30 +1542,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
 		u64 new_size;
 
 		/*
-		 * We need the largest size of the remaining extent to see if we
-		 * need to add a new outstanding extent.  Think of the following
-		 * case
-		 *
-		 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
-		 *
-		 * The new_size would just be 4k and we'd think we had enough
-		 * outstanding extents for this if we only took one side of the
-		 * split, same goes for the other direction.  We need to see if
-		 * the larger size still is the same amount of extents as the
-		 * original size, because if it is we need to add a new
-		 * outstanding extent.  But if we split up and the larger size
-		 * is less than the original then we are good to go since we've
-		 * already accounted for the extra extent in our original
-		 * accounting.
+		 * See the explanation in btrfs_merge_extent_hook, the same
+		 * applies here, just in reverse.
 		 */
 		new_size = orig->end - split + 1;
-		if ((split - orig->start) > new_size)
-			new_size = split - orig->start;
-
-		num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+		num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
 					BTRFS_MAX_EXTENT_SIZE);
-		if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-			      BTRFS_MAX_EXTENT_SIZE) < num_extents)
+		new_size = split - orig->start;
+		num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+					BTRFS_MAX_EXTENT_SIZE);
+		if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+			      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
 			return;
 	}
 
@@ -1591,9 +1578,6 @@ static void btrfs_merge_extent_hook(struct inode *inode,
 	if (!(other->state & EXTENT_DELALLOC))
 		return;
 
-	old_size = other->end - other->start + 1;
-	if (old_size < (new->end - new->start + 1))
-		old_size = (new->end - new->start + 1);
 	if (new->start > other->start)
 		new_size = new->end - other->start + 1;
 	else
@@ -1608,13 +1592,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
 	}
 
 	/*
-	 * If we grew by another max_extent, just return, we want to keep that
-	 * reserved amount.
+	 * We have to add up either side to figure out how many extents were
+	 * accounted for before we merged into one big extent.  If the number of
+	 * extents we accounted for is <= the amount we need for the new range
+	 * then we can return, otherwise drop.  Think of it like this
+	 *
+	 * [ 4k][MAX_SIZE]
+	 *
+	 * So we've grown the extent by a MAX_SIZE extent, this would mean we
+	 * need 2 outstanding extents, on one side we have 1 and the other side
+	 * we have 1 so they are == and we can return.  But in this case
+	 *
+	 * [MAX_SIZE+4k][MAX_SIZE+4k]
+	 *
+	 * Each range on their own accounts for 2 extents, but merged together
+	 * they are only 3 extents worth of accounting, so we need to drop in
+	 * this case.
 	 */
+	old_size = other->end - other->start + 1;
 	num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
 				BTRFS_MAX_EXTENT_SIZE);
+	old_size = new->end - new->start + 1;
+	num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+				 BTRFS_MAX_EXTENT_SIZE);
+
 	if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-		      BTRFS_MAX_EXTENT_SIZE) > num_extents)
+		      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
 		return;
 
 	spin_lock(&BTRFS_I(inode)->lock);
-- 
cgit v1.2.3


From 6a3891c551268dd4ff0969b883c4c8b8d974db8f Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Mon, 16 Mar 2015 17:38:52 -0400
Subject: Btrfs: add sanity test for outstanding_extents accounting

I introduced a regression wrt outstanding_extents accounting.  These are tricky
areas that aren't easily covered by xfstests as we could change MAX_EXTENT_SIZE
at any time.  So add sanity tests to cover the various conditions that are
tricky in order to make sure we don't introduce regressions in the future.
Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/inode.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bb74a4181075..3717b3d4c2ce 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
 static int btrfs_dirty_inode(struct inode *inode);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
+
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 				     struct inode *inode,  struct inode *dir,
 				     const struct qstr *qstr)
@@ -1694,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
 			spin_unlock(&BTRFS_I(inode)->lock);
 		}
 
+		/* For sanity tests */
+		if (btrfs_test_is_dummy_root(root))
+			return;
+
 		__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
 				     root->fs_info->delalloc_batch);
 		spin_lock(&BTRFS_I(inode)->lock);
@@ -1749,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
 		    root != root->fs_info->tree_root)
 			btrfs_delalloc_release_metadata(inode, len);
 
+		/* For sanity tests. */
+		if (btrfs_test_is_dummy_root(root))
+			return;
+
 		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
 		    && do_list && !(state->state & EXTENT_NORESERVE))
 			btrfs_free_reserved_data_space(inode, len);
-- 
cgit v1.2.3


From e1cbbfa5f5aaf40a1fe70856fac4dfcc33e0e651 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 17 Mar 2015 10:52:28 -0400
Subject: Btrfs: fix outstanding_extents accounting in DIO

We are keeping track of how many extents we need to reserve properly based on
the amount we want to write, but we were still incrementing outstanding_extents
if we wrote less than what we requested.  This isn't quite right since we will
be limited to our max extent size.  So instead lets do something horrible!  Keep
track of how many outstanding_extents we reserved, and decrement each time we
allocate an extent.  If we use our entire reserve make sure to jack up
outstanding_extents on the inode so the accounting works out properly.  Thanks,

Reported-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/inode.c | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3717b3d4c2ce..aa1fb534f69f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7239,7 +7239,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	u64 start = iblock << inode->i_blkbits;
 	u64 lockstart, lockend;
 	u64 len = bh_result->b_size;
-	u64 orig_len = len;
+	u64 *outstanding_extents = NULL;
 	int unlock_bits = EXTENT_LOCKED;
 	int ret = 0;
 
@@ -7251,6 +7251,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	lockstart = start;
 	lockend = start + len - 1;
 
+	if (current->journal_info) {
+		/*
+		 * Need to pull our outstanding extents and set journal_info to NULL so
+		 * that anything that needs to check if there's a transction doesn't get
+		 * confused.
+		 */
+		outstanding_extents = current->journal_info;
+		current->journal_info = NULL;
+	}
+
 	/*
 	 * If this errors out it's because we couldn't invalidate pagecache for
 	 * this range and we need to fallback to buffered.
@@ -7374,11 +7384,20 @@ unlock:
 		if (start + len > i_size_read(inode))
 			i_size_write(inode, start + len);
 
-		if (len < orig_len) {
+		/*
+		 * If we have an outstanding_extents count still set then we're
+		 * within our reservation, otherwise we need to adjust our inode
+		 * counter appropriately.
+		 */
+		if (*outstanding_extents) {
+			(*outstanding_extents)--;
+		} else {
 			spin_lock(&BTRFS_I(inode)->lock);
 			BTRFS_I(inode)->outstanding_extents++;
 			spin_unlock(&BTRFS_I(inode)->lock);
 		}
+
+		current->journal_info = outstanding_extents;
 		btrfs_free_reserved_data_space(inode, len);
 	}
 
@@ -7402,6 +7421,8 @@ unlock:
 unlock_err:
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	if (outstanding_extents)
+		current->journal_info = outstanding_extents;
 	return ret;
 }
 
@@ -8101,6 +8122,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	u64 outstanding_extents = 0;
 	size_t count = 0;
 	int flags = 0;
 	bool wakeup = true;
@@ -8138,6 +8160,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 		ret = btrfs_delalloc_reserve_space(inode, count);
 		if (ret)
 			goto out;
+		outstanding_extents = div64_u64(count +
+						BTRFS_MAX_EXTENT_SIZE - 1,
+						BTRFS_MAX_EXTENT_SIZE);
+
+		/*
+		 * We need to know how many extents we reserved so that we can
+		 * do the accounting properly if we go over the number we
+		 * originally calculated.  Abuse current->journal_info for this.
+		 */
+		current->journal_info = &outstanding_extents;
 	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
 				     &BTRFS_I(inode)->runtime_flags)) {
 		inode_dio_done(inode);
@@ -8150,6 +8182,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 			iter, offset, btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, flags);
 	if (rw & WRITE) {
+		current->journal_info = NULL;
 		if (ret < 0 && ret != -EIOCBQUEUED)
 			btrfs_delalloc_release_space(inode, count);
 		else if (ret >= 0 && (size_t)ret < count)
-- 
cgit v1.2.3