From 1e8f915868c59be4d6e49d9aff928454a5d5d569 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Mon, 6 May 2013 11:03:27 +0000
Subject: Btrfs: introduce qgroup_ulist to avoid frequently allocating/freeing ulist

When doing qgroup accounting, we call ulist_alloc()/ulist_free() every time we want to walk the qgroup tree. By introducing 'qgroup_ulist', we only need to call ulist_alloc()/ulist_free() once. This reduces the sys time spent allocating memory; see the measurements below.

fsstress -p 4 -n 10000 -d $dir

With this patch:

real 0m50.153s user 0m0.081s sys 0m6.294s
real 0m51.113s user 0m0.092s sys 0m6.220s
real 0m52.610s user 0m0.096s sys 0m6.125s
avg 6.213
-----------------------------------------------------
Without the patch:

real 0m54.825s user 0m0.061s sys 0m10.665s
real 1m6.401s user 0m0.089s sys 0m11.218s
real 1m13.768s user 0m0.087s sys 0m10.665s
avg 10.849

We can see the sys time is reduced by ~43%.

Signed-off-by: Wang Shilong
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d6dd49b51ba8..a365400e38da 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1594,6 +1594,12 @@ struct btrfs_fs_info {
 	struct rb_root qgroup_tree;
 	spinlock_t qgroup_lock;
 
+	/*
+	 * used to avoid frequently calling ulist_alloc()/ulist_free()
+	 * when doing qgroup accounting, it must be protected by qgroup_lock.
+	 */
+	struct ulist *qgroup_ulist;
+
 	/* protect user change for quota operations */
 	struct mutex qgroup_ioctl_lock;
-- cgit v1.2.3

From 57254b6ebce4ceca02d9c8b615f6059c56c19238 Mon Sep 17 00:00:00 2001
From: Jan Schmidt
Date: Mon, 6 May 2013 19:14:17 +0000
Subject: Btrfs: add ioctl to wait for qgroup rescan completion

btrfs_qgroup_wait_for_completion waits until the currently running qgroup operation completes. It returns immediately when no rescan process is in progress. This is useful to automate things around the rescan process (e.g. testing).

Signed-off-by: Jan Schmidt
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a365400e38da..e36e97b473a8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1613,6 +1613,7 @@ struct btrfs_fs_info {
 	struct mutex qgroup_rescan_lock; /* protects the progress item */
 	struct btrfs_key qgroup_rescan_progress;
 	struct btrfs_workers qgroup_rescan_workers;
+	struct completion qgroup_rescan_completion;
 
 	/* filesystem state */
 	unsigned long fs_state;
@@ -3820,6 +3821,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
-- cgit v1.2.3
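For illustration, the wait can be built directly on the completion added above. The following is a minimal sketch, not the code of this patch; it assumes the rescan worker sets BTRFS_QGROUP_STATUS_FLAG_RESCAN in fs_info->qgroup_flags while running and signals qgroup_rescan_completion when it exits:

/*
 * Minimal sketch (assumptions as stated above): return immediately if
 * no rescan is running, otherwise block until the worker signals the
 * completion on its way out.
 */
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
{
	int running;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (running)
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return 0;
}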
From 1c89cdd1ce1b8b9ff7bca64bc9beea2c917e5693 Mon Sep 17 00:00:00 2001
From: Andreas Philipp
Date: Sat, 11 May 2013 11:12:54 +0000
Subject: Minor format cleanup.

Clean up the format of the definitions of BTRFS_BLOCK_GROUP_RAID5 and BTRFS_BLOCK_GROUP_RAID6. Besides consistency, the ULL suffix gives these constants the same 64-bit type as the other block group flags they are combined with.

Signed-off-by: Andreas Philipp
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e36e97b473a8..d354de31b81b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -961,8 +961,8 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
 #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
 #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
-#define BTRFS_BLOCK_GROUP_RAID5 (1 << 7)
-#define BTRFS_BLOCK_GROUP_RAID6 (1 << 8)
+#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
 #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
 
 enum btrfs_raid_types {
-- cgit v1.2.3

From babbf170c781f24095336c82ebf18ad272ddb773 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Tue, 14 May 2013 10:20:43 +0000
Subject: Btrfs: make the snap/subv deletion end earlier when the fs is R/O

Snapshot/subvolume deletion can take a long time, which makes a concurrent remount wait on it. This patch improves the situation: we break out of the deletion once the fs is remounted read-only.

Cc: David Sterba
Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d354de31b81b..d9ff585aadba 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3318,6 +3318,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
 	smp_mb();
 	return fs_info->closing;
 }
+
+/*
+ * If we remount the fs to be R/O or umount the fs, the cleaner needn't do
+ * anything except sleeping. This function is used to check the status of
+ * the fs.
+ */
+static inline int btrfs_need_cleaner_sleep(struct btrfs_root *root)
+{
+	return (root->fs_info->sb->s_flags & MS_RDONLY ||
+		btrfs_fs_closing(root->fs_info));
+}
+
 static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 {
 	kfree(fs_info->balance_ctl);
-- cgit v1.2.3
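To make the intended use concrete, here is a rough sketch (not part of the patch) of a cleaner-side deletion loop built around the new helper; clean_one_snapshot() is a hypothetical stand-in for the per-snapshot deletion work:

/* hypothetical helper: deletes one dead snapshot, false when none left */
static bool clean_one_snapshot(struct btrfs_root *tree_root);

/*
 * Rough sketch: re-check btrfs_need_cleaner_sleep() between deletion
 * steps, so a remount to R/O (or an unmount) breaks the loop early
 * instead of waiting for all pending deletions to finish.
 */
static void clean_deleted_snapshots(struct btrfs_root *tree_root)
{
	while (!btrfs_need_cleaner_sleep(tree_root)) {
		if (!clean_one_snapshot(tree_root))
			break;	/* nothing left to delete */
	}
	/* whatever remains is resumed once the fs is writable again */
}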
Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d9ff585aadba..a84e59b7b006 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3376,9 +3376,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
 			   struct btrfs_root_item *item);
 void btrfs_read_root_item(struct extent_buffer *eb, int slot,
 			  struct btrfs_root_item *item);
-int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
-			 btrfs_root_item *item, struct btrfs_key *key);
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+		    struct btrfs_path *path, struct btrfs_root_item *root_item,
+		    struct btrfs_key *root_key);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 void btrfs_set_root_node(struct btrfs_root_item *item,
 			 struct extent_buffer *node);
-- cgit v1.2.3

From b0feb9d96e71a88d7eec56f41b8f23e92af889b0 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Wed, 15 May 2013 07:48:20 +0000
Subject: Btrfs: introduce grab/put functions for the root of the fs/file tree

The grab/put functions will be used in the next patch, which needs to grab the root object and ensure it is not freed. We use a reference counter instead of the srcu lock to avoid blocking the memory reclaim task, which invokes synchronize_srcu().

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a84e59b7b006..91a8ca7af77e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1746,6 +1746,7 @@ struct btrfs_root {
 	int force_cow;
 
 	spinlock_t root_item_lock;
+	atomic_t refs;
 };
 
 struct btrfs_ioctl_defrag_range_args {
-- cgit v1.2.3

From eb73c1b7cea7d533288ef5297a0ea0e159db85b0 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Wed, 15 May 2013 07:48:22 +0000
Subject: Btrfs: introduce per-subvolume delalloc inode list

When we create a snapshot, we currently need to flush all delalloc inodes in the fs, though flushing just the inodes in the source tree would be enough. So we introduce a per-subvolume delalloc inode list.

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 91a8ca7af77e..43c073533940 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1449,13 +1449,9 @@ struct btrfs_fs_info {
 	 */
 	struct list_head ordered_extents;
 
-	spinlock_t delalloc_lock;
-	/*
-	 * all of the inodes that have delalloc bytes. It is possible for
-	 * this list to be empty even when there is still dirty data=ordered
-	 * extents waiting to finish IO.
-	 */
-	struct list_head delalloc_inodes;
+	spinlock_t delalloc_root_lock;
+	/* all fs/file tree roots that have delalloc inodes. */
+	struct list_head delalloc_roots;
 
 	/*
 	 * there is a pool of worker threads for checksumming during writes
@@ -1747,6 +1743,16 @@ struct btrfs_root {
 	spinlock_t root_item_lock;
 	atomic_t refs;
+
+	spinlock_t delalloc_lock;
+	/*
+	 * all of the inodes that have delalloc bytes. It is possible for
+	 * this list to be empty even when there is still dirty data=ordered
+	 * extents waiting to finish IO.
+	 */
+	struct list_head delalloc_inodes;
+	struct list_head delalloc_root;
+	u64 nr_delalloc_inodes;
 };
 
 struct btrfs_ioctl_defrag_range_args {
@@ -3550,6 +3556,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
+int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
+				    int delay_iput);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-- cgit v1.2.3
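Flushing all delalloc inodes in the fs then becomes a walk over the new list of roots. A sketch of what btrfs_start_all_delalloc_inodes() might look like — illustrative only, reusing btrfs_grab_fs_root()/btrfs_put_fs_root() from the grab/put patch above, with NULL/error handling elided:

/*
 * Sketch: flush the delalloc inodes of every fs/file tree.  The list
 * is spliced away under the lock; each root is pinned and moved back
 * to the live list before the lock is dropped for the actual flush.
 */
int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
				    int delay_iput)
{
	struct btrfs_root *root;
	LIST_HEAD(splice);
	int ret = 0;

	spin_lock(&fs_info->delalloc_root_lock);
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice)) {
		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		root = btrfs_grab_fs_root(root);	/* pin the root */
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = btrfs_start_delalloc_inodes(root, delay_iput);
		btrfs_put_fs_root(root);

		spin_lock(&fs_info->delalloc_root_lock);
		if (ret)
			break;
	}
	/* on error, put anything we did not get to back on the list */
	list_splice_tail(&splice, &fs_info->delalloc_roots);
	spin_unlock(&fs_info->delalloc_root_lock);
	return ret;
}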
From 199c2a9c3d1389db7f7a211e64f6809d352ce5f6 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Wed, 15 May 2013 07:48:23 +0000
Subject: Btrfs: introduce per-subvolume ordered extent list

The reason we introduce a per-subvolume ordered extent list is the same as for the per-subvolume delalloc inode list.

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 43c073533940..905f7c6c82f3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1437,17 +1437,18 @@ struct btrfs_fs_info {
 	atomic_t open_ioctl_trans;
 
 	/*
-	 * this is used by the balancing code to wait for all the pending
-	 * ordered extents
+	 * this is used to protect the following list -- ordered_roots.
 	 */
-	spinlock_t ordered_extent_lock;
+	spinlock_t ordered_root_lock;
 
 	/*
-	 * all of the data=ordered extents pending writeback
+	 * all fs/file tree roots in which there are data=ordered extents
+	 * pending writeback are added into this list.
+	 *
 	 * these can span multiple transactions and basically include
 	 * every dirty data page that isn't from nodatacow
 	 */
-	struct list_head ordered_extents;
+	struct list_head ordered_roots;
 
 	spinlock_t delalloc_root_lock;
 	/* all fs/file tree roots that have delalloc inodes. */
 	struct list_head delalloc_roots;
@@ -1753,6 +1754,20 @@ struct btrfs_root {
 	struct list_head delalloc_inodes;
 	struct list_head delalloc_root;
 	u64 nr_delalloc_inodes;
+	/*
+	 * this is used by the balancing code to wait for all the pending
+	 * ordered extents
+	 */
+	spinlock_t ordered_extent_lock;
+
+	/*
+	 * all of the data=ordered extents pending writeback
+	 * these can span multiple transactions and basically include
+	 * every dirty data page that isn't from nodatacow
+	 */
+	struct list_head ordered_extents;
+	struct list_head ordered_root;
+	u64 nr_ordered_extents;
 };
-- cgit v1.2.3
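The list maintenance mirrors the delalloc case. As an illustrative sketch (the helper name is hypothetical), a root joins the fs-wide list when its first ordered extent is queued:

/*
 * Illustrative sketch: called when an ordered extent is added to a
 * root; the root is linked onto fs_info->ordered_roots the first
 * time, and would be unlinked again once nr_ordered_extents drops
 * back to zero.
 */
static void btrfs_add_ordered_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	spin_lock(&fs_info->ordered_root_lock);
	if (list_empty(&root->ordered_root))
		list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
	spin_unlock(&fs_info->ordered_root_lock);
}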
From 4a9d8bdee368de78ace8b36da4eb2186afea162d Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Fri, 17 May 2013 03:53:43 +0000
Subject: Btrfs: make the state of the transaction more readable

We used three variables to track the state of the transaction; it was complex and wasted memory. Besides that, it was hard to understand which types of transaction handles should be blocked in each transaction state, so developers often made mistakes.

This patch improves on that. We define six states for the transaction:

enum btrfs_trans_state {
	TRANS_STATE_RUNNING = 0,
	TRANS_STATE_BLOCKED = 1,
	TRANS_STATE_COMMIT_START = 2,
	TRANS_STATE_COMMIT_DOING = 3,
	TRANS_STATE_UNBLOCKED = 4,
	TRANS_STATE_COMPLETED = 5,
	TRANS_STATE_MAX = 6,
}

and use just one variable to track the state.

In order to make the blocked handle types for each state clearer, we introduce an array:

unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
	[TRANS_STATE_RUNNING] = 0U,
	[TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | __TRANS_START),
	[TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | __TRANS_START |
				      __TRANS_ATTACH),
	[TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | __TRANS_START |
				      __TRANS_ATTACH | __TRANS_JOIN),
	[TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | __TRANS_START |
				   __TRANS_ATTACH | __TRANS_JOIN |
				   __TRANS_JOIN_NOLOCK),
	[TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | __TRANS_START |
				   __TRANS_ATTACH | __TRANS_JOIN |
				   __TRANS_JOIN_NOLOCK),
}

which is quite intuitive.

Besides that, because we remove ->in_commit from the transaction structure, the ->commit_lock that was used to protect it is no longer needed, so remove ->commit_lock as well.

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 905f7c6c82f3..fd62aa856d1b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1496,7 +1496,6 @@ struct btrfs_fs_info {
 	int closing;
 	int log_root_recovering;
 	int enospc_unlink;
-	int trans_no_join;
 
 	u64 total_pinned;
-- cgit v1.2.3

From d52be818e618bd252601b340ca6df760d77410e8 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Wed, 29 May 2013 14:54:47 -0400
Subject: Btrfs: simplify unlink reservations

Dave pointed out a problem where if you filled up a file system as much as possible you couldn't remove any files. The whole unlink reservation thing is convoluted because it tries to guess whether it's going to add space to unlink something or not, and has all these odd uncommented cases where it simply does not try.

So to fix this I've added a way to conditionally steal from the global reserve if we can't make our normal reservation. If more than half the space in the global reserve is free, we will go ahead and steal from it. With this patch Dave's reproducer now works and I can rm all the files on the file system. Thanks,

Reported-by: David Sterba
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fd62aa856d1b..a07b8c0a260d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1495,7 +1495,6 @@ struct btrfs_fs_info {
 	int do_barriers;
 	int closing;
 	int log_root_recovering;
-	int enospc_unlink;
 
 	u64 total_pinned;
 
@@ -3183,6 +3182,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 			    struct btrfs_block_rsv *dst_rsv,
 			    u64 num_bytes);
+int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
+			     struct btrfs_block_rsv *dest, u64 num_bytes,
+			     int min_factor);
 void btrfs_block_rsv_release(struct btrfs_root *root,
 			     struct btrfs_block_rsv *block_rsv,
 			     u64 num_bytes);
-- cgit v1.2.3
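The "more than half free" rule is what the new min_factor argument expresses. A simplified sketch of the helper's semantics — assuming btrfs's existing div_factor() helper (num * factor / 10), with block_rsv_add_bytes() standing in for the usual reserve accounting:

/*
 * Simplified sketch: move num_bytes from the global reserve into
 * @dest, but only if the global reserve stays at least
 * (min_factor / 10) full afterwards.  The unlink path would pass
 * min_factor = 5, i.e. steal only while more than half is free.
 */
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_rsv *dest, u64 num_bytes,
			     int min_factor)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != dest->space_info)
		return -ENOSPC;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, min_factor);
	if (global_rsv->reserved < min_bytes + num_bytes) {
		spin_unlock(&global_rsv->lock);
		return -ENOSPC;
	}
	global_rsv->reserved -= num_bytes;
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	block_rsv_add_bytes(dest, num_bytes, 1);
	return 0;
}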
From b382a324b60f4923e9fc8e11f023e4f493c51318 Mon Sep 17 00:00:00 2001
From: Jan Schmidt
Date: Tue, 28 May 2013 15:47:24 +0000
Subject: Btrfs: fix qgroup rescan resume on mount

When called during mount, we cannot start the rescan worker thread until open_ctree is done. This commit restructures the qgroup rescan internals to enable a clean deferral of the rescan resume operation.

First of all, the struct qgroup_rescan is removed, saving us a malloc and some initialization synchronization problems. Its only element (the worker struct) now lives within fs_info just as the rest of the rescan code.

Then setting up a rescan worker is split into several reusable stages. Currently we have three different rescan startup scenarios:

(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable

Each case needs its own combination of the four following steps:

(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]

qgroup_rescan_init does step (1). There's no extra function added to commit a transaction; we've got that already. qgroup_rescan_zero_tracking does step (3). Step (4) is nothing more than a call to the generic btrfs_queue_worker.

We also get rid of a double check for the rescan progress during btrfs_qgroup_account_ref, which is no longer required due to having step (2) from the list above.

As a side effect, this commit prepares to move the rescan start code from btrfs_run_qgroups (which is run during commit) to a less time-critical section.

Signed-off-by: Jan Schmidt
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a07b8c0a260d..80ab1a6f4fe3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1609,6 +1609,7 @@ struct btrfs_fs_info {
 	struct btrfs_key qgroup_rescan_progress;
 	struct btrfs_workers qgroup_rescan_workers;
 	struct completion qgroup_rescan_completion;
+	struct btrfs_work qgroup_rescan_work;
 
 	/* filesystem state */
 	unsigned long fs_state;
@@ -3858,6 +3859,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-- cgit v1.2.3
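With the worker struct embedded in fs_info, the deferred resume (scenario B) reduces to checking whether a rescan was interrupted and queueing the worker. A minimal sketch, assuming the on-disk BTRFS_QGROUP_STATUS_FLAG_RESCAN flag (loaded into fs_info->qgroup_flags) records an interrupted rescan:

/*
 * Minimal sketch of scenario (B): called after open_ctree is done.
 * Step (1) already happened when the qgroup status item was read;
 * steps (2) and (3) are not needed on resume, so only step (4),
 * queueing the worker, remains.
 */
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN))
		return;	/* no rescan was running when we unmounted */

	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
			   &fs_info->qgroup_rescan_work);
}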
From 8c2a1a3028d560cfb95f1c583e872c65ed2f0b3d Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Thu, 6 Jun 2013 13:19:32 -0400
Subject: Btrfs: exclude logged extents before replaying when we are mixed

With non-mixed block groups we replay the logs before we're allowed to do any writes, so we get away with not pinning/removing the data extents until right when we replay them. However with mixed block groups we allocate out of the same pool, so we could easily allocate a metadata block that was logged in our tree log. To deal with this we just need to notice that we have mixed block groups and do the normal excluding/removal dance during the pin stage of the log replay; that way we don't allocate metadata blocks from areas where we have logged data extents. With this patch we now pass xfstests generic/311 with mixed block groups turned on. Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80ab1a6f4fe3..0049fe0f3f74 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3067,6 +3067,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
 		     u64 bytenr, u64 num, int reserved);
 int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 				    u64 bytenr, u64 num_bytes);
+int btrfs_exclude_logged_extents(struct btrfs_root *root,
+				 struct extent_buffer *eb);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  u64 objectid, u64 offset, u64 bytenr);
-- cgit v1.2.3

From 1be41b78bc688fc634bf30965d2be692c99fd11d Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Wed, 12 Jun 2013 13:56:06 -0400
Subject: Btrfs: fix transaction throttling for delayed refs

Dave has this fs_mark script that can make btrfs abort with a sufficient amount of ram. This is because with more ram we can keep more dirty metadata in cache, which in a roundabout way makes for many more pending delayed refs. What happens is we end up not throttling the transaction enough, so when we go to commit the transaction after we've completely filled the file system, we'll abort() because we use all of the space in the global reserve and still have delayed refs to run.

To fix this we need to make the delayed ref flushing and the transaction throttling dependent upon the number of delayed refs that we have, instead of how much reserved space is left in the global reserve. With this patch we not only stop aborting transactions, but we also get a smoother run speed with fs_mark, and it makes us about 10% faster. Thanks,

Reported-by: David Sterba
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0049fe0f3f74..76e4983b39ea 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3056,6 +3056,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
 		num_items;
 }
 
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
-- cgit v1.2.3
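A simplified sketch of the new test; the real patch sizes the backlog more carefully, and heads_to_leaves() is a hypothetical helper converting ready ref heads into a worst-case number of dirtied leaves:

/*
 * Simplified sketch: throttle once the metadata that running the
 * ready delayed ref heads could dirty approaches what is left in the
 * global reserve, instead of looking at reserve usage alone.
 */
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
	u64 num_bytes;
	int ret = 0;

	/* worst case one COWed leaf per ready head, doubled for slack */
	num_bytes = heads_to_leaves(root, num_heads) * root->leafsize * 2;

	spin_lock(&global_rsv->lock);
	if (global_rsv->reserved <= num_bytes)
		ret = 1;
	spin_unlock(&global_rsv->lock);
	return ret;
}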
From b150a4f10d8786a204db1ae3dccada17f950cf54 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Wed, 19 Jun 2013 15:00:04 -0400
Subject: Btrfs: use a percpu to keep track of possibly pinned bytes

There are all of these checks in the ENOSPC code to see if committing the transaction would free up enough space to make the allocation. This is because early on we just committed the transaction and hoped and prayed, which resulted in cases where it took _forever_ to get an ENOSPC when we really were out of space. So we check space_info->bytes_pinned, except this isn't completely accurate because it doesn't account for space we may free but that is stuck in delayed refs. So tests like xfstests 226 would fail because we wouldn't commit the transaction to free up the data space.

So instead add a percpu counter that will be a little fuzzier; it will add bytes as soon as we try to free up the space, and remove any space it doesn't actually free up when we get around to doing the actual free. We then zero out this counter every transaction period so we have a better idea of how much space we will actually free up by committing this transaction. With this patch we now pass xfstests 226. Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 76e4983b39ea..b528a5509cb8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1101,6 +1101,18 @@ struct btrfs_space_info {
 	u64 disk_total;	/* total bytes on disk, takes mirrors into account */
 
+	/*
+	 * bytes_pinned is kept in line with what is actually pinned, as in
+	 * we've called update_block_group and dropped the bytes_used counter
+	 * and increased the bytes_pinned counter. However this means that
+	 * bytes_pinned does not reflect the bytes that will be pinned once the
+	 * delayed refs are flushed, so this counter is inc'ed everytime we call
+	 * btrfs_free_extent so it is a realtime count of what will be freed
+	 * once the transaction is committed. It will be zero'ed everytime the
+	 * transaction commits.
+	 */
+	struct percpu_counter total_bytes_pinned;
+
 	/*
 	 * we bump reservation progress every time we decrement
 	 * bytes_reserved. This way people waiting for reservations
-- cgit v1.2.3

From 7ee9e4405f264e9eda808aa5ca4522746a1af9c1 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Fri, 21 Jun 2013 16:37:03 -0400
Subject: Btrfs: check if we can nocow if we don't have data space

We always just try to reserve data space when we write, but if we are out of space yet have prealloc'ed extents, we should still be able to write successfully. This patch will check whether we can write to prealloc'ed space and, if we can, go ahead and allow the write to continue. With this patch we now pass xfstests generic/274. Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b528a5509cb8..e795bf135e80 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3552,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
 struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
 					   size_t pg_offset, u64 start, u64 len,
 					   int create);
+noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
+			      struct inode *inode, u64 offset, u64 *len,
+			      u64 *orig_start, u64 *orig_block_len,
+			      u64 *ram_bytes);
 
 /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
 #if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
-- cgit v1.2.3
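To show where this hooks in, here is an illustrative sketch of a fallback in the write path; reserve_data_or_nocow() is a hypothetical name, and error handling is reduced to the essentials:

/*
 * Illustrative sketch: try the normal data reservation first; if that
 * fails, allow the write anyway when the whole range can be written
 * nocow into preallocated space (all-NULL out-parameters are fine
 * when the caller only wants a yes/no answer).
 */
static int reserve_data_or_nocow(struct inode *inode, u64 pos, u64 count)
{
	u64 len = count;
	int ret;

	ret = btrfs_check_data_free_space(inode, count);
	if (!ret)
		return 0;			/* normal reservation */

	/* no data space left: can we nocow the whole range instead? */
	if (can_nocow_extent(NULL, inode, pos, &len, NULL, NULL, NULL) > 0 &&
	    len >= count)
		return 0;			/* write into prealloc */

	return -ENOSPC;
}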