From 1e8f915868c59be4d6e49d9aff928454a5d5d569 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Mon, 6 May 2013 11:03:27 +0000
Subject: Btrfs: introduce qgroup_ulist to avoid frequently allocating/freeing ulist

When doing qgroup accounting, we call ulist_alloc()/ulist_free() every time we want to walk the qgroup tree. By introducing 'qgroup_ulist', we only need to call ulist_alloc()/ulist_free() once. This reduces the sys time spent allocating memory; see the measurements below.

fsstress -p 4 -n 10000 -d $dir

With this patch:

real 0m50.153s user 0m0.081s sys 0m6.294s
real 0m51.113s user 0m0.092s sys 0m6.220s
real 0m52.610s user 0m0.096s sys 0m6.125s
avg 6.213
-----------------------------------------------------
Without the patch:

real 0m54.825s user 0m0.061s sys 0m10.665s
real 1m6.401s user 0m0.089s sys 0m11.218s
real 1m13.768s user 0m0.087s sys 0m10.665s
avg 10.849

We can see the sys time is reduced by ~43%.

Signed-off-by: Wang Shilong
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d6dd49b51ba8..a365400e38da 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1594,6 +1594,12 @@ struct btrfs_fs_info {
 	struct rb_root qgroup_tree;
 	spinlock_t qgroup_lock;
 
+	/*
+	 * used to avoid frequently calling ulist_alloc()/ulist_free()
+	 * when doing qgroup accounting, it must be protected by qgroup_lock.
+	 */
+	struct ulist *qgroup_ulist;
+
 	/* protect user change for quota operations */
 	struct mutex qgroup_ioctl_lock;
-- cgit v1.2.3

From 57254b6ebce4ceca02d9c8b615f6059c56c19238 Mon Sep 17 00:00:00 2001
From: Jan Schmidt
Date: Mon, 6 May 2013 19:14:17 +0000
Subject: Btrfs: add ioctl to wait for qgroup rescan completion

btrfs_qgroup_wait_for_completion waits until the currently running qgroup operation completes. It returns immediately when no rescan process is in progress. This is useful to automate things around the rescan process (e.g. testing).

Signed-off-by: Jan Schmidt
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a365400e38da..e36e97b473a8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1613,6 +1613,7 @@ struct btrfs_fs_info {
 	struct mutex qgroup_rescan_lock; /* protects the progress item */
 	struct btrfs_key qgroup_rescan_progress;
 	struct btrfs_workers qgroup_rescan_workers;
+	struct completion qgroup_rescan_completion;
 
 	/* filesystem state */
 	unsigned long fs_state;
@@ -3820,6 +3821,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
-- cgit v1.2.3
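For illustration, the wait can be built directly on the completion added above. The following is a minimal sketch, not the code of this patch; it assumes the rescan worker sets BTRFS_QGROUP_STATUS_FLAG_RESCAN in fs_info->qgroup_flags while running and signals qgroup_rescan_completion when it exits:

/*
 * Minimal sketch (assumptions as stated above): return immediately if
 * no rescan is running, otherwise block until the worker signals the
 * completion on its way out.
 */
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
{
	int running;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (running)
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return 0;
}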
From 1c89cdd1ce1b8b9ff7bca64bc9beea2c917e5693 Mon Sep 17 00:00:00 2001
From: Andreas Philipp
Date: Sat, 11 May 2013 11:12:54 +0000
Subject: Minor format cleanup.

Clean up the format of the definitions of BTRFS_BLOCK_GROUP_RAID5 and BTRFS_BLOCK_GROUP_RAID6. Besides consistency, the ULL suffix gives these constants the same 64-bit type as the other block group flags they are combined with.

Signed-off-by: Andreas Philipp
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e36e97b473a8..d354de31b81b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -961,8 +961,8 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
 #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
 #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
-#define BTRFS_BLOCK_GROUP_RAID5 (1 << 7)
-#define BTRFS_BLOCK_GROUP_RAID6 (1 << 8)
+#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
 #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
 
 enum btrfs_raid_types {
-- cgit v1.2.3

From babbf170c781f24095336c82ebf18ad272ddb773 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Tue, 14 May 2013 10:20:43 +0000
Subject: Btrfs: make the snap/subv deletion end earlier when the fs is R/O

Snapshot/subvolume deletion can take a long time, which makes a concurrent remount wait on it. This patch improves the situation: we break out of the deletion once the fs is remounted read-only.

Cc: David Sterba
Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d354de31b81b..d9ff585aadba 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3318,6 +3318,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
 	smp_mb();
 	return fs_info->closing;
 }
+
+/*
+ * If we remount the fs to be R/O or umount the fs, the cleaner needn't do
+ * anything except sleeping. This function is used to check the status of
+ * the fs.
+ */
+static inline int btrfs_need_cleaner_sleep(struct btrfs_root *root)
+{
+	return (root->fs_info->sb->s_flags & MS_RDONLY ||
+		btrfs_fs_closing(root->fs_info));
+}
+
 static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 {
 	kfree(fs_info->balance_ctl);
-- cgit v1.2.3
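To make the intended use concrete, here is a rough sketch (not part of the patch) of a cleaner-side deletion loop built around the new helper; clean_one_snapshot() is a hypothetical stand-in for the per-snapshot deletion work:

/* hypothetical helper: deletes one dead snapshot, false when none left */
static bool clean_one_snapshot(struct btrfs_root *tree_root);

/*
 * Rough sketch: re-check btrfs_need_cleaner_sleep() between deletion
 * steps, so a remount to R/O (or an unmount) breaks the loop early
 * instead of waiting for all pending deletions to finish.
 */
static void clean_deleted_snapshots(struct btrfs_root *tree_root)
{
	while (!btrfs_need_cleaner_sleep(tree_root)) {
		if (!clean_one_snapshot(tree_root))
			break;	/* nothing left to delete */
	}
	/* whatever remains is resumed once the fs is writable again */
}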
Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d9ff585aadba..a84e59b7b006 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3376,9 +3376,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
 			   struct btrfs_root_item *item);
 void btrfs_read_root_item(struct extent_buffer *eb, int slot,
 			  struct btrfs_root_item *item);
-int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
-			 btrfs_root_item *item, struct btrfs_key *key);
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+		    struct btrfs_path *path, struct btrfs_root_item *root_item,
+		    struct btrfs_key *root_key);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 void btrfs_set_root_node(struct btrfs_root_item *item,
 			 struct extent_buffer *node);
-- cgit v1.2.3

From b0feb9d96e71a88d7eec56f41b8f23e92af889b0 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Wed, 15 May 2013 07:48:20 +0000
Subject: Btrfs: introduce grab/put functions for the root of the fs/file tree

The grab/put functions will be used in the next patch, which needs to grab the root object and ensure it is not freed. We use a reference counter instead of the srcu lock to avoid blocking the memory reclaim task, which invokes synchronize_srcu().

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a84e59b7b006..91a8ca7af77e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1746,6 +1746,7 @@ struct btrfs_root {
 	int force_cow;
 
 	spinlock_t root_item_lock;
+	atomic_t refs;
 };
 
 struct btrfs_ioctl_defrag_range_args {
-- cgit v1.2.3

From eb73c1b7cea7d533288ef5297a0ea0e159db85b0 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Wed, 15 May 2013 07:48:22 +0000
Subject: Btrfs: introduce per-subvolume delalloc inode list

When we create a snapshot, we currently need to flush all delalloc inodes in the fs, though flushing just the inodes in the source tree would be enough. So we introduce a per-subvolume delalloc inode list.

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 91a8ca7af77e..43c073533940 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1449,13 +1449,9 @@ struct btrfs_fs_info {
 	 */
 	struct list_head ordered_extents;
 
-	spinlock_t delalloc_lock;
-	/*
-	 * all of the inodes that have delalloc bytes. It is possible for
-	 * this list to be empty even when there is still dirty data=ordered
-	 * extents waiting to finish IO.
-	 */
-	struct list_head delalloc_inodes;
+	spinlock_t delalloc_root_lock;
+	/* all fs/file tree roots that have delalloc inodes. */
+	struct list_head delalloc_roots;
 
 	/*
 	 * there is a pool of worker threads for checksumming during writes
@@ -1747,6 +1743,16 @@ struct btrfs_root {
 	spinlock_t root_item_lock;
 	atomic_t refs;
+
+	spinlock_t delalloc_lock;
+	/*
+	 * all of the inodes that have delalloc bytes. It is possible for
+	 * this list to be empty even when there is still dirty data=ordered
+	 * extents waiting to finish IO.
+	 */
+	struct list_head delalloc_inodes;
+	struct list_head delalloc_root;
+	u64 nr_delalloc_inodes;
 };
 
 struct btrfs_ioctl_defrag_range_args {
@@ -3550,6 +3556,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
+int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
+				    int delay_iput);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-- cgit v1.2.3
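Flushing all delalloc inodes in the fs then becomes a walk over the new list of roots. A sketch of what btrfs_start_all_delalloc_inodes() might look like — illustrative only, reusing btrfs_grab_fs_root()/btrfs_put_fs_root() from the grab/put patch above, with NULL/error handling elided:

/*
 * Sketch: flush the delalloc inodes of every fs/file tree.  The list
 * is spliced away under the lock; each root is pinned and moved back
 * to the live list before the lock is dropped for the actual flush.
 */
int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
				    int delay_iput)
{
	struct btrfs_root *root;
	LIST_HEAD(splice);
	int ret = 0;

	spin_lock(&fs_info->delalloc_root_lock);
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice)) {
		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		root = btrfs_grab_fs_root(root);	/* pin the root */
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = btrfs_start_delalloc_inodes(root, delay_iput);
		btrfs_put_fs_root(root);

		spin_lock(&fs_info->delalloc_root_lock);
		if (ret)
			break;
	}
	/* on error, put anything we did not get to back on the list */
	list_splice_tail(&splice, &fs_info->delalloc_roots);
	spin_unlock(&fs_info->delalloc_root_lock);
	return ret;
}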
From 199c2a9c3d1389db7f7a211e64f6809d352ce5f6 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Wed, 15 May 2013 07:48:23 +0000
Subject: Btrfs: introduce per-subvolume ordered extent list

The reason we introduce a per-subvolume ordered extent list is the same as for the per-subvolume delalloc inode list.

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 43c073533940..905f7c6c82f3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1437,17 +1437,18 @@ struct btrfs_fs_info {
 	atomic_t open_ioctl_trans;
 
 	/*
-	 * this is used by the balancing code to wait for all the pending
-	 * ordered extents
+	 * this is used to protect the following list -- ordered_roots.
 	 */
-	spinlock_t ordered_extent_lock;
+	spinlock_t ordered_root_lock;
 
 	/*
-	 * all of the data=ordered extents pending writeback
+	 * all fs/file tree roots in which there are data=ordered extents
+	 * pending writeback are added into this list.
+	 *
 	 * these can span multiple transactions and basically include
 	 * every dirty data page that isn't from nodatacow
 	 */
-	struct list_head ordered_extents;
+	struct list_head ordered_roots;
 
 	spinlock_t delalloc_root_lock;
 	/* all fs/file tree roots that have delalloc inodes. */
 	struct list_head delalloc_roots;
@@ -1753,6 +1754,20 @@ struct btrfs_root {
 	struct list_head delalloc_inodes;
 	struct list_head delalloc_root;
 	u64 nr_delalloc_inodes;
+	/*
+	 * this is used by the balancing code to wait for all the pending
+	 * ordered extents
+	 */
+	spinlock_t ordered_extent_lock;
+
+	/*
+	 * all of the data=ordered extents pending writeback
+	 * these can span multiple transactions and basically include
+	 * every dirty data page that isn't from nodatacow
+	 */
+	struct list_head ordered_extents;
+	struct list_head ordered_root;
+	u64 nr_ordered_extents;
 };
-- cgit v1.2.3
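The list maintenance mirrors the delalloc case. As an illustrative sketch (the helper name is hypothetical), a root joins the fs-wide list when its first ordered extent is queued:

/*
 * Illustrative sketch: called when an ordered extent is added to a
 * root; the root is linked onto fs_info->ordered_roots the first
 * time, and would be unlinked again once nr_ordered_extents drops
 * back to zero.
 */
static void btrfs_add_ordered_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	spin_lock(&fs_info->ordered_root_lock);
	if (list_empty(&root->ordered_root))
		list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
	spin_unlock(&fs_info->ordered_root_lock);
}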
From 4a9d8bdee368de78ace8b36da4eb2186afea162d Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Fri, 17 May 2013 03:53:43 +0000
Subject: Btrfs: make the state of the transaction more readable

We used three variables to track the state of the transaction; it was complex and wasted memory. Besides that, it was hard to understand which types of transaction handles should be blocked in each transaction state, so developers often made mistakes.

This patch improves on that. We define six states for the transaction:

enum btrfs_trans_state {
	TRANS_STATE_RUNNING = 0,
	TRANS_STATE_BLOCKED = 1,
	TRANS_STATE_COMMIT_START = 2,
	TRANS_STATE_COMMIT_DOING = 3,
	TRANS_STATE_UNBLOCKED = 4,
	TRANS_STATE_COMPLETED = 5,
	TRANS_STATE_MAX = 6,
}

and use just one variable to track the state.

In order to make the blocked handle types for each state clearer, we introduce an array:

unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
	[TRANS_STATE_RUNNING] = 0U,
	[TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | __TRANS_START),
	[TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | __TRANS_START |
				      __TRANS_ATTACH),
	[TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | __TRANS_START |
				      __TRANS_ATTACH | __TRANS_JOIN),
	[TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | __TRANS_START |
				   __TRANS_ATTACH | __TRANS_JOIN |
				   __TRANS_JOIN_NOLOCK),
	[TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | __TRANS_START |
				   __TRANS_ATTACH | __TRANS_JOIN |
				   __TRANS_JOIN_NOLOCK),
}

which is quite intuitive.

Besides that, because we remove ->in_commit from the transaction structure, the ->commit_lock that was used to protect it is no longer needed, so remove ->commit_lock as well.

Signed-off-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 905f7c6c82f3..fd62aa856d1b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1496,7 +1496,6 @@ struct btrfs_fs_info {
 	int closing;
 	int log_root_recovering;
 	int enospc_unlink;
-	int trans_no_join;
 
 	u64 total_pinned;
-- cgit v1.2.3

From d52be818e618bd252601b340ca6df760d77410e8 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Wed, 29 May 2013 14:54:47 -0400
Subject: Btrfs: simplify unlink reservations

Dave pointed out a problem where if you filled up a file system as much as possible you couldn't remove any files. The whole unlink reservation thing is convoluted because it tries to guess whether it's going to add space to unlink something or not, and has all these odd uncommented cases where it simply does not try.

So to fix this I've added a way to conditionally steal from the global reserve if we can't make our normal reservation. If more than half the space in the global reserve is free, we will go ahead and steal from it. With this patch Dave's reproducer now works and I can rm all the files on the file system. Thanks,

Reported-by: David Sterba
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fd62aa856d1b..a07b8c0a260d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1495,7 +1495,6 @@ struct btrfs_fs_info {
 	int do_barriers;
 	int closing;
 	int log_root_recovering;
-	int enospc_unlink;
 
 	u64 total_pinned;
 
@@ -3183,6 +3182,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 			    struct btrfs_block_rsv *dst_rsv,
 			    u64 num_bytes);
+int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
+			     struct btrfs_block_rsv *dest, u64 num_bytes,
+			     int min_factor);
 void btrfs_block_rsv_release(struct btrfs_root *root,
 			     struct btrfs_block_rsv *block_rsv,
 			     u64 num_bytes);
-- cgit v1.2.3
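The "more than half free" rule is what the new min_factor argument expresses. A simplified sketch of the helper's semantics — assuming btrfs's existing div_factor() helper (num * factor / 10), with block_rsv_add_bytes() standing in for the usual reserve accounting:

/*
 * Simplified sketch: move num_bytes from the global reserve into
 * @dest, but only if the global reserve stays at least
 * (min_factor / 10) full afterwards.  The unlink path would pass
 * min_factor = 5, i.e. steal only while more than half is free.
 */
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_rsv *dest, u64 num_bytes,
			     int min_factor)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != dest->space_info)
		return -ENOSPC;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, min_factor);
	if (global_rsv->reserved < min_bytes + num_bytes) {
		spin_unlock(&global_rsv->lock);
		return -ENOSPC;
	}
	global_rsv->reserved -= num_bytes;
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	block_rsv_add_bytes(dest, num_bytes, 1);
	return 0;
}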
From b382a324b60f4923e9fc8e11f023e4f493c51318 Mon Sep 17 00:00:00 2001
From: Jan Schmidt
Date: Tue, 28 May 2013 15:47:24 +0000
Subject: Btrfs: fix qgroup rescan resume on mount

When called during mount, we cannot start the rescan worker thread until open_ctree is done. This commit restructures the qgroup rescan internals to enable a clean deferral of the rescan resume operation.

First of all, the struct qgroup_rescan is removed, saving us a malloc and some initialization synchronization problems. Its only element (the worker struct) now lives within fs_info just as the rest of the rescan code.

Then setting up a rescan worker is split into several reusable stages. Currently we have three different rescan startup scenarios:

(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable

Each case needs its own combination of the four following steps:

(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]

qgroup_rescan_init does step (1). There's no extra function added to commit a transaction; we've got that already. qgroup_rescan_zero_tracking does step (3). Step (4) is nothing more than a call to the generic btrfs_queue_worker.

We also get rid of a double check for the rescan progress during btrfs_qgroup_account_ref, which is no longer required due to having step (2) from the list above.

As a side effect, this commit prepares to move the rescan start code from btrfs_run_qgroups (which is run during commit) to a less time-critical section.

Signed-off-by: Jan Schmidt
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a07b8c0a260d..80ab1a6f4fe3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1609,6 +1609,7 @@ struct btrfs_fs_info {
 	struct btrfs_key qgroup_rescan_progress;
 	struct btrfs_workers qgroup_rescan_workers;
 	struct completion qgroup_rescan_completion;
+	struct btrfs_work qgroup_rescan_work;
 
 	/* filesystem state */
 	unsigned long fs_state;
@@ -3858,6 +3859,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-- cgit v1.2.3
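With the worker struct embedded in fs_info, the deferred resume (scenario B) reduces to checking whether a rescan was interrupted and queueing the worker. A minimal sketch, assuming the on-disk BTRFS_QGROUP_STATUS_FLAG_RESCAN flag (loaded into fs_info->qgroup_flags) records an interrupted rescan:

/*
 * Minimal sketch of scenario (B): called after open_ctree is done.
 * Step (1) already happened when the qgroup status item was read;
 * steps (2) and (3) are not needed on resume, so only step (4),
 * queueing the worker, remains.
 */
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN))
		return;	/* no rescan was running when we unmounted */

	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
			   &fs_info->qgroup_rescan_work);
}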
From 8c2a1a3028d560cfb95f1c583e872c65ed2f0b3d Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Thu, 6 Jun 2013 13:19:32 -0400
Subject: Btrfs: exclude logged extents before replaying when we are mixed

With non-mixed block groups we replay the logs before we're allowed to do any writes, so we get away with not pinning/removing the data extents until right when we replay them. However with mixed block groups we allocate out of the same pool, so we could easily allocate a metadata block that was logged in our tree log. To deal with this we just need to notice that we have mixed block groups and do the normal excluding/removal dance during the pin stage of the log replay; that way we don't allocate metadata blocks from areas where we have logged data extents. With this patch we now pass xfstests generic/311 with mixed block groups turned on. Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80ab1a6f4fe3..0049fe0f3f74 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3067,6 +3067,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
 		     u64 bytenr, u64 num, int reserved);
 int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 				    u64 bytenr, u64 num_bytes);
+int btrfs_exclude_logged_extents(struct btrfs_root *root,
+				 struct extent_buffer *eb);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  u64 objectid, u64 offset, u64 bytenr);
-- cgit v1.2.3

From 1be41b78bc688fc634bf30965d2be692c99fd11d Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Wed, 12 Jun 2013 13:56:06 -0400
Subject: Btrfs: fix transaction throttling for delayed refs

Dave has this fs_mark script that can make btrfs abort with a sufficient amount of ram. This is because with more ram we can keep more dirty metadata in cache, which in a roundabout way makes for many more pending delayed refs. What happens is we end up not throttling the transaction enough, so when we go to commit the transaction after we've completely filled the file system, we'll abort() because we use all of the space in the global reserve and still have delayed refs to run.

To fix this we need to make the delayed ref flushing and the transaction throttling dependent upon the number of delayed refs that we have, instead of how much reserved space is left in the global reserve. With this patch we not only stop aborting transactions, but we also get a smoother run speed with fs_mark, and it makes us about 10% faster. Thanks,

Reported-by: David Sterba
Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0049fe0f3f74..76e4983b39ea 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3056,6 +3056,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
 		num_items;
 }
 
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
-- cgit v1.2.3
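A simplified sketch of the new test; the real patch sizes the backlog more carefully, and heads_to_leaves() is a hypothetical helper converting ready ref heads into a worst-case number of dirtied leaves:

/*
 * Simplified sketch: throttle once the metadata that running the
 * ready delayed ref heads could dirty approaches what is left in the
 * global reserve, instead of looking at reserve usage alone.
 */
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
	u64 num_bytes;
	int ret = 0;

	/* worst case one COWed leaf per ready head, doubled for slack */
	num_bytes = heads_to_leaves(root, num_heads) * root->leafsize * 2;

	spin_lock(&global_rsv->lock);
	if (global_rsv->reserved <= num_bytes)
		ret = 1;
	spin_unlock(&global_rsv->lock);
	return ret;
}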
From b150a4f10d8786a204db1ae3dccada17f950cf54 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Wed, 19 Jun 2013 15:00:04 -0400
Subject: Btrfs: use a percpu to keep track of possibly pinned bytes

There are all of these checks in the ENOSPC code to see if committing the transaction would free up enough space to make the allocation. This is because early on we just committed the transaction and hoped and prayed, which resulted in cases where it took _forever_ to get an ENOSPC when we really were out of space. So we check space_info->bytes_pinned, except this isn't completely accurate because it doesn't account for space we may free but that is stuck in delayed refs. So tests like xfstests 226 would fail because we wouldn't commit the transaction to free up the data space.

So instead add a percpu counter that will be a little fuzzier; it will add bytes as soon as we try to free up the space, and remove any space it doesn't actually free up when we get around to doing the actual free. We then zero out this counter every transaction period so we have a better idea of how much space we will actually free up by committing this transaction. With this patch we now pass xfstests 226. Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 76e4983b39ea..b528a5509cb8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1101,6 +1101,18 @@ struct btrfs_space_info {
 	u64 disk_total;	/* total bytes on disk, takes mirrors into account */
 
+	/*
+	 * bytes_pinned is kept in line with what is actually pinned, as in
+	 * we've called update_block_group and dropped the bytes_used counter
+	 * and increased the bytes_pinned counter. However this means that
+	 * bytes_pinned does not reflect the bytes that will be pinned once the
+	 * delayed refs are flushed, so this counter is inc'ed everytime we call
+	 * btrfs_free_extent so it is a realtime count of what will be freed
+	 * once the transaction is committed. It will be zero'ed everytime the
+	 * transaction commits.
+	 */
+	struct percpu_counter total_bytes_pinned;
+
 	/*
 	 * we bump reservation progress every time we decrement
 	 * bytes_reserved. This way people waiting for reservations
-- cgit v1.2.3

From 7ee9e4405f264e9eda808aa5ca4522746a1af9c1 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Fri, 21 Jun 2013 16:37:03 -0400
Subject: Btrfs: check if we can nocow if we don't have data space

We always just try to reserve data space when we write, but if we are out of space yet have prealloc'ed extents, we should still be able to write successfully. This patch will check whether we can write to prealloc'ed space and, if we can, go ahead and allow the write to continue. With this patch we now pass xfstests generic/274. Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/ctree.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs/btrfs/ctree.h')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b528a5509cb8..e795bf135e80 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3552,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
 struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
 					   size_t pg_offset, u64 start, u64 len,
 					   int create);
+noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
+			      struct inode *inode, u64 offset, u64 *len,
+			      u64 *orig_start, u64 *orig_block_len,
+			      u64 *ram_bytes);
 
 /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
 #if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
-- cgit v1.2.3
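To show where this hooks in, here is an illustrative sketch of a fallback in the write path; reserve_data_or_nocow() is a hypothetical name, and error handling is reduced to the essentials:

/*
 * Illustrative sketch: try the normal data reservation first; if that
 * fails, allow the write anyway when the whole range can be written
 * nocow into preallocated space (all-NULL out-parameters are fine
 * when the caller only wants a yes/no answer).
 */
static int reserve_data_or_nocow(struct inode *inode, u64 pos, u64 count)
{
	u64 len = count;
	int ret;

	ret = btrfs_check_data_free_space(inode, count);
	if (!ret)
		return 0;			/* normal reservation */

	/* no data space left: can we nocow the whole range instead? */
	if (can_nocow_extent(NULL, inode, pos, &len, NULL, NULL, NULL) > 0 &&
	    len >= count)
		return 0;			/* write into prealloc */

	return -ENOSPC;
}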