From b5009945be18023942ce28327893c7bc1e58fe54 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Jun 2011 15:07:51 -0400 Subject: Btrfs: do transaction space reservation before joining the transaction We have to do weird things when handling enospc in the transaction joining code. Because we've already joined the transaction we cannot commit the transaction within the reservation code since it will deadlock, so we have to return EAGAIN and then make sure we don't retry too many times. Instead of doing this, just do the reservation the normal way before we join the transaction, that way we can do whatever we want to try and reclaim space, and then if it fails we know for sure we are out of space and we can return ENOSPC. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 60e13ef23a5e..28e170bcdcb5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2223,9 +2223,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); int btrfs_check_data_free_space(struct inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); -int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - int num_items); void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, -- cgit v1.2.3 From fdb5effd5c2a7e01dc3a4217bb194e2d3a5b160f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Jun 2011 16:07:44 -0400 Subject: Btrfs: serialize flushers in reserve_metadata_bytes We keep having problems with early enospc, and that's because our method of making space is inherently racy. The problem is we can have one guy trying to make space for himself, and in the meantime people come in and steal his reservation. In order to stop this we make a waitqueue and put anybody who comes into reserve_metadata_bytes on that waitqueue if somebody is trying to make more space. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 28e170bcdcb5..406c33876605 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -756,6 +756,8 @@ struct btrfs_space_info { chunks for this space */ unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ + unsigned int flush:1; /* set if we are trying to make space */ + unsigned int force_alloc; /* set if we need to force a chunk alloc for this space */ @@ -766,6 +768,7 @@ struct btrfs_space_info { spinlock_t lock; struct rw_semaphore groups_sem; atomic_t caching_threads; + wait_queue_head_t wait; }; struct btrfs_block_rsv { -- cgit v1.2.3 From bab39bf998133510f2dad08158006197ec0dabea Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 30 Jun 2011 14:42:28 -0400 Subject: Btrfs: use a worker thread to do caching A user reported a deadlock when copying a bunch of files. This is because they were low on memory and kthreadd got hung up trying to migrate pages for an allocation when starting the caching kthread. The page was locked by the person starting the caching kthread. To fix this we just need to use the async thread stuff so that the threads are already created and we don't have to worry about deadlocks. Thanks, Reported-by: Roman Mamedov Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 406c33876605..9f6f342900c9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -767,7 +767,6 @@ struct btrfs_space_info { struct list_head block_groups[BTRFS_NR_RAID_TYPES]; spinlock_t lock; struct rw_semaphore groups_sem; - atomic_t caching_threads; wait_queue_head_t wait; }; @@ -828,6 +827,7 @@ struct btrfs_caching_control { struct list_head list; struct mutex mutex; wait_queue_head_t wait; + struct btrfs_work work; struct btrfs_block_group_cache *block_group; u64 progress; atomic_t count; @@ -1036,6 +1036,8 @@ struct btrfs_fs_info { struct btrfs_workers endio_write_workers; struct btrfs_workers endio_freespace_worker; struct btrfs_workers submit_workers; + struct btrfs_workers caching_workers; + /* * fixup workers take dirty pages that didn't properly go through * the cow mechanism and make them safe to write. It happens -- cgit v1.2.3 From 9e0baf60dea69f31ac3b1adeb35b03b02a53e8e1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Jul 2011 15:16:44 +0000 Subject: Btrfs: fix enospc problems with delalloc So I had this brilliant idea to use atomic counters for outstanding and reserved extents, but this turned out to be a bad idea. Consider this where we have 1 outstanding extent and 1 reserved extent Reserver Releaser atomic_dec(outstanding) now 0 atomic_read(outstanding)+1 get 1 atomic_read(reserved) get 1 don't actually reserve anything because they are the same atomic_cmpxchg(reserved, 1, 0) atomic_inc(outstanding) atomic_add(0, reserved) free reserved space for 1 extent Then the reserver now has no actual space reserved for it, and when it goes to finish the ordered IO it won't have enough space to do it's allocation and you get those lovely warnings. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9f6f342900c9..3063f21d3fc6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2134,7 +2134,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) /* extent-tree.c */ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, - int num_items) + unsigned num_items) { return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items; -- cgit v1.2.3 From bd681513fa6f2ff29aa391f01e413a2d1c59fd77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 16 Jul 2011 15:23:14 -0400 Subject: Btrfs: switch the btrfs tree locks to reader/writer The btrfs metadata btree is the source of significant lock contention, especially in the root node. This commit changes our locking to use a reader/writer lock. The lock is built on top of rw spinlocks, and it extends the lock tracking to remember if we have a read lock or a write lock when we go to blocking. Atomics count the number of blocking readers or writers at any given time. It removes all of the adaptive spinning from the old code and uses only the spinning/blocking hints inside of btrfs to decide when it should continue spinning. In read heavy workloads this is dramatically faster. In write heavy workloads we're still faster because of less contention on the root node lock. We suffer slightly in dbench because we schedule more often during write locks, but all other benchmarks so far are improved. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3063f21d3fc6..40235e10cfb9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2333,7 +2333,7 @@ struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); void btrfs_clear_path_blocking(struct btrfs_path *p, - struct extent_buffer *held); + struct extent_buffer *held, int held_rw); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, -- cgit v1.2.3