From 61d92c328c16419fc96dc50dd16f8b8c695409ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Oct 2009 19:11:56 -0400 Subject: Btrfs: fix deadlock on async thread startup The btrfs async worker threads are used for a wide variety of things, including processing bio end_io functions. This means that when the endio threads aren't running, the rest of the FS isn't able to do the final processing required to clear PageWriteback. The endio threads also try to exit as they become idle and start more as the work piles up. The problem is that starting more threads means kthreadd may need to allocate ram, and that allocation may wait until the global number of writeback pages on the system is below a certain limit. The result of that throttling is that end IO threads wait on kthreadd, who is waiting on IO to end, which will never happen. This commit fixes the deadlock by handing off thread startup to a dedicated thread. It also fixes a bug where the on-demand thread creation was creating far too many threads because it didn't take into account threads being started by other procs. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8184f2feb2f3..1b920ffc6a59 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -907,6 +907,7 @@ struct btrfs_fs_info { * A third pool does submit_bio to avoid deadlocking with the other * two */ + struct btrfs_workers generic_worker; struct btrfs_workers workers; struct btrfs_workers delalloc_workers; struct btrfs_workers endio_workers; -- cgit v1.2.3 From 32c00aff718bb54a214b39146bdd9ac01511cd25 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Oct 2009 13:34:05 -0400 Subject: Btrfs: release delalloc reservations on extent item insertion This patch fixes an issue with the delalloc metadata space reservation code. The problem is we used to free the reservation as soon as we allocated the delalloc region. The problem with this is if we are not inserting an inline extent, we don't actually insert the extent item until after the ordered extent is written out. This patch does 3 things, 1) It moves the reservation clearing stuff into the ordered code, so when we remove the ordered extent we remove the reservation. 2) It adds a EXTENT_DO_ACCOUNTING flag that gets passed when we clear delalloc bits in the cases where we want to clear the metadata reservation when we clear the delalloc extent, in the case that we do an inline extent or we invalidate the page. 3) It adds another waitqueue to the space info so that when we start a fs wide delalloc flush, anybody else who also hits that area will simply wait for the flush to finish and then try to make their allocation. This has been tested thoroughly to make sure we did not regress on performance. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1b920ffc6a59..dbdada569507 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -699,6 +699,9 @@ struct btrfs_space_info { int allocating_chunk; wait_queue_head_t wait; + + int flushing; + wait_queue_head_t flush_wait; }; /* -- cgit v1.2.3 From e3ccfa989752c083ceb23c823a84f7ce3a081e61 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Oct 2009 20:44:34 -0400 Subject: Btrfs: async delalloc flushing under space pressure This patch moves the delalloc flushing that occurs when we are under space pressure off to a async thread pool. This helps since we only free up metadata space when we actually insert the extent item, which means it takes quite a while for space to be free'ed up if we wait on all ordered extents. However, if space is freed up due to inline extents being inserted, we can wake people who are waiting up early, and they can finish their work. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dbdada569507..a362dd617e97 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -691,17 +691,17 @@ struct btrfs_space_info { struct list_head list; + /* for controlling how we free up space for allocations */ + wait_queue_head_t allocate_wait; + wait_queue_head_t flush_wait; + int allocating_chunk; + int flushing; + /* for block groups in our same type */ struct list_head block_groups; spinlock_t lock; struct rw_semaphore groups_sem; atomic_t caching_threads; - - int allocating_chunk; - wait_queue_head_t wait; - - int flushing; - wait_queue_head_t flush_wait; }; /* @@ -918,6 +918,7 @@ struct btrfs_fs_info { struct btrfs_workers endio_meta_write_workers; struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; + struct btrfs_workers enospc_workers; /* * fixup workers take dirty pages that didn't properly go through * the cow mechanism and make them safe to write. It happens -- cgit v1.2.3 From ff782e0a131c7f669445c07fe5c7ba91e043b7ed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Oct 2009 15:30:04 -0400 Subject: Btrfs: optimize fsync for the single writer case This patch optimizes the tree logging stuff so it doesn't always wait 1 jiffie for new people to join the logging transaction if there is only ever 1 writer. This helps a little bit with latency where we have something like RPM where it will fdatasync every file it writes, and so waiting the 1 jiffie for every fdatasync really starts to add up. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a362dd617e97..16ddb19dda86 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1010,6 +1010,8 @@ struct btrfs_root { atomic_t log_commit[2]; unsigned long log_transid; unsigned long log_batch; + pid_t log_start_pid; + bool log_multiple_pids; u64 objectid; u64 last_trans; -- cgit v1.2.3 From 82d339d9b3a6395f17d3253887653250b693b74b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 9 Oct 2009 09:54:36 -0400 Subject: Btrfs: constify dentry_operations Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 16ddb19dda86..36a19cd43e03 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2330,7 +2330,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); void btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t size); int btrfs_invalidate_inodes(struct btrfs_root *root); -extern struct dentry_operations btrfs_dentry_operations; +extern const struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -- cgit v1.2.3