diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-18 09:42:05 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-18 09:42:05 -0800 |
commit | a22180d2666c018f4fef6818074d78bb76ff2bda (patch) | |
tree | a633aaf423ff39f94d00502d03dbbd99dab4b2ee /fs/btrfs/ordered-data.c | |
parent | 2d4dce0070448bcb5ccd04553a4be4635417f565 (diff) | |
parent | 213490b301773ea9c6fb89a86424a6901fcdd069 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason:
"A big set of fixes and features.
In terms of line count, most of the code comes from Stefan, who added
the ability to replace a single drive in place. This is different
from how btrfs normally replaces drives, and is much much much faster.
Josef is plowing through our synchronous write performance. This pull
request does not include the DIO_OWN_WAITING patch that was discussed
on the list, but it has a number of other improvements to cut down our
latencies and CPU time during fsync/O_DIRECT writes.
Miao Xie has a big series of fixes and is spreading out ordered
operations over more CPUs. This improves performance and reduces
contention.
I've put in fixes for error handling around hash collisions. These
are going back to individual stable kernels as I test against them.
Otherwise we have a lot of fixes and cleanups, thanks everyone!
raid5/6 is being rebased against the device replacement code. I'll
have it posted this Friday along with a nice series of benchmarks."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (115 commits)
Btrfs: fix a bug of per-file nocow
Btrfs: fix hash overflow handling
Btrfs: don't take inode delalloc mutex if we're a free space inode
Btrfs: fix autodefrag and umount lockup
Btrfs: fix permissions of empty files not affected by umask
Btrfs: put raid properties into global table
Btrfs: fix BUG() in scrub when first superblock reading gives EIO
Btrfs: do not call file_update_time in aio_write
Btrfs: only unlock and relock if we have to
Btrfs: use tokens where we can in the tree log
Btrfs: optimize leaf_space_used
Btrfs: don't memset new tokens
Btrfs: only clear dirty on the buffer if it is marked as dirty
Btrfs: move checks in set_page_dirty under DEBUG
Btrfs: log changed inodes based on the extent map tree
Btrfs: add path->really_keep_locks
Btrfs: do not mark ems as prealloc if we are writing to them
Btrfs: keep track of the extents original block length
Btrfs: inline csums if we're fsyncing
Btrfs: don't bother copying if we're only logging the inode
...
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r-- | fs/btrfs/ordered-data.c | 90 |
1 files changed, 63 insertions, 27 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7772f02ba28e..f10731297040 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -211,6 +211,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, init_waitqueue_head(&entry->wait); INIT_LIST_HEAD(&entry->list); INIT_LIST_HEAD(&entry->root_extent_list); + INIT_LIST_HEAD(&entry->work_list); + init_completion(&entry->completion); trace_btrfs_ordered_extent_add(inode, entry); @@ -464,18 +466,28 @@ void btrfs_remove_ordered_extent(struct inode *inode, wake_up(&entry->wait); } +static void btrfs_run_ordered_extent_work(struct btrfs_work *work) +{ + struct btrfs_ordered_extent *ordered; + + ordered = container_of(work, struct btrfs_ordered_extent, flush_work); + btrfs_start_ordered_extent(ordered->inode, ordered, 1); + complete(&ordered->completion); +} + /* * wait for all the ordered extents in a root. This is done when balancing * space between drives. */ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) { - struct list_head splice; + struct list_head splice, works; struct list_head *cur; - struct btrfs_ordered_extent *ordered; + struct btrfs_ordered_extent *ordered, *next; struct inode *inode; INIT_LIST_HEAD(&splice); + INIT_LIST_HEAD(&works); spin_lock(&root->fs_info->ordered_extent_lock); list_splice_init(&root->fs_info->ordered_extents, &splice); @@ -494,19 +506,32 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) spin_unlock(&root->fs_info->ordered_extent_lock); if (inode) { - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - if (delay_iput) - btrfs_add_delayed_iput(inode); - else - iput(inode); + ordered->flush_work.func = btrfs_run_ordered_extent_work; + list_add_tail(&ordered->work_list, &works); + btrfs_queue_worker(&root->fs_info->flush_workers, + &ordered->flush_work); } else { btrfs_put_ordered_extent(ordered); } + cond_resched(); spin_lock(&root->fs_info->ordered_extent_lock); } spin_unlock(&root->fs_info->ordered_extent_lock); + + list_for_each_entry_safe(ordered, next, &works, work_list) { + list_del_init(&ordered->work_list); + wait_for_completion(&ordered->completion); + + inode = ordered->inode; + btrfs_put_ordered_extent(ordered); + if (delay_iput) + btrfs_add_delayed_iput(inode); + else + iput(inode); + + cond_resched(); + } } /* @@ -519,13 +544,17 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) * extra check to make sure the ordered operation list really is empty * before we return */ -void btrfs_run_ordered_operations(struct btrfs_root *root, int wait) +int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) { struct btrfs_inode *btrfs_inode; struct inode *inode; struct list_head splice; + struct list_head works; + struct btrfs_delalloc_work *work, *next; + int ret = 0; INIT_LIST_HEAD(&splice); + INIT_LIST_HEAD(&works); mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->fs_info->ordered_extent_lock); @@ -533,6 +562,7 @@ again: list_splice_init(&root->fs_info->ordered_operations, &splice); while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, ordered_operations); @@ -549,15 +579,26 @@ again: list_add_tail(&BTRFS_I(inode)->ordered_operations, &root->fs_info->ordered_operations); } + + if (!inode) + continue; spin_unlock(&root->fs_info->ordered_extent_lock); - if (inode) { - if (wait) - btrfs_wait_ordered_range(inode, 0, (u64)-1); - else - filemap_flush(inode->i_mapping); - btrfs_add_delayed_iput(inode); + work = btrfs_alloc_delalloc_work(inode, wait, 1); + if (!work) { + if (list_empty(&BTRFS_I(inode)->ordered_operations)) + list_add_tail(&btrfs_inode->ordered_operations, + &splice); + spin_lock(&root->fs_info->ordered_extent_lock); + list_splice_tail(&splice, + &root->fs_info->ordered_operations); + spin_unlock(&root->fs_info->ordered_extent_lock); + ret = -ENOMEM; + goto out; } + list_add_tail(&work->list, &works); + btrfs_queue_worker(&root->fs_info->flush_workers, + &work->work); cond_resched(); spin_lock(&root->fs_info->ordered_extent_lock); @@ -566,7 +607,13 @@ again: goto again; spin_unlock(&root->fs_info->ordered_extent_lock); +out: + list_for_each_entry_safe(work, next, &works, list) { + list_del_init(&work->list); + btrfs_wait_and_free_delalloc_work(work); + } mutex_unlock(&root->fs_info->ordered_operations_mutex); + return ret; } /* @@ -606,7 +653,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) u64 end; u64 orig_end; struct btrfs_ordered_extent *ordered; - int found; if (start + len < start) { orig_end = INT_LIMIT(loff_t); @@ -642,7 +688,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; - found = 0; while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); if (!ordered) @@ -655,7 +700,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) btrfs_put_ordered_extent(ordered); break; } - found++; btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; btrfs_put_ordered_extent(ordered); @@ -934,15 +978,6 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, if (last_mod < root->fs_info->last_trans_committed) return; - /* - * the transaction is already committing. Just start the IO and - * don't bother with all of this list nonsense - */ - if (trans && root->fs_info->running_transaction->blocked) { - btrfs_wait_ordered_range(inode, 0, (u64)-1); - return; - } - spin_lock(&root->fs_info->ordered_extent_lock); if (list_empty(&BTRFS_I(inode)->ordered_operations)) { list_add_tail(&BTRFS_I(inode)->ordered_operations, @@ -959,6 +994,7 @@ int __init ordered_data_init(void) NULL); if (!btrfs_ordered_extent_cache) return -ENOMEM; + return 0; } |