From dc7f370c05dd024697d4d6c68f91fd04fe8fad1e Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 14 May 2013 10:20:42 +0000 Subject: Btrfs: move the R/O check out of btrfs_clean_one_deleted_snapshot() If the fs is remounted to be R/O, it is unnecessary to call btrfs_clean_one_deleted_snapshot(), so move the R/O check out of this function. And besides that, it can make the check logic in the caller more clear. Cc: David Sterba Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0544587d74f4..f157752efc47 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1885,11 +1885,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) int ret; struct btrfs_fs_info *fs_info = root->fs_info; - if (fs_info->sb->s_flags & MS_RDONLY) { - pr_debug("btrfs: cleaner called for RO fs!\n"); - return 0; - } - spin_lock(&fs_info->trans_lock); if (list_empty(&fs_info->dead_roots)) { spin_unlock(&fs_info->trans_lock); -- cgit v1.2.3 From eb73c1b7cea7d533288ef5297a0ea0e159db85b0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:22 +0000 Subject: Btrfs: introduce per-subvolume delalloc inode list When we create a snapshot, we need flush all delalloc inodes in the fs, just flushing the inodes in the source tree is OK. So we introduce per-subvolume delalloc inode list. 
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f157752efc47..4b6311181412 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1502,7 +1502,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, } if (flush_on_commit || snap_pending) { - ret = btrfs_start_delalloc_inodes(root, 1); + ret = btrfs_start_all_delalloc_inodes(root->fs_info, 1); if (ret) return ret; btrfs_wait_ordered_extents(root, 1); -- cgit v1.2.3 From 199c2a9c3d1389db7f7a211e64f6809d352ce5f6 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:23 +0000 Subject: Btrfs: introduce per-subvolume ordered extent list The reason we introduce per-subvolume ordered extent list is the same as the per-subvolume delalloc inode list. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4b6311181412..2b17213571a0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1505,7 +1505,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, ret = btrfs_start_all_delalloc_inodes(root->fs_info, 1); if (ret) return ret; - btrfs_wait_ordered_extents(root, 1); + btrfs_wait_all_ordered_extents(root->fs_info, 1); } ret = btrfs_run_delayed_items(trans, root); -- cgit v1.2.3 From 6a03843df4d29593912e558c72a2ce39274d2366 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:24 +0000 Subject: Btrfs: just flush the delalloc inodes in the source tree before snapshot creation Before applying this patch, we need flush all the delalloc inodes in the fs when we want to create a snapshot, it wastes time, and make the transaction commit be blocked for a long time. 
It means some other user operations would also be blocked for a long time. This patch fixes the problem: we flush only the delalloc inodes that are in the source trees before snapshot creation, so the transaction commit will complete quickly. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b17213571a0..bc22be9b69b4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1491,17 +1491,9 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); - int snap_pending = 0; int ret; - if (!flush_on_commit) { - spin_lock(&root->fs_info->trans_lock); - if (!list_empty(&trans->transaction->pending_snapshots)) - snap_pending = 1; - spin_unlock(&root->fs_info->trans_lock); - } - - if (flush_on_commit || snap_pending) { + if (flush_on_commit) { ret = btrfs_start_all_delalloc_inodes(root->fs_info, 1); if (ret) return ret; -- cgit v1.2.3 From 25d8c284c7d9b9d536a1334f6b670645da971a19 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:26 +0000 Subject: Btrfs: remove the code for the impossible case in cleanup_transaction() If the transaction has been removed from the transaction list, it means the transaction has been committed successfully. So it is impossible for cleanup_transaction() to be called in that case; otherwise there is something wrong with the code logic. Thus, we use BUG_ON() instead of the original handling.
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bc22be9b69b4..cf8706ce85ac 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1450,11 +1450,12 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->trans_lock); - if (list_empty(&cur_trans->list)) { - spin_unlock(&root->fs_info->trans_lock); - btrfs_end_transaction(trans, root); - return; - } + /* + * If the transaction is removed from the list, it means this + * transaction has been committed successfully, so it is impossible + * to call the cleanup function. + */ + BUG_ON(list_empty(&cur_trans->list)); list_del_init(&cur_trans->list); if (cur_trans == root->fs_info->running_transaction) { -- cgit v1.2.3 From 0860adfdb21c87c73afab4d143e7195603b3e883 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:27 +0000 Subject: Btrfs: don't wait for all the writers circularly during the transaction commit btrfs_commit_transaction has the following loop before we commit the transaction. do { // attempt to do some useful stuff and/or sleep } while (atomic_read(&cur_trans->num_writers) > 1 || (should_grow && cur_trans->num_joined != joined)); This is used to prevent from the TRANS_START to get in the way of a committing transaction. But it does not prevent from TRANS_JOIN, that is we would do this loop for a long time if some writers JOIN the current transaction endlessly. Because we need join the current transaction to do some useful stuff, we can not block TRANS_JOIN here. So we introduce a external writer counter, which is used to count the TRANS_USERSPACE/TRANS_START writers. If the external writer counter is zero, we can break the above loop. 
In order to make the code more clear, we don't use enum variant to define the type of the transaction handle, use bitmask instead. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 55 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cf8706ce85ac..fd319b2ecd84 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -51,17 +51,41 @@ static noinline void switch_commit_root(struct btrfs_root *root) } static inline int can_join_transaction(struct btrfs_transaction *trans, - int type) + unsigned int type) { return !(trans->in_commit && - type != TRANS_JOIN && - type != TRANS_JOIN_NOLOCK); + (type & TRANS_EXTWRITERS)); +} + +static inline void extwriter_counter_inc(struct btrfs_transaction *trans, + unsigned int type) +{ + if (type & TRANS_EXTWRITERS) + atomic_inc(&trans->num_extwriters); +} + +static inline void extwriter_counter_dec(struct btrfs_transaction *trans, + unsigned int type) +{ + if (type & TRANS_EXTWRITERS) + atomic_dec(&trans->num_extwriters); +} + +static inline void extwriter_counter_init(struct btrfs_transaction *trans, + unsigned int type) +{ + atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 
1 : 0)); +} + +static inline int extwriter_counter_read(struct btrfs_transaction *trans) +{ + return atomic_read(&trans->num_extwriters); } /* * either allocate a new transaction or hop into the existing one */ -static noinline int join_transaction(struct btrfs_root *root, int type) +static noinline int join_transaction(struct btrfs_root *root, unsigned int type) { struct btrfs_transaction *cur_trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -99,6 +123,7 @@ loop: } atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); + extwriter_counter_inc(cur_trans, type); cur_trans->num_joined++; spin_unlock(&fs_info->trans_lock); return 0; @@ -131,6 +156,7 @@ loop: } atomic_set(&cur_trans->num_writers, 1); + extwriter_counter_init(cur_trans, type); cur_trans->num_joined = 0; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); @@ -307,7 +333,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type) } static struct btrfs_trans_handle * -start_transaction(struct btrfs_root *root, u64 num_items, int type, +start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, enum btrfs_reserve_flush_enum flush) { struct btrfs_trans_handle *h; @@ -320,7 +346,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, return ERR_PTR(-EROFS); if (current->journal_info) { - WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); + WARN_ON(type & TRANS_EXTWRITERS); h = current->journal_info; h->use_count++; WARN_ON(h->use_count > 2); @@ -366,7 +392,7 @@ again: * If we are ATTACH, it means we just want to catch the current * transaction and commit it, so we needn't do sb_start_intwrite(). 
*/ - if (type < TRANS_JOIN_NOLOCK) + if (type & __TRANS_FREEZABLE) sb_start_intwrite(root->fs_info->sb); if (may_wait_transaction(root, type)) @@ -429,7 +455,7 @@ got_it: return h; join_fail: - if (type < TRANS_JOIN_NOLOCK) + if (type & __TRANS_FREEZABLE) sb_end_intwrite(root->fs_info->sb); kmem_cache_free(btrfs_trans_handle_cachep, h); alloc_fail: @@ -677,12 +703,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, } } - if (trans->type < TRANS_JOIN_NOLOCK) + if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(root->fs_info->sb); WARN_ON(cur_trans != info->running_transaction); WARN_ON(atomic_read(&cur_trans->num_writers) < 1); atomic_dec(&cur_trans->num_writers); + extwriter_counter_dec(cur_trans, trans->type); smp_mb(); if (waitqueue_active(&cur_trans->writer_wait)) @@ -1625,6 +1652,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->trans_lock); } + extwriter_counter_dec(cur_trans, trans->type); + if (!btrfs_test_opt(root, SSD) && (now < cur_trans->start_time || now - cur_trans->start_time < 1)) should_grow = 1; @@ -1641,13 +1670,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, prepare_to_wait(&cur_trans->writer_wait, &wait, TASK_UNINTERRUPTIBLE); - if (atomic_read(&cur_trans->num_writers) > 1) - schedule_timeout(MAX_SCHEDULE_TIMEOUT); + if (extwriter_counter_read(cur_trans) > 0) + schedule(); else if (should_grow) schedule_timeout(1); finish_wait(&cur_trans->writer_wait, &wait); - } while (atomic_read(&cur_trans->num_writers) > 1 || + } while (extwriter_counter_read(cur_trans) > 0 || (should_grow && cur_trans->num_joined != joined)); ret = btrfs_flush_all_pending_stuffs(trans, root); @@ -1831,7 +1860,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); put_transaction(cur_trans); - if (trans->type < TRANS_JOIN_NOLOCK) + if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(root->fs_info->sb); trace_btrfs_transaction_commit(root); -- 
cgit v1.2.3 From 824366177aa108eb7b778dc67e4f38b9e01df93f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:28 +0000 Subject: Btrfs: don't flush the delalloc inodes in the while loop if flushoncommit is set It is unnecessary to flush the delalloc inodes again and again because we don't care the dirty pages which are introduced after the flush, and they will be flush in the transaction commit. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index fd319b2ecd84..265db57b3341 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1518,16 +1518,8 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); int ret; - if (flush_on_commit) { - ret = btrfs_start_all_delalloc_inodes(root->fs_info, 1); - if (ret) - return ret; - btrfs_wait_all_ordered_extents(root->fs_info, 1); - } - ret = btrfs_run_delayed_items(trans, root); if (ret) return ret; @@ -1551,6 +1543,19 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, return ret; } +static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) +{ + if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) + return btrfs_start_all_delalloc_inodes(fs_info, 1); + return 0; +} + +static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) +{ + if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) + btrfs_wait_all_ordered_extents(fs_info, 1); +} + /* * btrfs_transaction state sequence: * in_commit = 0, blocked = 0 (initial) @@ -1654,6 +1659,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extwriter_counter_dec(cur_trans, trans->type); + ret = 
btrfs_start_delalloc_flush(root->fs_info); + if (ret) + goto cleanup_transaction; + if (!btrfs_test_opt(root, SSD) && (now < cur_trans->start_time || now - cur_trans->start_time < 1)) should_grow = 1; @@ -1683,6 +1692,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; + btrfs_wait_delalloc_flush(root->fs_info); /* * Ok now we need to make sure to block out any other joins while we * commit the transaction. We could have started a join before setting -- cgit v1.2.3 From 3f1e3fa65c44b8ecdf2d6f14956c2cfe3a462a03 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:29 +0000 Subject: Btrfs: remove unnecessary varient ->num_joined in btrfs_transaction structure We used ->num_joined to track whether any writers joined the current transaction while the committer was sleeping. If some writers had joined the current transaction, we had to continue the while loop to do some necessary work, such as flushing the ordered operations. But that is unnecessary because we will do it after the while loop anyway. Besides that, tracking ->num_joined would keep the committer in the while loop when there are lots of internal writers (TRANS_JOIN). So we remove ->num_joined and no longer track whether any writers join the current transaction while the committer is sleeping.
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 265db57b3341..75e7b150eb54 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -124,7 +124,6 @@ loop: atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); extwriter_counter_inc(cur_trans, type); - cur_trans->num_joined++; spin_unlock(&fs_info->trans_lock); return 0; } @@ -157,7 +156,6 @@ loop: atomic_set(&cur_trans->num_writers, 1); extwriter_counter_init(cur_trans, type); - cur_trans->num_joined = 0; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->in_commit = 0; @@ -1566,7 +1564,6 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long joined = 0; struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_transaction *prev_trans = NULL; DEFINE_WAIT(wait); @@ -1668,8 +1665,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, should_grow = 1; do { - joined = cur_trans->num_joined; - WARN_ON(cur_trans != trans->transaction); ret = btrfs_flush_all_pending_stuffs(trans, root); @@ -1685,8 +1680,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, schedule_timeout(1); finish_wait(&cur_trans->writer_wait, &wait); - } while (extwriter_counter_read(cur_trans) > 0 || - (should_grow && cur_trans->num_joined != joined)); + } while (extwriter_counter_read(cur_trans) > 0); ret = btrfs_flush_all_pending_stuffs(trans, root); if (ret) -- cgit v1.2.3 From 581227d0d2b8735f899182f50b3a05089d02fa24 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:30 +0000 Subject: Btrfs: remove the time check in btrfs_commit_transaction() We checked the commit time to avoid committing the 
transaction frequently, but it is unnecessary because: - It made the transaction commit spend more time, and delayed the operation of the external writers(TRANS_START/TRANS_USERSPACE). - Except the space that we have to commit transaction, such as snapshot creation, btrfs doesn't commit the transaction on its own initiative. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 75e7b150eb54..5e75ff486daf 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1566,10 +1566,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_transaction *prev_trans = NULL; - DEFINE_WAIT(wait); int ret; - int should_grow = 0; - unsigned long now = get_seconds(); ret = btrfs_run_ordered_operations(trans, root, 0); if (ret) { @@ -1660,28 +1657,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; - if (!btrfs_test_opt(root, SSD) && - (now < cur_trans->start_time || now - cur_trans->start_time < 1)) - should_grow = 1; - - do { - WARN_ON(cur_trans != trans->transaction); - - ret = btrfs_flush_all_pending_stuffs(trans, root); - if (ret) - goto cleanup_transaction; - - prepare_to_wait(&cur_trans->writer_wait, &wait, - TASK_UNINTERRUPTIBLE); - - if (extwriter_counter_read(cur_trans) > 0) - schedule(); - else if (should_grow) - schedule_timeout(1); + ret = btrfs_flush_all_pending_stuffs(trans, root); + if (ret) + goto cleanup_transaction; - finish_wait(&cur_trans->writer_wait, &wait); - } while (extwriter_counter_read(cur_trans) > 0); + wait_event(cur_trans->writer_wait, + extwriter_counter_read(cur_trans) == 0); + /* some pending stuffs might be added after the previous flush. 
*/ ret = btrfs_flush_all_pending_stuffs(trans, root); if (ret) goto cleanup_transaction; -- cgit v1.2.3 From 4a9d8bdee368de78ace8b36da4eb2186afea162d Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 17 May 2013 03:53:43 +0000 Subject: Btrfs: make the state of the transaction more readable We used 3 variables to track the state of the transaction, which was complex and wasted memory. Besides that, it was hard to understand which types of transaction handles should be blocked in each transaction state, so developers often made mistakes. This patch addresses the above problems. In this patch, we define 6 states for the transaction, enum btrfs_trans_state { TRANS_STATE_RUNNING = 0, TRANS_STATE_BLOCKED = 1, TRANS_STATE_COMMIT_START = 2, TRANS_STATE_COMMIT_DOING = 3, TRANS_STATE_UNBLOCKED = 4, TRANS_STATE_COMPLETED = 5, TRANS_STATE_MAX = 6, } and use just 1 variable to track the state. In order to make the blocked handle types for each state clearer, we introduce an array: unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { [TRANS_STATE_RUNNING] = 0U, [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | __TRANS_START), [TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH | __TRANS_JOIN), [TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | __TRANS_JOIN_NOLOCK), [TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | __TRANS_JOIN_NOLOCK), } which is very intuitive. Besides that, because we remove ->in_commit from the transaction structure, the lock ->commit_lock which was used to protect it is no longer necessary, so we remove ->commit_lock as well.
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 157 +++++++++++++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 71 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5e75ff486daf..eec8686416ca 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -34,6 +34,29 @@ #define BTRFS_ROOT_TRANS_TAG 0 +static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { + [TRANS_STATE_RUNNING] = 0U, + [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | + __TRANS_START), + [TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | + __TRANS_START | + __TRANS_ATTACH), + [TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | + __TRANS_START | + __TRANS_ATTACH | + __TRANS_JOIN), + [TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | + __TRANS_START | + __TRANS_ATTACH | + __TRANS_JOIN | + __TRANS_JOIN_NOLOCK), + [TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | + __TRANS_START | + __TRANS_ATTACH | + __TRANS_JOIN | + __TRANS_JOIN_NOLOCK), +}; + static void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); @@ -50,13 +73,6 @@ static noinline void switch_commit_root(struct btrfs_root *root) root->commit_root = btrfs_root_node(root); } -static inline int can_join_transaction(struct btrfs_transaction *trans, - unsigned int type) -{ - return !(trans->in_commit && - (type & TRANS_EXTWRITERS)); -} - static inline void extwriter_counter_inc(struct btrfs_transaction *trans, unsigned int type) { @@ -98,26 +114,13 @@ loop: return -EROFS; } - if (fs_info->trans_no_join) { - /* - * If we are JOIN_NOLOCK we're already committing a current - * transaction, we just need a handle to deal with something - * when committing the transaction, such as inode cache and - * space cache. It is a special case. 
- */ - if (type != TRANS_JOIN_NOLOCK) { - spin_unlock(&fs_info->trans_lock); - return -EBUSY; - } - } - cur_trans = fs_info->running_transaction; if (cur_trans) { if (cur_trans->aborted) { spin_unlock(&fs_info->trans_lock); return cur_trans->aborted; } - if (!can_join_transaction(cur_trans, type)) { + if (btrfs_blocked_trans_types[cur_trans->state] & type) { spin_unlock(&fs_info->trans_lock); return -EBUSY; } @@ -136,6 +139,12 @@ loop: if (type == TRANS_ATTACH) return -ENOENT; + /* + * JOIN_NOLOCK only happens during the transaction commit, so + * it is impossible that ->running_transaction is NULL + */ + BUG_ON(type == TRANS_JOIN_NOLOCK); + cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); if (!cur_trans) return -ENOMEM; @@ -144,7 +153,7 @@ loop: if (fs_info->running_transaction) { /* * someone started a transaction after we unlocked. Make sure - * to redo the trans_no_join checks above + * to redo the checks above */ kmem_cache_free(btrfs_transaction_cachep, cur_trans); goto loop; @@ -158,14 +167,12 @@ loop: extwriter_counter_init(cur_trans, type); init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); - cur_trans->in_commit = 0; - cur_trans->blocked = 0; + cur_trans->state = TRANS_STATE_RUNNING; /* * One for this trans handle, one so it will live on until we * commit the transaction. 
*/ atomic_set(&cur_trans->use_count, 2); - cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); cur_trans->delayed_refs.root = RB_ROOT; @@ -188,7 +195,6 @@ loop: "creating a fresh transaction\n"); atomic64_set(&fs_info->tree_mod_seq, 0); - spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); atomic_set(&cur_trans->delayed_refs.ref_seq, 0); @@ -293,6 +299,12 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, return 0; } +static inline int is_transaction_blocked(struct btrfs_transaction *trans) +{ + return (trans->state >= TRANS_STATE_BLOCKED && + trans->state < TRANS_STATE_UNBLOCKED); +} + /* wait for commit against the current transaction to become unblocked * when this is done, it is safe to start a new transaction, but the current * transaction might not be fully on disk. @@ -303,12 +315,12 @@ static void wait_current_trans(struct btrfs_root *root) spin_lock(&root->fs_info->trans_lock); cur_trans = root->fs_info->running_transaction; - if (cur_trans && cur_trans->blocked) { + if (cur_trans && is_transaction_blocked(cur_trans)) { atomic_inc(&cur_trans->use_count); spin_unlock(&root->fs_info->trans_lock); wait_event(root->fs_info->transaction_wait, - !cur_trans->blocked); + cur_trans->state >= TRANS_STATE_UNBLOCKED); put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); @@ -432,7 +444,8 @@ again: INIT_LIST_HEAD(&h->new_bgs); smp_mb(); - if (cur_trans->blocked && may_wait_transaction(root, type)) { + if (cur_trans->state >= TRANS_STATE_BLOCKED && + may_wait_transaction(root, type)) { btrfs_commit_transaction(h, root); goto again; } @@ -536,7 +549,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) static noinline void wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { - wait_event(commit->commit_wait, commit->commit_done); + wait_event(commit->commit_wait, commit->state == 
TRANS_STATE_COMPLETED); } int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) @@ -572,8 +585,8 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) spin_lock(&root->fs_info->trans_lock); list_for_each_entry_reverse(t, &root->fs_info->trans_list, list) { - if (t->in_commit) { - if (t->commit_done) + if (t->state >= TRANS_STATE_COMMIT_START) { + if (t->state == TRANS_STATE_COMPLETED) break; cur_trans = t; atomic_inc(&cur_trans->use_count); @@ -614,7 +627,8 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, int err; smp_mb(); - if (cur_trans->blocked || cur_trans->delayed_refs.flushing) + if (cur_trans->state >= TRANS_STATE_BLOCKED || + cur_trans->delayed_refs.flushing) return 1; updates = trans->delayed_ref_updates; @@ -682,12 +696,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_create_pending_block_groups(trans, root); if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && - should_end_transaction(trans, root)) { - trans->transaction->blocked = 1; - smp_wmb(); + should_end_transaction(trans, root) && + ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { + spin_lock(&info->trans_lock); + if (cur_trans->state == TRANS_STATE_RUNNING) + cur_trans->state = TRANS_STATE_BLOCKED; + spin_unlock(&info->trans_lock); } - if (lock && cur_trans->blocked && !cur_trans->in_commit) { + if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { if (throttle) { /* * We may race with somebody else here so end up having @@ -1343,20 +1360,26 @@ static void update_super_roots(struct btrfs_root *root) int btrfs_transaction_in_commit(struct btrfs_fs_info *info) { + struct btrfs_transaction *trans; int ret = 0; + spin_lock(&info->trans_lock); - if (info->running_transaction) - ret = info->running_transaction->in_commit; + trans = info->running_transaction; + if (trans) + ret = (trans->state >= TRANS_STATE_COMMIT_START); spin_unlock(&info->trans_lock); return ret; } int 
btrfs_transaction_blocked(struct btrfs_fs_info *info) { + struct btrfs_transaction *trans; int ret = 0; + spin_lock(&info->trans_lock); - if (info->running_transaction) - ret = info->running_transaction->blocked; + trans = info->running_transaction; + if (trans) + ret = is_transaction_blocked(trans); spin_unlock(&info->trans_lock); return ret; } @@ -1368,7 +1391,8 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) static void wait_current_trans_commit_start(struct btrfs_root *root, struct btrfs_transaction *trans) { - wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); + wait_event(root->fs_info->transaction_blocked_wait, + trans->state >= TRANS_STATE_COMMIT_START); } /* @@ -1379,7 +1403,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, struct btrfs_transaction *trans) { wait_event(root->fs_info->transaction_wait, - trans->commit_done || (trans->in_commit && !trans->blocked)); + trans->state >= TRANS_STATE_UNBLOCKED); } /* @@ -1484,18 +1508,22 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, list_del_init(&cur_trans->list); if (cur_trans == root->fs_info->running_transaction) { - root->fs_info->trans_no_join = 1; + cur_trans->state = TRANS_STATE_COMMIT_DOING; spin_unlock(&root->fs_info->trans_lock); wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); spin_lock(&root->fs_info->trans_lock); - root->fs_info->running_transaction = NULL; } spin_unlock(&root->fs_info->trans_lock); btrfs_cleanup_one_transaction(trans->transaction, root); + spin_lock(&root->fs_info->trans_lock); + if (cur_trans == root->fs_info->running_transaction) + root->fs_info->running_transaction = NULL; + spin_unlock(&root->fs_info->trans_lock); + put_transaction(cur_trans); put_transaction(cur_trans); @@ -1507,10 +1535,6 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, current->journal_info = NULL; kmem_cache_free(btrfs_trans_handle_cachep, trans); - - 
spin_lock(&root->fs_info->trans_lock); - root->fs_info->trans_no_join = 0; - spin_unlock(&root->fs_info->trans_lock); } static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, @@ -1554,13 +1578,6 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) btrfs_wait_all_ordered_extents(fs_info, 1); } -/* - * btrfs_transaction state sequence: - * in_commit = 0, blocked = 0 (initial) - * in_commit = 1, blocked = 1 - * blocked = 0 - * commit_done = 1 - */ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -1615,9 +1632,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } - spin_lock(&cur_trans->commit_lock); - if (cur_trans->in_commit) { - spin_unlock(&cur_trans->commit_lock); + spin_lock(&root->fs_info->trans_lock); + if (cur_trans->state >= TRANS_STATE_COMMIT_START) { + spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); ret = btrfs_end_transaction(trans, root); @@ -1628,16 +1645,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } - trans->transaction->in_commit = 1; - trans->transaction->blocked = 1; - spin_unlock(&cur_trans->commit_lock); + cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&root->fs_info->transaction_blocked_wait); - spin_lock(&root->fs_info->trans_lock); if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, struct btrfs_transaction, list); - if (!prev_trans->commit_done) { + if (prev_trans->state != TRANS_STATE_COMPLETED) { atomic_inc(&prev_trans->use_count); spin_unlock(&root->fs_info->trans_lock); @@ -1673,10 +1687,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, /* * Ok now we need to make sure to block out any other joins while we * commit the transaction. We could have started a join before setting - * no_join so make sure to wait for num_writers to == 1 again. 
+ * COMMIT_DOING so make sure to wait for num_writers to == 1 again. */ spin_lock(&root->fs_info->trans_lock); - root->fs_info->trans_no_join = 1; + cur_trans->state = TRANS_STATE_COMMIT_DOING; spin_unlock(&root->fs_info->trans_lock); wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); @@ -1803,10 +1817,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, sizeof(*root->fs_info->super_copy)); - trans->transaction->blocked = 0; spin_lock(&root->fs_info->trans_lock); + cur_trans->state = TRANS_STATE_UNBLOCKED; root->fs_info->running_transaction = NULL; - root->fs_info->trans_no_join = 0; spin_unlock(&root->fs_info->trans_lock); mutex_unlock(&root->fs_info->reloc_mutex); @@ -1834,10 +1847,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - cur_trans->commit_done = 1; - root->fs_info->last_trans_committed = cur_trans->transid; - + /* + * We needn't acquire the lock here because there is no other task + * which can change it. + */ + cur_trans->state = TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); spin_lock(&root->fs_info->trans_lock); -- cgit v1.2.3 From c6adc9cc082e3cffda153999c9b9f8a8baaaaf45 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 28 May 2013 10:05:39 +0000 Subject: Btrfs: merge pending IO for tree log write back Before applying this patch, we flushed the log tree of the fs/file tree firstly, and then flushed the log root tree. It is ineffective, especially on the hard disk. This patch improved this problem by wrapping the above two flushes by the same blk_plug. By test, the performance of the sync write went up ~60%(2.9MB/s -> 4.6MB/s) on my scsi disk whose disk buffer was enabled. 
Test step: # mkfs.btrfs -f -m single DISK # mount DISK MNT # dd if=/dev/zero of=MNT/file0 bs=32K count=1024 oflag=sync Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index eec8686416ca..8c8b80085e75 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -778,9 +778,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, struct extent_state *cached_state = NULL; u64 start = 0; u64 end; - struct blk_plug plug; - blk_start_plug(&plug); while (!find_first_extent_bit(dirty_pages, start, &start, &end, mark, &cached_state)) { convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, @@ -794,7 +792,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, } if (err) werr = err; - blk_finish_plug(&plug); return werr; } @@ -839,8 +836,11 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, { int ret; int ret2; + struct blk_plug plug; + blk_start_plug(&plug); ret = btrfs_write_marked_extents(root, dirty_pages, mark); + blk_finish_plug(&plug); ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); if (ret) -- cgit v1.2.3 From 501407aab8c947911b10cf5a0e0043019d5a4f17 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 10 Jun 2013 16:47:23 -0400 Subject: Btrfs: stop waiting on current trans if we aborted I hit a hang when run_delayed_refs returned an error in the beginning of btrfs_commit_transaction. If we decide we need to commit the transaction in btrfs_end_transaction we'll set BLOCKED and start to commit, but if we get an error this early on we'll just exit without committing. This is fine, except that anybody else who tried to start a transaction will sit in wait_current_trans() since we're set to BLOCKED and we never set it to something else and woke people up. 
To fix this we want to check for trans->aborted everywhere we wait for the transaction state to change, and make btrfs_abort_transaction() wake up any waiters there may be. All the callers will notice that the transaction has aborted and exit out properly. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8c8b80085e75..c11b7efcc561 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -302,7 +302,8 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, static inline int is_transaction_blocked(struct btrfs_transaction *trans) { return (trans->state >= TRANS_STATE_BLOCKED && - trans->state < TRANS_STATE_UNBLOCKED); + trans->state < TRANS_STATE_UNBLOCKED && + !trans->aborted); } /* wait for commit against the current transaction to become unblocked @@ -320,7 +321,8 @@ static void wait_current_trans(struct btrfs_root *root) spin_unlock(&root->fs_info->trans_lock); wait_event(root->fs_info->transaction_wait, - cur_trans->state >= TRANS_STATE_UNBLOCKED); + cur_trans->state >= TRANS_STATE_UNBLOCKED || + cur_trans->aborted); put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); @@ -1392,7 +1394,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, struct btrfs_transaction *trans) { wait_event(root->fs_info->transaction_blocked_wait, - trans->state >= TRANS_STATE_COMMIT_START); + trans->state >= TRANS_STATE_COMMIT_START || + trans->aborted); } /* @@ -1403,7 +1406,8 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, struct btrfs_transaction *trans) { wait_event(root->fs_info->transaction_wait, - trans->state >= TRANS_STATE_UNBLOCKED); + trans->state >= TRANS_STATE_UNBLOCKED || + trans->aborted); } /* -- cgit v1.2.3 From 1be41b78bc688fc634bf30965d2be692c99fd11d Mon Sep 17 00:00:00 2001 From: 
Josef Bacik Date: Wed, 12 Jun 2013 13:56:06 -0400 Subject: Btrfs: fix transaction throttling for delayed refs Dave has this fs_mark script that can make btrfs abort with a sufficient amount of RAM. This is because with more RAM we can keep more dirty metadata in cache which in a roundabout way makes for many more pending delayed refs. What happens is we end up not throttling the transaction enough so when we go to commit the transaction when we've completely filled the file system we'll abort() because we use all of the space in the global reserve and we still have delayed refs to run. To fix this we need to make the delayed ref flushing and the transaction throttling dependent upon the number of delayed refs that we have instead of how much reserved space is left in the global reserve. With this patch we not only stop aborting transactions but we also get a smoother run speed with fs_mark and it makes us about 10% faster. Thanks, Reported-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c11b7efcc561..c916ebdc689a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -615,10 +615,11 @@ void btrfs_throttle(struct btrfs_root *root) static int should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int ret; + if (root->fs_info->global_block_rsv.space_info->full && + btrfs_should_throttle_delayed_refs(trans, root)) + return 1; - ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); - return ret ? 
1 : 0; + return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); } int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, @@ -649,7 +650,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; - int count = 0; + unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; @@ -678,17 +679,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); - while (count < 1) { - unsigned long cur = trans->delayed_ref_updates; + trans->delayed_ref_updates = 0; + if (btrfs_should_throttle_delayed_refs(trans, root)) { + cur = max_t(unsigned long, cur, 1); trans->delayed_ref_updates = 0; - if (cur && - trans->transaction->delayed_refs.num_heads_ready > 64) { - trans->delayed_ref_updates = 0; - btrfs_run_delayed_refs(trans, root, cur); - } else { - break; - } - count++; + btrfs_run_delayed_refs(trans, root, cur); } btrfs_trans_release_metadata(trans, root); @@ -1626,6 +1621,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, * start sending their work down. */ cur_trans->delayed_refs.flushing = 1; + smp_wmb(); if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); -- cgit v1.2.3 From 90b6d2830a72ff008c9bbc8dfbf7aaec90be458f Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Fri, 14 Jun 2013 16:21:24 +0800 Subject: Btrfs: fix the comment typo for btrfs_attach_transaction_barrier The comment is for btrfs_attach_transaction_barrier, not for btrfs_attach_transaction. Fix the typo. 
Signed-off-by: Wang Sheng-Hui Acked-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c916ebdc689a..bcfa32c91b5d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -529,7 +529,7 @@ struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) } /* - * btrfs_attach_transaction() - catch the running transaction + * btrfs_attach_transaction_barrier() - catch the running transaction * * It is similar to the above function, the differentia is this one * will wait for all the inactive transactions until they fully -- cgit v1.2.3 From 6df9a95e63395f595d0d1eb5d561dd6c91c40270 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 27 Jun 2013 13:22:46 -0400 Subject: Btrfs: make the chunk allocator completely tree lockless When adjusting the enospc rules for relocation I ran into a deadlock because we were relocating the only system chunk and that forced us to try and allocate a new system chunk while holding locks in the chunk tree, which caused us to deadlock. To fix this I've moved all of the dev extent addition and chunk addition out to the delayed chunk completion stuff. We still keep the in-memory stuff which makes sure everything is consistent. One change I had to make was to search the commit root of the device tree to find a free dev extent, and hold onto any chunk em's that we allocated in that transaction so we do not allocate the same dev extent twice. This has the side effect of fixing a bug with balance that has been there ever since balance existed. Basically you can free a block group and its dev extent and then immediately allocate that dev extent for a new block group and write stuff to that dev extent, all within the same transaction. So if you happen to crash during a balance you could come back to a completely broken file system. 
This patch should keep this sort of thing from happening in the future since we won't be able to allocate freed dev extents until after the transaction commits. This has passed all of the xfstests and my super annoying stress test followed by a balance. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bcfa32c91b5d..d58cce77fc6c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -63,6 +63,14 @@ static void put_transaction(struct btrfs_transaction *transaction) if (atomic_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); WARN_ON(transaction->delayed_refs.root.rb_node); + while (!list_empty(&transaction->pending_chunks)) { + struct extent_map *em; + + em = list_first_entry(&transaction->pending_chunks, + struct extent_map, list); + list_del_init(&em->list); + free_extent_map(em); + } kmem_cache_free(btrfs_transaction_cachep, transaction); } } @@ -202,6 +210,7 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_snapshots); INIT_LIST_HEAD(&cur_trans->ordered_operations); + INIT_LIST_HEAD(&cur_trans->pending_chunks); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); -- cgit v1.2.3 From cfad392b22163eba71d882950e17d2c4d43b2bad Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 25 Jul 2013 15:11:47 -0400 Subject: Btrfs: check to see if root_list is empty before adding it to dead roots A user reported a panic when running with autodefrag and deleting snapshots. This is because we could end up trying to add the root to the dead roots list twice. To fix this, check to see if the root's root_list is empty before adding it to the dead roots list. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d58cce77fc6c..af1931a5960d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -983,12 +983,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, * a dirty root struct and adds it into the list of dead roots that need to * be deleted */ -int btrfs_add_dead_root(struct btrfs_root *root) +void btrfs_add_dead_root(struct btrfs_root *root) { spin_lock(&root->fs_info->trans_lock); - list_add_tail(&root->root_list, &root->fs_info->dead_roots); + if (list_empty(&root->root_list)) + list_add_tail(&root->root_list, &root->fs_info->dead_roots); spin_unlock(&root->fs_info->trans_lock); - return 0; } /* @@ -1925,7 +1925,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) } root = list_first_entry(&fs_info->dead_roots, struct btrfs_root, root_list); - list_del(&root->root_list); + list_del_init(&root->root_list); spin_unlock(&fs_info->trans_lock); pr_debug("btrfs: cleaner removing %llu\n", -- cgit v1.2.3