From 581bb050941b4f220f84d3e5ed6dace3d42dd382 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:06:11 +0800 Subject: Btrfs: Cache free inode numbers in memory Currently btrfs stores the highest objectid of the fs tree, and it always returns (highest+1) inode number when we create a file, so inode numbers won't be reclaimed when we delete files, so we'll run out of inode numbers as we keep create/delete files in 32bits machines. This fixes it, and it works similarly to how we cache free space in block cgroups. We start a kernel thread to read the file tree. By scanning inode items, we know which chunks of inode numbers are free, and we cache them in an rb-tree. Because we are searching the commit root, we have to carefully handle the cross-transaction case. The rb-tree is a hybrid extent+bitmap tree, so if we have too many small chunks of inode numbers, we'll use bitmaps. Initially we allow 16K ram of extents, and a bitmap will be used if we exceed this threshold. The extents threshold is adjusted in runtime. Signed-off-by: Li Zefan --- fs/btrfs/relocation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 58250e09eb05..e6cb89357256 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -30,6 +30,7 @@ #include "btrfs_inode.h" #include "async-thread.h" #include "free-space-cache.h" +#include "inode-map.h" /* * backref_node, mapping_node and tree_block start with this @@ -3897,7 +3898,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, if (IS_ERR(trans)) return ERR_CAST(trans); - err = btrfs_find_free_objectid(trans, root, objectid, &objectid); + err = btrfs_find_free_objectid(root, &objectid); if (err) goto out; -- cgit v1.2.3 From 33345d01522f8152f99dc84a3e7a1a45707f387f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:31:50 +0800 Subject: Btrfs: Always use 64bit inode number There's a potential problem in 32bit system when we exhaust 32bit inode numbers and start to allocate big inode numbers, because btrfs uses inode->i_ino in many places. So here we always use BTRFS_I(inode)->location.objectid, which is an u64 variable. There are 2 exceptions that BTRFS_I(inode)->location.objectid != inode->i_ino: the btree inode (0 vs 1) and empty subvol dirs (256 vs 2), and inode->i_ino will be used in those cases. Another reason to make this change is I'm going to use a special inode to save free ino cache, and the inode number must be > (u64)-256. Signed-off-by: Li Zefan --- fs/btrfs/relocation.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e6cb89357256..7b75e0c8ef8d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1410,9 +1410,9 @@ again: prev = node; entry = rb_entry(node, struct btrfs_inode, rb_node); - if (objectid < entry->vfs_inode.i_ino) + if (objectid < btrfs_ino(&entry->vfs_inode)) node = node->rb_left; - else if (objectid > entry->vfs_inode.i_ino) + else if (objectid > btrfs_ino(&entry->vfs_inode)) node = node->rb_right; else break; @@ -1420,7 +1420,7 @@ again: if (!node) { while (prev) { entry = rb_entry(prev, struct btrfs_inode, rb_node); - if (objectid <= entry->vfs_inode.i_ino) { + if (objectid <= btrfs_ino(&entry->vfs_inode)) { node = prev; break; } @@ -1435,7 +1435,7 @@ again: return inode; } - objectid = entry->vfs_inode.i_ino + 1; + objectid = btrfs_ino(&entry->vfs_inode) + 1; if (cond_resched_lock(&root->inode_lock)) goto again; @@ -1471,7 +1471,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, return -ENOMEM; bytenr -= BTRFS_I(reloc_inode)->index_cnt; - ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode), bytenr, 0); if (ret < 0) goto out; @@ -1559,11 +1559,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans, if (first) { inode = find_next_inode(root, key.objectid); first = 0; - } else if (inode && inode->i_ino < key.objectid) { + } else if (inode && btrfs_ino(inode) < key.objectid) { btrfs_add_delayed_iput(inode); inode = find_next_inode(root, key.objectid); } - if (inode && inode->i_ino == key.objectid) { + if (inode && btrfs_ino(inode) == key.objectid) { end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); WARN_ON(!IS_ALIGNED(key.offset, @@ -1894,6 +1894,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, struct inode *inode = NULL; u64 objectid; u64 start, end; + u64 ino; objectid = min_key->objectid; while (1) { @@ -1906,17 +1907,18 @@ static int invalidate_extent_cache(struct btrfs_root *root, inode = find_next_inode(root, objectid); if (!inode) break; + ino = btrfs_ino(inode); - if (inode->i_ino > max_key->objectid) { + if (ino > max_key->objectid) { iput(inode); break; } - objectid = inode->i_ino + 1; + objectid = ino + 1; if (!S_ISREG(inode->i_mode)) continue; - if (unlikely(min_key->objectid == inode->i_ino)) { + if (unlikely(min_key->objectid == ino)) { if (min_key->type > BTRFS_EXTENT_DATA_KEY) continue; if (min_key->type < BTRFS_EXTENT_DATA_KEY) @@ -1929,7 +1931,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, start = 0; } - if (unlikely(max_key->objectid == inode->i_ino)) { + if (unlikely(max_key->objectid == ino)) { if (max_key->type < BTRFS_EXTENT_DATA_KEY) continue; if (max_key->type > BTRFS_EXTENT_DATA_KEY) { -- cgit v1.2.3 From c704005d886cf0bc9bc3974eb009b22fe0da32c7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 19 Apr 2011 18:00:01 +0200 Subject: btrfs: unify checking of IS_ERR and null use IS_ERR_OR_NULL when possible, done by this coccinelle script: @ match @ identifier id; @@ ( - BUG_ON(IS_ERR(id) || !id); + BUG_ON(IS_ERR_OR_NULL(id)); | - IS_ERR(id) || !id + IS_ERR_OR_NULL(id) | - !id || IS_ERR(id) + IS_ERR_OR_NULL(id) ) Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..fed0aaec0753 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3220,7 +3220,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, key.offset = 0; inode = btrfs_iget(fs_info->sb, &key, root, NULL); - if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { + if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) { if (inode && !IS_ERR(inode)) iput(inode); return -ENOENT; -- cgit v1.2.3 From f993c883ad8e111fb9e9ae603540acbe94f7246c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Apr 2011 23:35:57 +0200 Subject: btrfs: drop unused argument from extent_io_tree_init all callers pass GFP_NOFS, but the GFP mask argument is not used in the function; GFP_ATOMIC is passed to radix tree initialization and it's the only correct one, since we're using the preload/insert mechanism of radix tree. Let's drop the gfp mask from btrfs function, this will not change behaviour. Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index fed0aaec0753..f3edf45317bc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3935,7 +3935,7 @@ static struct reloc_control *alloc_reloc_control(void) INIT_LIST_HEAD(&rc->reloc_roots); backref_cache_init(&rc->backref_cache); mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); + extent_io_tree_init(&rc->processed_blocks, NULL); return rc; } -- cgit v1.2.3 From 172ddd60a662c4d8bf2809462866ddddd6431ea5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 00:48:27 +0200 Subject: btrfs: drop gfp parameter from alloc_extent_map pass GFP_NOFS directly to kmem_cache_alloc Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f3edf45317bc..2097a88f60aa 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2870,7 +2870,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, struct extent_map *em; int ret = 0; - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); if (!em) return -ENOMEM; -- cgit v1.2.3 From b3b4aa74b58bded927f579fff787fb6fa1c0393c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 01:20:15 +0200 Subject: btrfs: drop unused parameter from btrfs_release_path parameter tree root it's not used since commit 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee ("Btrfs: Create extent_buffer interface for large blocksizes") Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2097a88f60aa..f7b799b151aa 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -961,7 +961,7 @@ again: lower = upper; upper = NULL; } - btrfs_release_path(root, path2); + btrfs_release_path(path2); next: if (ptr < end) { ptr += btrfs_extent_inline_ref_size(key.type); @@ -974,7 +974,7 @@ next: if (ptr >= end) path1->slots[0]++; } - btrfs_release_path(rc->extent_root, path1); + btrfs_release_path(path1); cur->checked = 1; WARN_ON(exist); @@ -1749,7 +1749,7 @@ again: btrfs_node_key_to_cpu(path->nodes[level], &key, path->slots[level]); - btrfs_release_path(src, path); + btrfs_release_path(path); path->lowest_level = level; ret = btrfs_search_slot(trans, src, &key, path, 0, 1); @@ -2496,7 +2496,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, path->locks[upper->level] = 0; slot = path->slots[upper->level]; - btrfs_release_path(NULL, path); + btrfs_release_path(path); } else { ret = btrfs_bin_search(upper->eb, key, upper->level, &slot); @@ -2737,7 +2737,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, } else { path->lowest_level = node->level; ret = btrfs_search_slot(trans, root, key, path, 0, 1); - btrfs_release_path(root, path); + btrfs_release_path(path); if (ret > 0) ret = 0; } @@ -3119,7 +3119,7 @@ static int add_tree_block(struct reloc_control *rc, #endif } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); BUG_ON(level == -1); @@ -3505,7 +3505,7 @@ int add_data_references(struct reloc_control *rc, } path->slots[0]++; } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); if (err) free_block_list(blocks); return err; @@ -3568,7 +3568,7 @@ next: EXTENT_DIRTY); if (ret == 0 && start <= key.objectid) { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); rc->search_start = end + 1; } else { rc->search_start = key.objectid + key.offset; @@ -3576,7 +3576,7 @@ next: return 0; } } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); return ret; } @@ -3713,7 +3713,7 @@ restart: flags = BTRFS_EXTENT_FLAG_DATA; if (path_change) { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); path->search_commit_root = 1; path->skip_locking = 1; @@ -3736,7 +3736,7 @@ restart: (flags & BTRFS_EXTENT_FLAG_DATA)) { ret = add_data_references(rc, &key, path, &blocks); } else { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); ret = 0; } if (ret < 0) { @@ -3799,7 +3799,7 @@ restart: } } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, GFP_NOFS); @@ -3867,7 +3867,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); + btrfs_release_path(path); out: btrfs_free_path(path); return ret; @@ -4109,7 +4109,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - btrfs_release_path(root->fs_info->tree_root, path); + btrfs_release_path(path); if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || key.type != BTRFS_ROOT_ITEM_KEY) @@ -4141,7 +4141,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) key.offset--; } - btrfs_release_path(root->fs_info->tree_root, path); + btrfs_release_path(path); if (list_empty(&reloc_roots)) goto out; -- cgit v1.2.3 From f2a97a9dbd86eb1ef956bdf20e05c507b32beb96 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 5 May 2011 12:44:41 +0200 Subject: btrfs: remove all unused functions Remove static and global declarations and/or definitions. Reduces size of btrfs.ko by ~3.4kB. text data bss dec hex filename 402081 7464 200 409745 64091 btrfs.ko.base 398620 7144 200 405964 631cc btrfs.ko.remove-all Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f7b799b151aa..f726e72dd362 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -507,6 +507,7 @@ static int update_backref_cache(struct btrfs_trans_handle *trans, return 1; } + static int should_ignore_root(struct btrfs_root *root) { struct btrfs_root *reloc_root; @@ -529,7 +530,6 @@ static int should_ignore_root(struct btrfs_root *root) */ return 1; } - /* * find reloc tree by address of tree root */ -- cgit v1.2.3 From 70f23fd66bc821a0e99647f70a809e277cc93c4c Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Tue, 10 May 2011 10:16:21 +0200 Subject: treewide: fix a few typos in comments - kenrel -> kernel - whetehr -> whether - ttt -> tt - sss -> ss Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..f340f7c99d09 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -709,7 +709,7 @@ again: WARN_ON(cur->checked); if (!list_empty(&cur->upper)) { /* - * the backref was added previously when processsing + * the backref was added previously when processing * backref of type BTRFS_TREE_BLOCK_REF_KEY */ BUG_ON(!list_is_singular(&cur->upper)); -- cgit v1.2.3 From a2de733c78fa7af51ba9670482fa7d392aa67c57 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 8 Mar 2011 14:14:00 +0100 Subject: btrfs: scrub This adds an initial implementation for scrub. It works quite straightforward. The usermode issues an ioctl for each device in the fs. For each device, it enumerates the allocated device chunks. For each chunk, the contained extents are enumerated and the data checksums fetched. The extents are read sequentially and the checksums verified. If an error occurs (checksum or EIO), a good copy is searched for. If one is found, the bad copy will be rewritten. All enumerations happen from the commit roots. During a transaction commit, the scrubs get paused and afterwards continue from the new roots. This commit is based on the series originally posted to linux-btrfs with some improvements that resulted from comments from David Sterba, Ilya Dryomov and Jan Schmidt. Signed-off-by: Arne Jansen --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 58250e09eb05..db1dffa9952b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4242,7 +4242,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, - disk_bytenr + len - 1, &list); + disk_bytenr + len - 1, &list, 0); while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); -- cgit v1.2.3 From 7a7eaa40a39bde4eefc91aadeb1ce3dc4e6a1252 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:54:33 -0400 Subject: Btrfs: take away the num_items argument from btrfs_join_transaction I keep forgetting that btrfs_join_transaction() just ignores the num_items argument, which leads me to sending pointless patches and looking stupid :). So just kill the num_items argument from btrfs_join_transaction and btrfs_start_ioctl_transaction, since neither of them use it. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..8bb256667f2d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2149,7 +2149,7 @@ again: err = ret; } - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { if (!err) btrfs_block_rsv_release(rc->extent_root, @@ -3233,7 +3233,7 @@ truncate: goto out; } - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { btrfs_free_path(path); ret = PTR_ERR(trans); @@ -3642,7 +3642,7 @@ int prepare_to_relocate(struct reloc_control *rc) rc->create_reloc_tree = 1; set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); BUG_ON(IS_ERR(trans)); btrfs_commit_transaction(trans, rc->extent_root); return 0; @@ -3831,7 +3831,7 @@ restart: btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); /* get rid of pinned extents */ - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else @@ -4156,7 +4156,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { unset_reloc_control(rc); err = PTR_ERR(trans); @@ -4190,7 +4190,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else -- cgit v1.2.3 From a4abeea41adfa3c143c289045f4625dfaeba2212 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 17:25:13 -0400 Subject: Btrfs: kill trans_mutex We use trans_mutex for lots of things, here's a basic list 1) To serialize trans_handles joining the currently running transaction 2) To make sure that no new trans handles are started while we are committing 3) To protect the dead_roots list and the transaction lists Really the serializing trans_handles joining is not too hard, and can really get bogged down in acquiring a reference to the transaction. So replace the trans_mutex with a trans_lock spinlock and use it to do the following 1) Protect fs_info->running_transaction. All trans handles have to do is check this, and then take a reference of the transaction and keep on going. 2) Protect the fs_info->trans_list. This doesn't get used too much, basically it just holds the current transactions, which will usually just be the currently committing transaction and the currently running transaction at most. 3) Protect the dead roots list. This is only ever processed by splicing the list so this is relatively simple. 4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using the trans_mutex before, so this is a pretty straightforward change. 5) Protect fs_info->no_trans_join. Because we don't hold the trans_lock over the entirety of the commit we need to have a way to block new people from creating a new transaction while we're doing our work. So we set no_trans_join and in join_transaction we test to see if that is set, and if it is we do a wait_on_commit. 6) Make the transaction use count atomic so we don't need to take locks to modify it when we're dropping references. 7) Add a commit_lock to the transaction to make sure multiple people trying to commit the same transaction don't race and commit at the same time. 8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl trans. I have tested this with xfstests, but obviously it is a pretty hairy change so lots of testing is greatly appreciated. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8bb256667f2d..09c30d37d43e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2136,10 +2136,10 @@ int prepare_to_merge(struct reloc_control *rc, int err) u64 num_bytes = 0; int ret; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; rc->merging_rsv_size += rc->nodes_relocated * 2; - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); again: if (!err) { num_bytes = rc->merging_rsv_size; @@ -2208,9 +2208,9 @@ int merge_reloc_roots(struct reloc_control *rc) int ret; again: root = rc->extent_root; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); list_splice_init(&rc->reloc_roots, &reloc_roots); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); while (!list_empty(&reloc_roots)) { found = 1; @@ -3583,17 +3583,17 @@ next: static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = rc; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = NULL; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static int check_extent_flags(u64 flags) -- cgit v1.2.3 From 026fd317828500524cdc7e5ff9e8e7923abb2868 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 10:32:11 -0400 Subject: Btrfs: don't always do readahead Our readahead is sort of sloppy, and really isn't always needed. For example if ls is doing a stating ls (which is the default) it's going to stat in non-disk order, so if say you have a directory with a stupid amount of files, readahead is going to do nothing but waste time in the case of doing the stat. Taking the unconditional readahead out made my test go from 57 minutes to 36 minutes. This means that everywhere we do loop through the tree we want to make sure we do set path->reada properly, so I went through and found all of the places where we loop through the path and set reada to 1. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 09c30d37d43e..5872b41581f4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -676,6 +676,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, err = -ENOMEM; goto out; } + path1->reada = 1; + path2->reada = 2; node = alloc_backref_node(cache); if (!node) { @@ -1996,6 +1998,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -3297,6 +3300,7 @@ static int find_data_references(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { @@ -3665,6 +3669,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; ret = prepare_to_relocate(rc); if (ret) { @@ -4090,6 +4095,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = -1; key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; -- cgit v1.2.3 From 7585717f304f5ed005cc4ad933a69aab3efbd136 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 13 Jun 2011 20:00:16 -0400 Subject: Btrfs: fix relocation races The recent commit to get rid of our trans_mutex introduced some races with block group relocation. The problem is that relocation needs to do some record keeping about each root, and it was relying on the transaction mutex to coordinate things in subtle ways. This fix adds a mutex just for the relocation code and makes sure it doesn't have a big impact on normal operations. The race is really fixed in btrfs_record_root_in_trans, which is where we step back and wait for the relocation code to finish accounting setup. Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f25b10a22a0a..086b1e6b8614 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, int ret; if (!root->reloc_root) - return 0; + goto out; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, ret = btrfs_update_root(trans, root->fs_info->tree_root, &reloc_root->root_key, root_item); BUG_ON(ret); + +out: return 0; } @@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err) u64 num_bytes = 0; int ret; - spin_lock(&root->fs_info->trans_lock); + mutex_lock(&root->fs_info->reloc_mutex); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; rc->merging_rsv_size += rc->nodes_relocated * 2; - spin_unlock(&root->fs_info->trans_lock); + mutex_unlock(&root->fs_info->reloc_mutex); + again: if (!err) { num_bytes = rc->merging_rsv_size; @@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc) int ret; again: root = rc->extent_root; - spin_lock(&root->fs_info->trans_lock); + + /* + * this serializes us with btrfs_record_root_in_transaction, + * we have to make sure nobody is in the middle of + * adding their roots to the list while we are + * doing this splice + */ + mutex_lock(&root->fs_info->reloc_mutex); list_splice_init(&rc->reloc_roots, &reloc_roots); - spin_unlock(&root->fs_info->trans_lock); + mutex_unlock(&root->fs_info->reloc_mutex); while (!list_empty(&reloc_roots)) { found = 1; @@ -3590,17 +3600,19 @@ next: static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - spin_lock(&fs_info->trans_lock); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = rc; - spin_unlock(&fs_info->trans_lock); + mutex_unlock(&fs_info->reloc_mutex); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - spin_lock(&fs_info->trans_lock); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = NULL; - spin_unlock(&fs_info->trans_lock); + mutex_unlock(&fs_info->reloc_mutex); } static int check_extent_flags(u64 flags) -- cgit v1.2.3