summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-06-01 08:37:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-06-01 08:37:31 -0700
commit51eab603f5c86dd1eae4c525df3e7f7eeab401d6 (patch)
treee7a8c6214b072db126cca62d39008b3620134798 /fs/btrfs/extent_io.c
parent419f4319495043a9507ac3e616be9ca60af09744 (diff)
parent1e20932a23578bb1ec59107843574e259b96193f (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason: "This includes a fairly large change from Josef around data writeback completion. Before, the writeback wasn't completed until the metadata insertions for the extent were done, and this made for fairly large latency spikes on the last page of each ordered extent. We already had a separate mechanism for tracking pending metadata insertions, so Josef just needed to tweak things a little to end writeback earlier on the page. Overall it makes us much friendly to memory reclaim and lowers latencies quite a lot for synchronous IO. Jan Schmidt has finished some background work required to track btree blocks as they go through changes in ownership. It's the missing piece he needed for both btrfs send/receive and subvolume quotas. Neither of those are ready yet, but the new tracking code is included here. Most of the time, the new code is off. It is only used by scrub and other backref walkers. Stefan Behrens has added io failure tracking. This includes counters for which drives are causing the most trouble so the admin (or an automated tool) can choose to kick them out. We're tracking IO errors, crc errors, and generation checks we do on each metadata block. RAID5/6 did miss the cut this time because I'm having trouble with corruptions. I'll nail it down next week and post as a beta testing before 3.6" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (58 commits) Btrfs: fix tree mod log rewinded level and rewinding of moved keys Btrfs: fix tree mod log del_ptr Btrfs: add tree_mod_dont_log helper Btrfs: add missing spin_lock for insertion into tree mod log Btrfs: add inodes before dropping the extent lock in find_all_leafs Btrfs: use delayed ref sequence numbers for all fs-tree updates Btrfs: fix false positive in check-integrity on unmount Btrfs: fix runtime warning in check-integrity check data mode Btrfs: set ioprio of scrub readahead to idle Btrfs: fix return code in drop_objectid_items Btrfs: check to see if the inode is in the log before fsyncing Btrfs: return value of btrfs_read_buffer is checked correctly Btrfs: read device stats on mount, write modified ones during commit Btrfs: add ioctl to get and reset the device stats Btrfs: add device counters for detected IO and checksum errors btrfs: Drop unused function btrfs_abort_devices() Btrfs: fix the same inode id problem when doing auto defragment Btrfs: fall back to non-inline if we don't have enough space Btrfs: fix how we deal with the orphan block rsv Btrfs: convert the inode bit field to use the actual bit operations ...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c168
1 files changed, 115 insertions, 53 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c9018a05036e..2c8f7b204617 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
return parent;
}
- entry = rb_entry(node, struct tree_entry, rb_node);
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
@@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state)
/*
* utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1)
+ * it will optionally wake up any one waiting on this state (wake == 1).
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
@@ -570,10 +569,8 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
- clear_state_bit(tree, state, &bits, wake);
- if (last_end == (u64)-1)
- goto out;
- start = last_end + 1;
+ state = clear_state_bit(tree, state, &bits, wake);
+ goto next;
}
goto search_again;
}
@@ -781,7 +778,6 @@ hit_next:
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
- struct rb_node *next_node;
if (state->state & exclusive_bits) {
*failed_start = state->start;
err = -EEXIST;
@@ -789,20 +785,15 @@ hit_next:
}
set_state_bits(tree, state, &bits);
-
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
-
start = last_end + 1;
- next_node = rb_next(&state->rb_node);
- if (next_node && start < end && prealloc && !need_resched()) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
- if (state->start == start)
- goto hit_next;
- }
+ state = next_state(state);
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
goto search_again;
}
@@ -845,6 +836,10 @@ hit_next:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ state = next_state(state);
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
}
goto search_again;
}
@@ -994,21 +989,14 @@ hit_next:
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
- struct rb_node *next_node;
-
set_state_bits(tree, state, &bits);
- clear_state_bit(tree, state, &clear_bits, 0);
+ state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
-
start = last_end + 1;
- next_node = rb_next(&state->rb_node);
- if (next_node && start < end && prealloc && !need_resched()) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
- if (state->start == start)
- goto hit_next;
- }
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
goto search_again;
}
@@ -1042,10 +1030,13 @@ hit_next:
goto out;
if (state->end <= end) {
set_state_bits(tree, state, &bits);
- clear_state_bit(tree, state, &clear_bits, 0);
+ state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
}
goto search_again;
}
@@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
cached_state, mask);
}
-static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state,
- gfp_t mask)
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached_state, gfp_t mask)
{
return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, mask);
@@ -1293,7 +1283,7 @@ out:
* returned if we find something, and *start_ret and *end_ret are
* set to reflect the state struct that was found.
*
- * If nothing was found, 1 is returned, < 0 on error
+ * If nothing was found, 1 is returned. If found something, return 0.
*/
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits)
@@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
+ btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
@@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
uptodate = 0;
}
- if (!uptodate && tree->ops &&
- tree->ops->writepage_io_failed_hook) {
- ret = tree->ops->writepage_io_failed_hook(NULL, page,
- start, end, NULL);
- /* Writeback already completed */
- if (ret == 0)
- return 1;
- }
-
if (!uptodate) {
- clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
ClearPageUptodate(page);
SetPageError(page);
}
@@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
- if (ret)
+ if (ret) {
+ /* no IO indicated but software detected errors
+ * in the block, either checksum errors or
+ * issues with the contents */
+ struct btrfs_root *root =
+ BTRFS_I(page->mapping->host)->root;
+ struct btrfs_device *device;
+
uptodate = 0;
- else
+ device = btrfs_find_device_for_logical(
+ root, start, mirror);
+ if (device)
+ btrfs_dev_stat_inc_and_print(device,
+ BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ } else {
clean_io_failure(start, page);
+ }
}
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
@@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
unsigned long i, num_pages;
int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
- int ret;
+ int ret = 0;
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
@@ -3930,6 +3924,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
eb->start = start;
eb->len = len;
eb->tree = tree;
+ eb->bflags = 0;
rwlock_init(&eb->lock);
atomic_set(&eb->write_locks, 0);
atomic_set(&eb->read_locks, 0);
@@ -3967,6 +3962,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return eb;
}
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+{
+ unsigned long i;
+ struct page *p;
+ struct extent_buffer *new;
+ unsigned long num_pages = num_extent_pages(src->start, src->len);
+
+ new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC);
+ if (new == NULL)
+ return NULL;
+
+ for (i = 0; i < num_pages; i++) {
+ p = alloc_page(GFP_ATOMIC);
+ BUG_ON(!p);
+ attach_extent_buffer_page(new, p);
+ WARN_ON(PageDirty(p));
+ SetPageUptodate(p);
+ new->pages[i] = p;
+ }
+
+ copy_extent_buffer(new, src, 0, 0, src->len);
+ set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
+ set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
+
+ return new;
+}
+
+struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
+{
+ struct extent_buffer *eb;
+ unsigned long num_pages = num_extent_pages(0, len);
+ unsigned long i;
+
+ eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
+ if (!eb)
+ return NULL;
+
+ for (i = 0; i < num_pages; i++) {
+ eb->pages[i] = alloc_page(GFP_ATOMIC);
+ if (!eb->pages[i])
+ goto err;
+ }
+ set_extent_buffer_uptodate(eb);
+ btrfs_set_header_nritems(eb, 0);
+ set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+
+ return eb;
+err:
+ for (i--; i > 0; i--)
+ __free_page(eb->pages[i]);
+ __free_extent_buffer(eb);
+ return NULL;
+}
+
static int extent_buffer_under_io(struct extent_buffer *eb)
{
return (atomic_read(&eb->io_pages) ||
@@ -3981,18 +4030,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
unsigned long start_idx)
{
unsigned long index;
+ unsigned long num_pages;
struct page *page;
+ int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
BUG_ON(extent_buffer_under_io(eb));
- index = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb->start, eb->len);
+ index = start_idx + num_pages;
if (start_idx >= index)
return;
do {
index--;
page = extent_buffer_page(eb, index);
- if (page) {
+ if (page && mapped) {
spin_lock(&page->mapping->private_lock);
/*
* We do this since we'll remove the pages after we've
@@ -4017,6 +4069,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
}
spin_unlock(&page->mapping->private_lock);
+ }
+ if (page) {
/* One for when we alloced the page */
page_cache_release(page);
}
@@ -4235,14 +4289,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
- struct extent_io_tree *tree = eb->tree;
+ if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
+ spin_unlock(&eb->refs_lock);
+ } else {
+ struct extent_io_tree *tree = eb->tree;
- spin_unlock(&eb->refs_lock);
+ spin_unlock(&eb->refs_lock);
- spin_lock(&tree->buffer_lock);
- radix_tree_delete(&tree->buffer,
- eb->start >> PAGE_CACHE_SHIFT);
- spin_unlock(&tree->buffer_lock);
+ spin_lock(&tree->buffer_lock);
+ radix_tree_delete(&tree->buffer,
+ eb->start >> PAGE_CACHE_SHIFT);
+ spin_unlock(&tree->buffer_lock);
+ }
/* Should be safe to release our pages at this point */
btrfs_release_extent_buffer_page(eb, 0);
@@ -4260,6 +4318,10 @@ void free_extent_buffer(struct extent_buffer *eb)
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
+ test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
+ atomic_dec(&eb->refs);
+
+ if (atomic_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
!extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))