diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-01 08:37:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-01 08:37:31 -0700 |
commit | 51eab603f5c86dd1eae4c525df3e7f7eeab401d6 (patch) | |
tree | e7a8c6214b072db126cca62d39008b3620134798 /fs/btrfs/extent_io.c | |
parent | 419f4319495043a9507ac3e616be9ca60af09744 (diff) | |
parent | 1e20932a23578bb1ec59107843574e259b96193f (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This includes a fairly large change from Josef around data writeback
completion. Before, the writeback wasn't completed until the metadata
insertions for the extent were done, and this made for fairly large
latency spikes on the last page of each ordered extent.
We already had a separate mechanism for tracking pending metadata
insertions, so Josef just needed to tweak things a little to end
writeback earlier on the page. Overall it makes us much friendly to
memory reclaim and lowers latencies quite a lot for synchronous IO.
Jan Schmidt has finished some background work required to track btree
blocks as they go through changes in ownership. It's the missing
piece he needed for both btrfs send/receive and subvolume quotas.
Neither of those are ready yet, but the new tracking code is included
here. Most of the time, the new code is off. It is only used by
scrub and other backref walkers.
Stefan Behrens has added io failure tracking. This includes counters
for which drives are causing the most trouble so the admin (or an
automated tool) can choose to kick them out. We're tracking IO
errors, crc errors, and generation checks we do on each metadata
block.
RAID5/6 did miss the cut this time because I'm having trouble with
corruptions. I'll nail it down next week and post as a beta testing
before 3.6"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (58 commits)
Btrfs: fix tree mod log rewinded level and rewinding of moved keys
Btrfs: fix tree mod log del_ptr
Btrfs: add tree_mod_dont_log helper
Btrfs: add missing spin_lock for insertion into tree mod log
Btrfs: add inodes before dropping the extent lock in find_all_leafs
Btrfs: use delayed ref sequence numbers for all fs-tree updates
Btrfs: fix false positive in check-integrity on unmount
Btrfs: fix runtime warning in check-integrity check data mode
Btrfs: set ioprio of scrub readahead to idle
Btrfs: fix return code in drop_objectid_items
Btrfs: check to see if the inode is in the log before fsyncing
Btrfs: return value of btrfs_read_buffer is checked correctly
Btrfs: read device stats on mount, write modified ones during commit
Btrfs: add ioctl to get and reset the device stats
Btrfs: add device counters for detected IO and checksum errors
btrfs: Drop unused function btrfs_abort_devices()
Btrfs: fix the same inode id problem when doing auto defragment
Btrfs: fall back to non-inline if we don't have enough space
Btrfs: fix how we deal with the orphan block rsv
Btrfs: convert the inode bit field to use the actual bit operations
...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 168 |
1 files changed, 115 insertions, 53 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c9018a05036e..2c8f7b204617 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, return parent; } - entry = rb_entry(node, struct tree_entry, rb_node); rb_link_node(node, parent, p); rb_insert_color(node, root); return NULL; @@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state) /* * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1) + * it will optionally wake up any one waiting on this state (wake == 1). * * If no bits are set on the state struct after clearing things, the * struct is freed and removed from the tree @@ -570,10 +569,8 @@ hit_next: if (err) goto out; if (state->end <= end) { - clear_state_bit(tree, state, &bits, wake); - if (last_end == (u64)-1) - goto out; - start = last_end + 1; + state = clear_state_bit(tree, state, &bits, wake); + goto next; } goto search_again; } @@ -781,7 +778,6 @@ hit_next: * Just lock what we found and keep going */ if (state->start == start && state->end <= end) { - struct rb_node *next_node; if (state->state & exclusive_bits) { *failed_start = state->start; err = -EEXIST; @@ -789,20 +785,15 @@ hit_next: } set_state_bits(tree, state, &bits); - cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; - start = last_end + 1; - next_node = rb_next(&state->rb_node); - if (next_node && start < end && prealloc && !need_resched()) { - state = rb_entry(next_node, struct extent_state, - rb_node); - if (state->start == start) - goto hit_next; - } + state = next_state(state); + if (start < end && state && state->start == start && + !need_resched()) + goto hit_next; goto search_again; } @@ -845,6 +836,10 @@ hit_next: if (last_end == (u64)-1) goto out; start = last_end + 1; + state = next_state(state); + if (start < end && state && state->start == start && + !need_resched()) + goto hit_next; } goto search_again; } @@ -994,21 +989,14 @@ hit_next: * Just lock what we found and keep going */ if (state->start == start && state->end <= end) { - struct rb_node *next_node; - set_state_bits(tree, state, &bits); - clear_state_bit(tree, state, &clear_bits, 0); + state = clear_state_bit(tree, state, &clear_bits, 0); if (last_end == (u64)-1) goto out; - start = last_end + 1; - next_node = rb_next(&state->rb_node); - if (next_node && start < end && prealloc && !need_resched()) { - state = rb_entry(next_node, struct extent_state, - rb_node); - if (state->start == start) - goto hit_next; - } + if (start < end && state && state->start == start && + !need_resched()) + goto hit_next; goto search_again; } @@ -1042,10 +1030,13 @@ hit_next: goto out; if (state->end <= end) { set_state_bits(tree, state, &bits); - clear_state_bit(tree, state, &clear_bits, 0); + state = clear_state_bit(tree, state, &clear_bits, 0); if (last_end == (u64)-1) goto out; start = last_end + 1; + if (start < end && state && state->start == start && + !need_resched()) + goto hit_next; } goto search_again; } @@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, cached_state, mask); } -static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached_state, - gfp_t mask) +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + struct extent_state **cached_state, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, cached_state, mask); @@ -1293,7 +1283,7 @@ out: * returned if we find something, and *start_ret and *end_ret are * set to reflect the state struct that was found. * - * If nothing was found, 1 is returned, < 0 on error + * If nothing was found, 1 is returned. If found something, return 0. */ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits) @@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { /* try to remap that extent elsewhere? */ bio_put(bio); + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; } @@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) uptodate = 0; } - if (!uptodate && tree->ops && - tree->ops->writepage_io_failed_hook) { - ret = tree->ops->writepage_io_failed_hook(NULL, page, - start, end, NULL); - /* Writeback already completed */ - if (ret == 0) - return 1; - } - if (!uptodate) { - clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); ClearPageUptodate(page); SetPageError(page); } @@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, state, mirror); - if (ret) + if (ret) { + /* no IO indicated but software detected errors + * in the block, either checksum errors or + * issues with the contents */ + struct btrfs_root *root = + BTRFS_I(page->mapping->host)->root; + struct btrfs_device *device; + uptodate = 0; - else + device = btrfs_find_device_for_logical( + root, start, mirror); + if (device) + btrfs_dev_stat_inc_and_print(device, + BTRFS_DEV_STAT_CORRUPTION_ERRS); + } else { clean_io_failure(start, page); + } } if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { @@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb, u64 offset = eb->start; unsigned long i, num_pages; int rw = (epd->sync_io ? WRITE_SYNC : WRITE); - int ret; + int ret = 0; clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); num_pages = num_extent_pages(eb->start, eb->len); @@ -3930,6 +3924,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, eb->start = start; eb->len = len; eb->tree = tree; + eb->bflags = 0; rwlock_init(&eb->lock); atomic_set(&eb->write_locks, 0); atomic_set(&eb->read_locks, 0); @@ -3967,6 +3962,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, return eb; } +struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) +{ + unsigned long i; + struct page *p; + struct extent_buffer *new; + unsigned long num_pages = num_extent_pages(src->start, src->len); + + new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); + if (new == NULL) + return NULL; + + for (i = 0; i < num_pages; i++) { + p = alloc_page(GFP_ATOMIC); + BUG_ON(!p); + attach_extent_buffer_page(new, p); + WARN_ON(PageDirty(p)); + SetPageUptodate(p); + new->pages[i] = p; + } + + copy_extent_buffer(new, src, 0, 0, src->len); + set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); + set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); + + return new; +} + +struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) +{ + struct extent_buffer *eb; + unsigned long num_pages = num_extent_pages(0, len); + unsigned long i; + + eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); + if (!eb) + return NULL; + + for (i = 0; i < num_pages; i++) { + eb->pages[i] = alloc_page(GFP_ATOMIC); + if (!eb->pages[i]) + goto err; + } + set_extent_buffer_uptodate(eb); + btrfs_set_header_nritems(eb, 0); + set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); + + return eb; +err: + for (i--; i > 0; i--) + __free_page(eb->pages[i]); + __free_extent_buffer(eb); + return NULL; +} + static int extent_buffer_under_io(struct extent_buffer *eb) { return (atomic_read(&eb->io_pages) || @@ -3981,18 +4030,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, unsigned long start_idx) { unsigned long index; + unsigned long num_pages; struct page *page; + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); BUG_ON(extent_buffer_under_io(eb)); - index = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb->start, eb->len); + index = start_idx + num_pages; if (start_idx >= index) return; do { index--; page = extent_buffer_page(eb, index); - if (page) { + if (page && mapped) { spin_lock(&page->mapping->private_lock); /* * We do this since we'll remove the pages after we've @@ -4017,6 +4069,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, } spin_unlock(&page->mapping->private_lock); + } + if (page) { /* One for when we alloced the page */ page_cache_release(page); } @@ -4235,14 +4289,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) { WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { - struct extent_io_tree *tree = eb->tree; + if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { + spin_unlock(&eb->refs_lock); + } else { + struct extent_io_tree *tree = eb->tree; - spin_unlock(&eb->refs_lock); + spin_unlock(&eb->refs_lock); - spin_lock(&tree->buffer_lock); - radix_tree_delete(&tree->buffer, - eb->start >> PAGE_CACHE_SHIFT); - spin_unlock(&tree->buffer_lock); + spin_lock(&tree->buffer_lock); + radix_tree_delete(&tree->buffer, + eb->start >> PAGE_CACHE_SHIFT); + spin_unlock(&tree->buffer_lock); + } /* Should be safe to release our pages at this point */ btrfs_release_extent_buffer_page(eb, 0); @@ -4260,6 +4318,10 @@ void free_extent_buffer(struct extent_buffer *eb) spin_lock(&eb->refs_lock); if (atomic_read(&eb->refs) == 2 && + test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) + atomic_dec(&eb->refs); + + if (atomic_read(&eb->refs) == 2 && test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && !extent_buffer_under_io(eb) && test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) |