From 8bedd51b6121c4607784d75f852828d25d119c52 Mon Sep 17 00:00:00 2001 From: Mitch Harder Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: Btrfs: Check for NULL page in extent_range_uptodate A user has encountered a NULL pointer kernel oops in btrfs when encountering media errors. The problem has been identified as an unhandled NULL pointer returned from find_get_page(). This modification simply checks for a NULL page, and returns with an error if found (the extent_range_uptodate() function returns 1 on errors). After testing this patch, the user reported that the error with the NULL pointer oops was solved. However, there is still a remaining problem with a thread becoming stuck in wait_on_page_locked(page) in the read_extent_buffer_pages(...) function in extent_io.c for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) ret = -EIO; } This patch leaves the issue with the locked page yet to be resolved. Signed-off-by: Mitch Harder Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9d09a4f81875..fcf77e1ded40 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3909,6 +3909,8 @@ int extent_range_uptodate(struct extent_io_tree *tree, while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); + if (!page) + return 1; uptodate = PageUptodate(page); page_cache_release(page); if (!uptodate) { -- cgit v1.2.3 From 87826df0ec36fc28884b4ddbb3f3af41c4c2008f Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 15 Feb 2012 16:23:57 +0100 Subject: btrfs: delalloc for page dirtied out-of-band in fixup worker We encountered an issue that was easily observable on s/390 systems but could really happen anywhere. The timing just seemed to hit reliably on s/390 with limited memory. The gist is that when an unexpected set_page_dirty() happened, we'd run into the BUG() in btrfs_writepage_fixup_worker since it wasn't properly set up for delalloc. This patch does the following: - Performs the missing delalloc in the fixup worker - Allow the start hook to return -EBUSY which informs __extent_writepage that it should mark the page skipped and not to redirty it. This is required since the fixup worker can fail with -ENOSPC and the page will have already been redirtied. That causes an Oops in drop_outstanding_extents later. Retrying the fixup worker could lead to an infinite loop. Deferring the page redirty also saves us some cycles since the page would be stuck in a resubmit-redirty loop until the fixup worker completes. It's not harmful, just wasteful. - If the fixup worker fails, we mark the page and mapping as errored, and end the writeback, similar to what we would do had the page actually been submitted to writeback. Signed-off-by: Jeff Mahoney --- fs/btrfs/extent_io.c | 65 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fcf77e1ded40..89ba79fb945c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2161,6 +2161,38 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, /* lots and lots of room for performance fixes in the end_bio funcs */ +int end_extent_writepage(struct page *page, int err, u64 start, u64 end) +{ + int uptodate = (err == 0); + struct extent_io_tree *tree; + int ret; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + + if (tree->ops && tree->ops->writepage_end_io_hook) { + ret = tree->ops->writepage_end_io_hook(page, start, + end, NULL, uptodate); + if (ret) + uptodate = 0; + } + + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(NULL, page, + start, end, NULL); + /* Writeback already completed */ + if (ret == 0) + return 1; + } + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); + ClearPageUptodate(page); + SetPageError(page); + } + return 0; +} + /* * after a writepage IO is done, we need to: * clear the uptodate bits on error @@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, */ static void end_bio_extent_writepage(struct bio *bio, int err) { - int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_io_tree *tree; u64 start; u64 end; int whole_page; - int ret; do { struct page *page = bvec->bv_page; @@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { - ret = tree->ops->writepage_end_io_hook(page, start, - end, NULL, uptodate); - if (ret) - uptodate = 0; - } - - if (!uptodate && tree->ops && - tree->ops->writepage_io_failed_hook) { - ret = tree->ops->writepage_io_failed_hook(bio, page, - start, end, NULL); - if (ret == 0) { - uptodate = (err == 0); - continue; - } - } - if (!uptodate) { - clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); - ClearPageUptodate(page); - SetPageError(page); - } + if (end_extent_writepage(page, err, start, end)) + continue; if (whole_page) end_page_writeback(page); @@ -2818,8 +2829,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (tree->ops && tree->ops->writepage_start_hook) { ret = tree->ops->writepage_start_hook(page, start, page_end); - if (ret == -EAGAIN) { - redirty_page_for_writepage(wbc, page); + if (ret) { + /* Fixup worker will requeue */ + if (ret == -EBUSY) + wbc->pages_skipped++; + else + redirty_page_for_writepage(wbc, page); update_nr_written(page, wbc, nr_written); unlock_page(page); ret = 0; -- cgit v1.2.3 From 013bd4c336ad0d30e9e41f9cff0dbc1858934e75 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 16 Feb 2012 10:11:40 +0900 Subject: Btrfs: fix return value check of extent_io_ops This patch adds the check on the return value of extent_io_ops. Signed-off-by: Tsutomu Itoh --- fs/btrfs/extent_io.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 89ba79fb945c..b05d35a7c0f1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2154,9 +2154,10 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, failrec->this_mirror, num_copies, failrec->in_validation); - tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, - failrec->bio_flags, 0); - return 0; + ret = tree->ops->submit_bio_hook(inode, read_mode, bio, + failrec->this_mirror, + failrec->bio_flags, 0); + return ret; } /* lots and lots of room for performance fixes in the end_bio funcs */ @@ -2790,9 +2791,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; continue; } - tree->ops->fill_delalloc(inode, page, delalloc_start, - delalloc_end, &page_started, - &nr_written); + ret = tree->ops->fill_delalloc(inode, page, + delalloc_start, + delalloc_end, + &page_started, + &nr_written); + BUG_ON(ret); /* * delalloc_end is already one less than the total * length, so we don't subtract one from -- cgit v1.2.3 From 285190d99fef696ec8b0041787387f013cb71d67 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 16 Feb 2012 16:23:58 +0900 Subject: Btrfs: check return value of lookup_extent_mapping() correctly This patch corrects error checking of lookup_extent_mapping(). Signed-off-by: Tsutomu Itoh --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b05d35a7c0f1..8d6f55fbd28e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3308,7 +3308,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, len = end - start + 1; write_lock(&map->lock); em = lookup_extent_mapping(map, start, len); - if (IS_ERR_OR_NULL(em)) { + if (!em) { write_unlock(&map->lock); break; } -- cgit v1.2.3 From 0449314a9cc5a7fb0bd42e2175a3c46f68f3a2b0 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 16 Feb 2012 18:34:37 +0800 Subject: Btrfs: skip states when they does not contain bits to clear Clearing a range's bits is different with setting them, since we don't need to touch them when states do not contain bits we want. Signed-off-by: Liu Bo --- fs/btrfs/extent_io.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8d6f55fbd28e..fe14285b53f1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -513,6 +513,15 @@ hit_next: WARN_ON(state->end < start); last_end = state->end; + if (state->end < end && !need_resched()) + next_node = rb_next(&state->rb_node); + else + next_node = NULL; + + /* the state doesn't have the wanted bits, go ahead */ + if (!(state->state & bits)) + goto next; + /* * | ---- desired range ---- | * | state | or @@ -565,12 +574,8 @@ hit_next: goto out; } - if (state->end < end && prealloc && !need_resched()) - next_node = rb_next(&state->rb_node); - else - next_node = NULL; - set |= clear_state_bit(tree, state, &bits, wake); +next: if (last_end == (u64)-1) goto out; start = last_end + 1; -- cgit v1.2.3 From 9d47c7671dc555e198c7347a173ed37316e0c4c1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 16 Feb 2012 18:34:38 +0800 Subject: Btrfs: kick out redundant stuff in convert_extent_bit clear_state_bit will do merge_state for us, so kick out the redundant one. Signed-off-by: Liu Bo --- fs/btrfs/extent_io.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fe14285b53f1..37259ff5cd71 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -966,8 +966,6 @@ hit_next: set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - - merge_state(tree, state); if (last_end == (u64)-1) goto out; @@ -1012,7 +1010,6 @@ hit_next: if (state->end <= end) { set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; @@ -1073,8 +1070,6 @@ hit_next: set_state_bits(tree, prealloc, &bits); clear_state_bit(tree, prealloc, &clear_bits, 0); - - merge_state(tree, prealloc); prealloc = NULL; goto out; } -- cgit v1.2.3 From 692e5759a43b916f0b66bcb39b2957499992381e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 16 Feb 2012 18:34:36 +0800 Subject: Btrfs: be less strict on finding next node in clear_extent_bit In clear_extent_bit, it is enough that next node is adjacent in tree level. Signed-off-by: Liu Bo --- fs/btrfs/extent_io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 37259ff5cd71..45ca8f9b0d61 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -582,8 +582,7 @@ next: if (start <= end && next_node) { state = rb_entry(next_node, struct extent_state, rb_node); - if (state->start == start) - goto hit_next; + goto hit_next; } goto search_again; -- cgit v1.2.3 From 506531905296d6aee84480c879b25ea98c3f9db6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 22 Feb 2012 12:36:24 -0500 Subject: Btrfs: clear the extent uptodate bits during parent transid failures If btrfs reads a block and finds a parent transid mismatch, it clears the uptodate flags on the extent buffer, and the pages inside it. But we only clear the uptodate bits in the state tree if the block straddles more than one page. This is from an old optimization from to reduce contention on the extent state tree. But it is buggy because the code that retries a read from a different copy of the block is going to find the uptodate state bits set and skip the IO. The end result of the bug is that we'll never actually read the good copy (if there is one). The fix here is to always clear the uptodate state bits, which is safe because this code is only called when the parent transid fails. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 45ca8f9b0d61..a55fbe6252de 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3871,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - if (eb_straddles_pages(eb)) { - clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - cached_state, GFP_NOFS); - } + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + cached_state, GFP_NOFS); + for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) -- cgit v1.2.3