Diffstat (limited to 'fs/btrfs')
-rw-r--r--   fs/btrfs/block-group.c   |  18
-rw-r--r--   fs/btrfs/discard.c       |  34
-rw-r--r--   fs/btrfs/disk-io.c       |  40
-rw-r--r--   fs/btrfs/extent-tree.c   |  30
-rw-r--r--   fs/btrfs/extent_io.c     |   7
-rw-r--r--   fs/btrfs/file.c          |   9
-rw-r--r--   fs/btrfs/inode.c         |  15
-rw-r--r--   fs/btrfs/relocation.c    |   6
-rw-r--r--   fs/btrfs/scrub.c         |  38
-rw-r--r--   fs/btrfs/send.c          |   6
-rw-r--r--   fs/btrfs/super.c         |   3
-rw-r--r--   fs/btrfs/tree-checker.c  |  30
-rw-r--r--   fs/btrfs/tree-checker.h  |   1
-rw-r--r--   fs/btrfs/zoned.c         |   6
14 files changed, 169 insertions, 74 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 434cf3d5f4cf..226e6434a58a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1885,6 +1885,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
up_write(&space_info->groups_sem);
goto next;
}
+
+ /*
+ * Cache the zone_unusable value before turning the block group
+ * to read only. As soon as the block group is read only its
+ * zone_unusable value gets moved to the block group's read-only
+ * bytes and isn't available for calculations anymore. We also
+ * cache it before unlocking the block group, to prevent races
+ * (reports from KCSAN and such tools) with tasks updating it.
+ */
+ zone_unusable = bg->zone_unusable;
+
spin_unlock(&bg->lock);
/*
@@ -1900,13 +1911,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
goto next;
}
- /*
- * Cache the zone_unusable value before turning the block group
- * to read only. As soon as the blog group is read only it's
- * zone_unusable value gets moved to the block group's read-only
- * bytes and isn't available for calculations anymore.
- */
- zone_unusable = bg->zone_unusable;
ret = inc_block_group_ro(bg, 0);
up_write(&space_info->groups_sem);
if (ret < 0)
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 944a7340f6a4..3981c941f5b5 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -167,13 +167,7 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
block_group->discard_eligible_time = 0;
queued = !list_empty(&block_group->discard_list);
list_del_init(&block_group->discard_list);
- /*
- * If the block group is currently running in the discard workfn, we
- * don't want to deref it, since it's still being used by the workfn.
- * The workfn will notice this case and deref the block group when it is
- * finished.
- */
- if (queued && !running)
+ if (queued)
btrfs_put_block_group(block_group);
spin_unlock(&discard_ctl->lock);
@@ -260,9 +254,10 @@ again:
block_group->discard_cursor = block_group->start;
block_group->discard_state = BTRFS_DISCARD_EXTENTS;
}
- discard_ctl->block_group = block_group;
}
if (block_group) {
+ btrfs_get_block_group(block_group);
+ discard_ctl->block_group = block_group;
*discard_state = block_group->discard_state;
*discard_index = block_group->discard_index;
}
@@ -493,9 +488,20 @@ static void btrfs_discard_workfn(struct work_struct *work)
block_group = peek_discard_list(discard_ctl, &discard_state,
&discard_index, now);
- if (!block_group || !btrfs_run_discard_work(discard_ctl))
+ if (!block_group)
return;
+ if (!btrfs_run_discard_work(discard_ctl)) {
+ spin_lock(&discard_ctl->lock);
+ btrfs_put_block_group(block_group);
+ discard_ctl->block_group = NULL;
+ spin_unlock(&discard_ctl->lock);
+ return;
+ }
if (now < block_group->discard_eligible_time) {
+ spin_lock(&discard_ctl->lock);
+ btrfs_put_block_group(block_group);
+ discard_ctl->block_group = NULL;
+ spin_unlock(&discard_ctl->lock);
btrfs_discard_schedule_work(discard_ctl, false);
return;
}
@@ -547,15 +553,7 @@ static void btrfs_discard_workfn(struct work_struct *work)
spin_lock(&discard_ctl->lock);
discard_ctl->prev_discard = trimmed;
discard_ctl->prev_discard_time = now;
- /*
- * If the block group was removed from the discard list while it was
- * running in this workfn, then we didn't deref it, since this function
- * still owned that reference. But we set the discard_ctl->block_group
- * back to NULL, so we can use that condition to know that now we need
- * to deref the block_group.
- */
- if (discard_ctl->block_group == NULL)
- btrfs_put_block_group(block_group);
+ btrfs_put_block_group(block_group);
discard_ctl->block_group = NULL;
__btrfs_discard_schedule_work(discard_ctl, now, false);
spin_unlock(&discard_ctl->lock);
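The discard.c hunks above simplify reference ownership: discard_ctl->block_group now always holds its own reference on the block group it points to, taken when the pointer is published in peek_discard_list() and dropped, under discard_ctl->lock, wherever the pointer is cleared. Below is a minimal userspace sketch of that ownership rule; the names (struct ctl, struct group, ctl_set_active, ctl_clear_active) are invented for illustration and this is not btrfs code.

/*
 * Rule being illustrated: whoever publishes a pointer in ctl->active takes a
 * reference, and whoever clears it drops that reference, both under ctl->lock.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct group {
	atomic_int refs;
};

struct ctl {
	pthread_mutex_t lock;
	struct group *active;	/* owns one reference while non-NULL */
};

static void group_get(struct group *g)
{
	atomic_fetch_add(&g->refs, 1);
}

static void group_put(struct group *g)
{
	if (atomic_fetch_sub(&g->refs, 1) == 1)
		free(g);
}

/* Publish @g as the group currently being worked on. */
static void ctl_set_active(struct ctl *c, struct group *g)
{
	pthread_mutex_lock(&c->lock);
	group_get(g);
	c->active = g;
	pthread_mutex_unlock(&c->lock);
}

/* Drop the reference the ctl held and clear the pointer in one critical section. */
static void ctl_clear_active(struct ctl *c)
{
	pthread_mutex_lock(&c->lock);
	if (c->active) {
		group_put(c->active);
		c->active = NULL;
	}
	pthread_mutex_unlock(&c->lock);
}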
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 967c6b5dd0a4..34a30d61b470 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4314,6 +4314,14 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_cleanup_defrag_inodes(fs_info);
/*
+ * Handle the error fs first, as it will flush and wait for all ordered
+ * extents. This will generate delayed iputs, thus we want to handle
+ * it first.
+ */
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
+ btrfs_error_commit_super(fs_info);
+
+ /*
* Wait for any fixup workers to complete.
* If we don't wait for them here and they are still running by the time
* we call kthread_stop() against the cleaner kthread further below, we
@@ -4334,6 +4342,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_flush_workqueue(fs_info->delalloc_workers);
/*
+ * We can have ordered extents getting their last reference dropped from
+ * the fs_info->workers queue because for async writes for data bios we
+ * queue a work for that queue, at btrfs_wq_submit_bio(), that runs
+ * run_one_async_done() which calls btrfs_bio_end_io() in case the bio
+ * has an error, and that latter function can do the final
+ * btrfs_put_ordered_extent() on the ordered extent attached to the bio,
+ * which adds a delayed iput for the inode. So we must flush the queue
+ * so that we don't have delayed iputs after committing the current
+ * transaction below and stopping the cleaner and transaction kthreads.
+ */
+ btrfs_flush_workqueue(fs_info->workers);
+
+ /*
+ * When finishing a compressed write bio we schedule a work queue item
+ * to finish an ordered extent - btrfs_finish_compressed_write_work()
+ * calls btrfs_finish_ordered_extent() which in turn does a call to
+ * btrfs_queue_ordered_fn(), and that queues the ordered extent
+ * completion either in the endio_write_workers work queue or in the
+ * fs_info->endio_freespace_worker work queue. We flush those queues
+ * below, so before we flush them we must flush this queue for the
+ * workers of compressed writes.
+ */
+ flush_workqueue(fs_info->compressed_write_workers);
+
+ /*
* After we parked the cleaner kthread, ordered extents may have
* completed and created new delayed iputs. If one of the async reclaim
* tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
@@ -4390,9 +4423,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "commit super ret %d", ret);
}
- if (BTRFS_FS_ERROR(fs_info))
- btrfs_error_commit_super(fs_info);
-
kthread_stop(fs_info->transaction_kthread);
kthread_stop(fs_info->cleaner_kthread);
@@ -4529,10 +4559,6 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
/* cleanup FS via transaction */
btrfs_cleanup_transaction(fs_info);
- mutex_lock(&fs_info->cleaner_mutex);
- btrfs_run_delayed_iputs(fs_info);
- mutex_unlock(&fs_info->cleaner_mutex);
-
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 021cf468274b..ef77d4208510 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -164,6 +164,14 @@ search_again:
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
+ if (unlikely(num_refs == 0)) {
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected zero reference count for extent item (%llu %u %llu)",
+ key.objectid, key.type, key.offset);
+ btrfs_abort_transaction(trans, ret);
+ goto out_free;
+ }
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
ret = -EUCLEAN;
@@ -177,8 +185,6 @@ search_again:
goto out_free;
}
-
- BUG_ON(num_refs == 0);
} else {
num_refs = 0;
extent_flags = 0;
@@ -208,10 +214,19 @@ search_again:
goto search_again;
}
spin_lock(&head->lock);
- if (head->extent_op && head->extent_op->update_flags)
+ if (head->extent_op && head->extent_op->update_flags) {
extent_flags |= head->extent_op->flags_to_set;
- else
- BUG_ON(num_refs == 0);
+ } else if (unlikely(num_refs == 0)) {
+ spin_unlock(&head->lock);
+ mutex_unlock(&head->mutex);
+ spin_unlock(&delayed_refs->lock);
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected zero reference count for extent %llu (%s)",
+ bytenr, metadata ? "metadata" : "data");
+ btrfs_abort_transaction(trans, ret);
+ goto out_free;
+ }
num_refs += head->ref_mod;
spin_unlock(&head->lock);
@@ -5540,7 +5555,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, eb, 1);
else
ret = btrfs_dec_ref(trans, root, eb, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
if (is_fstree(root->root_key.objectid)) {
ret = btrfs_qgroup_trace_leaf_items(trans, eb);
if (ret) {
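The extent-tree.c hunks above replace BUG_ON() assertions on a zero reference count with graceful failure: report the corruption, abort the transaction, and return -EUCLEAN. The following is a self-contained, userspace-flavoured sketch of that shape only; names are hypothetical and a plain error return stands in for btrfs_abort_transaction().

/*
 * Treat impossible on-disk state as corruption instead of crashing: unwind
 * any locks taken so far, log the problem, and return -EUCLEAN.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#ifndef EUCLEAN
#define EUCLEAN 117	/* "Structure needs cleaning", used for fs corruption */
#endif

struct ref_head {
	pthread_mutex_t lock;
	long ref_mod;
};

int lookup_extent_refs(struct ref_head *head, unsigned long long *num_refs,
		       unsigned long long bytenr)
{
	pthread_mutex_lock(&head->lock);
	if (*num_refs == 0) {
		/* Previously a BUG_ON(); now back out and report the error. */
		pthread_mutex_unlock(&head->lock);
		fprintf(stderr,
			"unexpected zero reference count for extent %llu\n", bytenr);
		return -EUCLEAN;
	}
	*num_refs += head->ref_mod;
	pthread_mutex_unlock(&head->lock);
	return 0;
}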
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b2ae50dcca0f..ed08d8e5639f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3565,10 +3565,10 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
}
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *eb, *exists = NULL;
int ret;
@@ -3604,8 +3604,11 @@ again:
free_eb:
btrfs_release_extent_buffer(eb);
return exists;
-}
+#else
+ /* Stub to avoid linker error when compiled with optimizations turned off. */
+ return NULL;
#endif
+}
static struct extent_buffer *grab_extent_buffer(
struct btrfs_fs_info *fs_info, struct page *page)
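The extent_io.c hunk moves the CONFIG_BTRFS_FS_RUN_SANITY_TESTS guard from around alloc_test_extent_buffer() to inside it, so the symbol is defined in every configuration; at -O0 a call site guarded by a config check may not be optimized away and would otherwise fail to link. A generic illustration of the same trick follows, using made-up names and a made-up config symbol.

/* Hypothetical names; only the #ifdef placement mirrors the hunk above. */
#include <stddef.h>

struct widget;
struct widget *do_alloc_test_widget(unsigned long start);

struct widget *alloc_test_widget(unsigned long start)
{
#ifdef CONFIG_RUN_SANITY_TESTS
	return do_alloc_test_widget(start);
#else
	/* Stub: keeps the symbol defined so unoptimized builds still link. */
	(void)start;
	return NULL;
#endif
}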
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 68092b64e29e..e794606e7c78 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2225,15 +2225,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
* will always return true.
* So here we need to do extra page alignment for
* filemap_range_has_page().
+ *
+ * And do not decrease page_lockend right now, as it can be 0.
*/
const u64 page_lockstart = round_up(lockstart, PAGE_SIZE);
- const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1;
+ const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE);
while (1) {
truncate_pagecache_range(inode, lockstart, lockend);
lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
cached_state);
+ /* The same page or adjacent pages. */
+ if (page_lockend <= page_lockstart)
+ break;
/*
* We can't have ordered extents in the range, nor dirty/writeback
* pages, because we have locked the inode's VFS lock in exclusive
@@ -2245,7 +2250,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
* we do, unlock the range and retry.
*/
if (!filemap_range_has_page(inode->i_mapping, page_lockstart,
- page_lockend))
+ page_lockend - 1))
break;
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
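The file.c hunk keeps page_lockend as an exclusive bound because the old "round_down(lockend + 1, PAGE_SIZE) - 1" underflows to U64_MAX whenever the punched range ends inside the first page. The small standalone program below works through one such case, assuming 4 KiB pages; the values are illustrative only.

/* Worked example of the underflow this hunk avoids (PAGE_SIZE assumed 4096). */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

static uint64_t round_up_u64(uint64_t x, uint64_t a)   { return (x + a - 1) / a * a; }
static uint64_t round_down_u64(uint64_t x, uint64_t a) { return x / a * a; }

int main(void)
{
	/* A hole punched entirely inside the first page, bytes 1024..2047. */
	uint64_t lockstart = 1024, lockend = 2047;
	uint64_t page_lockstart = round_up_u64(lockstart, PAGE_SIZE);	/* 4096 */
	uint64_t page_lockend = round_down_u64(lockend + 1, PAGE_SIZE);	/* 0 */

	/* The old code computed "page_lockend - 1" here and wrapped 0 to U64_MAX. */
	printf("page_lockstart=%llu page_lockend=%llu\n",
	       (unsigned long long)page_lockstart, (unsigned long long)page_lockend);

	/* New check: no full page is covered, so skip filemap_range_has_page(). */
	if (page_lockend <= page_lockstart)
		printf("same or adjacent pages, nothing to check\n");
	return 0;
}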
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cedffa567a75..48d257923672 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1546,6 +1546,7 @@ out_unlock:
locked_page,
clear_bits,
page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
start += cur_alloc_size;
}
@@ -1559,6 +1560,7 @@ out_unlock:
clear_bits |= EXTENT_CLEAR_DATA_RESV;
extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits, page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, end - start + 1, NULL);
}
return ret;
}
@@ -2056,12 +2058,13 @@ next_slot:
/*
* If the found extent starts after requested offset, then
- * adjust extent_end to be right before this extent begins
+ * adjust cur_offset to be right before this extent begins.
*/
if (found_key.offset > cur_offset) {
- extent_end = found_key.offset;
- extent_type = 0;
- goto must_cow;
+ if (cow_start == (u64)-1)
+ cow_start = cur_offset;
+ cur_offset = found_key.offset;
+ goto next_slot;
}
/*
@@ -2222,13 +2225,15 @@ error:
*/
if (cow_start != (u64)-1)
cur_offset = cow_start;
- if (cur_offset < end)
+ if (cur_offset < end) {
extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
+ btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
+ }
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 537e184b4b1d..474758c878fc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2931,6 +2931,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
int ret;
ASSERT(page_index <= last_index);
+again:
page = find_lock_page(inode->i_mapping, page_index);
if (!page) {
page_cache_sync_readahead(inode->i_mapping, ra, NULL,
@@ -2952,6 +2953,11 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
ret = -EIO;
goto release_page;
}
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ put_page(page);
+ goto again;
+ }
}
/*
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6be092bb814f..7632d652a125 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -153,12 +153,14 @@ struct scrub_stripe {
unsigned int init_nr_io_errors;
unsigned int init_nr_csum_errors;
unsigned int init_nr_meta_errors;
+ unsigned int init_nr_meta_gen_errors;
/*
* The following error bitmaps are all for the current status.
* Every time we submit a new read, these bitmaps may be updated.
*
- * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap;
+ * error_bitmap = io_error_bitmap | csum_error_bitmap |
+ * meta_error_bitmap | meta_generation_bitmap;
*
* IO and csum errors can happen for both metadata and data.
*/
@@ -166,6 +168,7 @@ struct scrub_stripe {
unsigned long io_error_bitmap;
unsigned long csum_error_bitmap;
unsigned long meta_error_bitmap;
+ unsigned long meta_gen_error_bitmap;
/* For writeback (repair or replace) error reporting. */
unsigned long write_error_bitmap;
@@ -617,7 +620,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
memcpy(on_disk_csum, header->csum, fs_info->csum_size);
if (logical != btrfs_stack_header_bytenr(header)) {
- bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad bytenr, has %llu want %llu",
@@ -673,7 +676,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
}
if (stripe->sectors[sector_nr].generation !=
btrfs_stack_header_generation(header)) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_set(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad generation, has %llu want %llu",
@@ -685,6 +688,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_clear(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
}
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
@@ -973,8 +977,22 @@ skip:
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("header error", dev, false,
stripe->logical, physical);
+ if (test_bit(sector_nr, &stripe->meta_gen_error_bitmap))
+ if (__ratelimit(&rs) && dev)
+ scrub_print_common_warning("generation error", dev, false,
+ stripe->logical, physical);
}
+ /* Update the device stats. */
+ for (int i = 0; i < stripe->init_nr_io_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS);
+ for (int i = 0; i < stripe->init_nr_csum_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ /* Generation mismatch error is based on each metadata, not each block. */
+ for (int i = 0; i < stripe->init_nr_meta_gen_errors;
+ i += (fs_info->nodesize >> fs_info->sectorsize_bits))
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS);
+
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
@@ -983,7 +1001,8 @@ skip:
sctx->stat.no_csum += nr_nodatacsum_sectors;
sctx->stat.read_errors += stripe->init_nr_io_errors;
sctx->stat.csum_errors += stripe->init_nr_csum_errors;
- sctx->stat.verify_errors += stripe->init_nr_meta_errors;
+ sctx->stat.verify_errors += stripe->init_nr_meta_errors +
+ stripe->init_nr_meta_gen_errors;
sctx->stat.uncorrectable_errors +=
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1029,6 +1048,8 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
stripe->nr_sectors);
stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
stripe->nr_sectors);
+ stripe->init_nr_meta_gen_errors = bitmap_weight(&stripe->meta_gen_error_bitmap,
+ stripe->nr_sectors);
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
goto out;
@@ -1143,6 +1164,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
bitmap_set(&stripe->write_error_bitmap, sector_nr,
bio_size >> fs_info->sectorsize_bits);
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
+ for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
}
bio_put(&bbio->bio);
@@ -1505,10 +1529,12 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
stripe->init_nr_io_errors = 0;
stripe->init_nr_csum_errors = 0;
stripe->init_nr_meta_errors = 0;
+ stripe->init_nr_meta_gen_errors = 0;
stripe->error_bitmap = 0;
stripe->io_error_bitmap = 0;
stripe->csum_error_bitmap = 0;
stripe->meta_error_bitmap = 0;
+ stripe->meta_gen_error_bitmap = 0;
}
/*
@@ -1538,8 +1564,8 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
u64 extent_gen;
int ret;
- if (unlikely(!extent_root)) {
- btrfs_err(fs_info, "no valid extent root for scrub");
+ if (unlikely(!extent_root || !csum_root)) {
+ btrfs_err(fs_info, "no valid extent or csum root for scrub");
return -EUCLEAN;
}
memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) *
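The device-stats loop added in scrub.c steps by nodesize >> sectorsize_bits so that BTRFS_DEV_STAT_GENERATION_ERRS is bumped once per bad tree block rather than once per bad sector. The short program below works through the arithmetic with assumed, typical values (16 KiB nodesize, 4 KiB sectors); it is only an illustration of the loop above.

/* Assumed geometry: nodesize 16 KiB, sectorsize 4 KiB (sectorsize_bits == 12). */
#include <stdio.h>

int main(void)
{
	unsigned int nodesize = 16384, sectorsize_bits = 12;
	unsigned int init_nr_meta_gen_errors = 8;	/* two bad tree blocks' worth of sectors */
	unsigned int step = nodesize >> sectorsize_bits;	/* 4 sectors per tree block */
	unsigned int gen_errs = 0;

	for (unsigned int i = 0; i < init_nr_meta_gen_errors; i += step)
		gen_errs++;	/* one generation-error stat per tree block */

	printf("generation errors reported: %u\n", gen_errs);	/* prints 2 */
	return 0;
}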
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index aa1e6d88a72c..e2ead36e5be4 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -487,10 +487,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
if (p->buf_len >= len)
return 0;
- if (len > PATH_MAX) {
- WARN_ON(1);
- return -ENOMEM;
- }
+ if (WARN_ON(len > PATH_MAX))
+ return -ENAMETOOLONG;
path_len = p->end - p->start;
old_buf_len = p->buf_len;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index fb4992570c2b..2045ac3d94c4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1336,8 +1336,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
subvol_name = btrfs_get_subvol_name_from_objectid(info,
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
if (!IS_ERR(subvol_name)) {
- seq_puts(seq, ",subvol=");
- seq_escape(seq, subvol_name, " \t\n\\");
+ seq_show_option(seq, "subvol", subvol_name);
kfree(subvol_name);
}
return 0;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 53c74010140e..6d16506bbdc0 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1889,6 +1889,11 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
+ if (unlikely(!btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN))) {
+ generic_err(leaf, 0, "invalid flag for leaf, WRITTEN not set");
+ return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
+ }
+
/*
* Extent buffers from a relocation tree have a owner field that
* corresponds to the subvolume tree they are based on. So just from an
@@ -1950,6 +1955,7 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
u64 item_data_end;
+ enum btrfs_tree_block_status ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
@@ -2005,21 +2011,10 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
- /*
- * We only want to do this if WRITTEN is set, otherwise the leaf
- * may be in some intermediate state and won't appear valid.
- */
- if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
- enum btrfs_tree_block_status ret;
-
- /*
- * Check if the item size and content meet other
- * criteria
- */
- ret = check_leaf_item(leaf, &key, slot, &prev_key);
- if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
- return ret;
- }
+ /* Check if the item size and content meet other criteria. */
+ ret = check_leaf_item(leaf, &key, slot, &prev_key);
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
+ return ret;
prev_key.objectid = key.objectid;
prev_key.type = key.type;
@@ -2049,6 +2044,11 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
int level = btrfs_header_level(node);
u64 bytenr;
+ if (unlikely(!btrfs_header_flag(node, BTRFS_HEADER_FLAG_WRITTEN))) {
+ generic_err(node, 0, "invalid flag for node, WRITTEN not set");
+ return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
+ }
+
if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
generic_err(node, 0,
"invalid level for node, have %d expect [1, %d]",
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index 3c2a02a72f64..43f2ceb78f34 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -51,6 +51,7 @@ enum btrfs_tree_block_status {
BTRFS_TREE_BLOCK_INVALID_BLOCKPTR,
BTRFS_TREE_BLOCK_INVALID_ITEM,
BTRFS_TREE_BLOCK_INVALID_OWNER,
+ BTRFS_TREE_BLOCK_WRITTEN_NOT_SET,
};
/*
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index c4463c3f2068..197dfafbf401 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2006,6 +2006,9 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
physical = map->stripes[i].physical;
zinfo = device->zone_info;
+ if (!device->bdev)
+ continue;
+
if (zinfo->max_active_zones == 0)
continue;
@@ -2165,6 +2168,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
const u64 physical = map->stripes[i].physical;
struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ if (!device->bdev)
+ continue;
+
if (zinfo->max_active_zones == 0)
continue;