diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
commit | 149c51f876322d9bfbd5e2d6ffae7aff3d794384 (patch) | |
tree | a61c7dd828356e307fca06fc66dbdbf9b109c18f /fs/btrfs/relocation.c | |
parent | 97971df811b8854882c0f6c6631e23ab8cdcc44f (diff) | |
parent | b7af0635c87ff78d6bd523298ab7471f9ffd3ce5 (diff) |
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This round there are a lot of cleanups and moved code so the diffstat
looks huge, otherwise there are some nice performance improvements and
an update to raid56 reliability.
User visible features:
- raid56 reliability vs performance trade off:
- fix destructive RMW for raid5 data (raid6 still needs work): do
full checksum verification for all data during RMW cycle, this
should prevent rewriting potentially corrupted data without
notice
- stripes are cached in memory which should reduce the performance
impact but still can hurt some workloads
- checksums are verified after repair again
- this is the last option without introducing additional features
(write intent bitmap, journal, another tree), the extra checksum
read/verification was supposed to be avoided by the original
implementation exactly for performance reasons but that caused
all the reliability problems
- discard=async by default for devices that support it
- implement emergency flush reserve to avoid almost all unnecessary
transaction aborts due to ENOSPC in cases where there are too many
delayed refs or delayed allocation
- skip block group synchronization if there's no change in used
bytes, can reduce transaction commit count for some workloads
Performance improvements:
- fiemap and lseek:
- overall speedup due to skipping unnecessary or duplicate
searches (-40% run time)
- cache some data structures and sharedness of extents (-30% run
time)
- send:
- faster backref resolution when finding clones
- cached leaf to root mapping for faster backref walking
- improved clone/sharing detection
- overall run time improvements (-70%)
Core:
- module initialization converted to a table of function pointers run
in a sequence
- preparation for fscrypt, extend passing file names across calls,
dir item can store encryption status
- raid56 updates:
- more accurate error tracking of sectors within stripe
- simplify recovery path and remove dedicated endio worker kthread
- simplify scrub call paths
- refactoring to support the extra data checksum verification
during RMW cycle
- tree block parentness checks consolidated and done at metadata read
time
- improved error handling
- cleanups:
- move a lot of code for better synchronization between kernel and
user space sources, split big files
- enum cleanups
- GFP flag cleanups
- header file cleanups, prototypes, dependencies
- redundant parameter cleanups
- inline extent handling simplifications
- inode parameter conversion
- data structure cleanups, reductions, renames, merges"
* tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits)
btrfs: print transaction aborted messages with an error level
btrfs: sync some cleanups from progs into uapi/btrfs.h
btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
btrfs: fix extent map use-after-free when handling missing device in read_one_chunk
btrfs: remove outdated logic from overwrite_item() and add assertion
btrfs: unify overwrite_item() and do_overwrite_item()
btrfs: replace strncpy() with strscpy()
btrfs: fix uninitialized variable in find_first_clear_extent_bit
btrfs: fix uninitialized parent in insert_state
btrfs: add might_sleep() annotations
btrfs: add stack helpers for a few btrfs items
btrfs: add nr_global_roots to the super block definition
btrfs: remove BTRFS_LEAF_DATA_OFFSET
btrfs: add helpers for manipulating leaf items and data
btrfs: add eb to btrfs_node_key_ptr_offset
btrfs: pass the extent buffer for the btrfs_item_nr helpers
btrfs: move the csum helpers into ctree.h
btrfs: move eb offset helpers into extent_io.h
btrfs: move file_extent_item helpers into file-item.h
btrfs: move leaf_data_end into ctree.c
...
Diffstat (limited to 'fs/btrfs/relocation.c')
-rw-r--r-- | fs/btrfs/relocation.c | 94 |
1 files changed, 61 insertions, 33 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 666a37a0ee89..31ec4a7658ce 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -27,6 +27,15 @@ #include "subpage.h" #include "zoned.h" #include "inode-item.h" +#include "space-info.h" +#include "fs.h" +#include "accessors.h" +#include "extent-tree.h" +#include "root-tree.h" +#include "file-item.h" +#include "relocation.h" +#include "super.h" +#include "tree-checker.h" /* * Relocation overview @@ -470,7 +479,7 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree( int ret; int err = 0; - iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS); + iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info); if (!iter) return ERR_PTR(-ENOMEM); path = btrfs_alloc_path(); @@ -1109,10 +1118,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans, inode = find_next_inode(root, key.objectid); first = 0; } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) { - btrfs_add_delayed_iput(inode); + btrfs_add_delayed_iput(BTRFS_I(inode)); inode = find_next_inode(root, key.objectid); } if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) { + struct extent_state *cached_state = NULL; + end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); WARN_ON(!IS_ALIGNED(key.offset, @@ -1120,14 +1131,15 @@ int replace_file_extents(struct btrfs_trans_handle *trans, WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize)); end--; ret = try_lock_extent(&BTRFS_I(inode)->io_tree, - key.offset, end); + key.offset, end, + &cached_state); if (!ret) continue; btrfs_drop_extent_map_range(BTRFS_I(inode), key.offset, end, true); unlock_extent(&BTRFS_I(inode)->io_tree, - key.offset, end, NULL); + key.offset, end, &cached_state); } } @@ -1170,7 +1182,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, if (dirty) btrfs_mark_buffer_dirty(leaf); if (inode) - btrfs_add_delayed_iput(inode); + btrfs_add_delayed_iput(BTRFS_I(inode)); return ret; } @@ -1516,6 +1528,8 @@ static int invalidate_extent_cache(struct btrfs_root *root, objectid = min_key->objectid; while (1) { + struct extent_state *cached_state = NULL; + cond_resched(); iput(inode); @@ -1566,9 +1580,9 @@ static int invalidate_extent_cache(struct btrfs_root *root, } /* the lock_extent waits for read_folio to complete */ - lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); + lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, true); - unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); } return 0; } @@ -2597,10 +2611,14 @@ static int tree_block_processed(u64 bytenr, struct reloc_control *rc) static int get_tree_block_key(struct btrfs_fs_info *fs_info, struct tree_block *block) { + struct btrfs_tree_parent_check check = { + .level = block->level, + .owner_root = block->owner, + .transid = block->key.offset + }; struct extent_buffer *eb; - eb = read_tree_block(fs_info, block->bytenr, block->owner, - block->key.offset, block->level, NULL); + eb = read_tree_block(fs_info, block->bytenr, &check); if (IS_ERR(eb)) return PTR_ERR(eb); if (!extent_buffer_uptodate(eb)) { @@ -2861,25 +2879,27 @@ static noinline_for_stack int prealloc_file_extent_cluster( if (ret) return ret; - btrfs_inode_lock(&inode->vfs_inode, 0); + btrfs_inode_lock(inode, 0); for (nr = 0; nr < cluster->nr; nr++) { + struct extent_state *cached_state = NULL; + start = cluster->boundary[nr] - offset; if (nr + 1 < cluster->nr) end = cluster->boundary[nr + 1] - 1 - offset; else end = cluster->end - offset; - lock_extent(&inode->io_tree, start, end, NULL); + lock_extent(&inode->io_tree, start, end, &cached_state); num_bytes = end + 1 - start; ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start, num_bytes, num_bytes, end + 1, &alloc_hint); cur_offset = end + 1; - unlock_extent(&inode->io_tree, start, end, NULL); + unlock_extent(&inode->io_tree, start, end, &cached_state); if (ret) break; } - btrfs_inode_unlock(&inode->vfs_inode, 0); + btrfs_inode_unlock(inode, 0); if (cur_offset < prealloc_end) btrfs_free_reserved_data_space_noquota(inode->root->fs_info, @@ -2891,6 +2911,7 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod u64 start, u64 end, u64 block_start) { struct extent_map *em; + struct extent_state *cached_state = NULL; int ret = 0; em = alloc_extent_map(); @@ -2903,9 +2924,9 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod em->block_start = block_start; set_bit(EXTENT_FLAG_PINNED, &em->flags); - lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); + lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false); - unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); free_extent_map(em); return ret; @@ -2983,6 +3004,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, */ cur = max(page_start, cluster->boundary[*cluster_nr] - offset); while (cur <= page_end) { + struct extent_state *cached_state = NULL; u64 extent_start = cluster->boundary[*cluster_nr] - offset; u64 extent_end = get_cluster_boundary_end(cluster, *cluster_nr) - offset; @@ -2998,13 +3020,15 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, goto release_page; /* Mark the range delalloc and dirty for later writeback */ - lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL); + lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, + &cached_state); ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start, - clamped_end, 0, NULL); + clamped_end, 0, &cached_state); if (ret) { - clear_extent_bits(&BTRFS_I(inode)->io_tree, - clamped_start, clamped_end, - EXTENT_LOCKED | EXTENT_BOUNDARY); + clear_extent_bit(&BTRFS_I(inode)->io_tree, + clamped_start, clamped_end, + EXTENT_LOCKED | EXTENT_BOUNDARY, + &cached_state); btrfs_delalloc_release_metadata(BTRFS_I(inode), clamped_len, true); btrfs_delalloc_release_extents(BTRFS_I(inode), @@ -3031,7 +3055,8 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, boundary_start, boundary_end, EXTENT_BOUNDARY); } - unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL); + unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, + &cached_state); btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len); cur += clamped_len; @@ -3388,24 +3413,28 @@ int add_data_references(struct reloc_control *rc, struct btrfs_path *path, struct rb_root *blocks) { - struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - struct ulist *leaves = NULL; + struct btrfs_backref_walk_ctx ctx = { 0 }; struct ulist_iterator leaf_uiter; struct ulist_node *ref_node = NULL; - const u32 blocksize = fs_info->nodesize; + const u32 blocksize = rc->extent_root->fs_info->nodesize; int ret = 0; btrfs_release_path(path); - ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid, - 0, &leaves, NULL, true); + + ctx.bytenr = extent_key->objectid; + ctx.ignore_extent_item_pos = true; + ctx.fs_info = rc->extent_root->fs_info; + + ret = btrfs_find_all_leafs(&ctx); if (ret < 0) return ret; ULIST_ITER_INIT(&leaf_uiter); - while ((ref_node = ulist_next(leaves, &leaf_uiter))) { + while ((ref_node = ulist_next(ctx.refs, &leaf_uiter))) { + struct btrfs_tree_parent_check check = { 0 }; struct extent_buffer *eb; - eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL); + eb = read_tree_block(ctx.fs_info, ref_node->val, &check); if (IS_ERR(eb)) { ret = PTR_ERR(eb); break; @@ -3421,7 +3450,7 @@ int add_data_references(struct reloc_control *rc, } if (ret < 0) free_block_list(blocks); - ulist_free(leaves); + ulist_free(ctx.refs); return ret; } @@ -3905,8 +3934,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&rc->dirty_subvol_roots); btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1); mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(fs_info, &rc->processed_blocks, - IO_TREE_RELOC_BLOCKS, NULL); + extent_io_tree_init(fs_info, &rc->processed_blocks, IO_TREE_RELOC_BLOCKS); return rc; } @@ -4330,8 +4358,8 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len) disk_bytenr = file_pos + inode->index_cnt; csum_root = btrfs_csum_root(fs_info, disk_bytenr); - ret = btrfs_lookup_csums_range(csum_root, disk_bytenr, - disk_bytenr + len - 1, &list, 0, false); + ret = btrfs_lookup_csums_list(csum_root, disk_bytenr, + disk_bytenr + len - 1, &list, 0, false); if (ret) goto out; |