diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
commit | 149c51f876322d9bfbd5e2d6ffae7aff3d794384 (patch) | |
tree | a61c7dd828356e307fca06fc66dbdbf9b109c18f /fs/btrfs/tree-defrag.c | |
parent | 97971df811b8854882c0f6c6631e23ab8cdcc44f (diff) | |
parent | b7af0635c87ff78d6bd523298ab7471f9ffd3ce5 (diff) |
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This round there are a lot of cleanups and moved code so the diffstat
looks huge, otherwise there are some nice performance improvements and
an update to raid56 reliability.
User visible features:
- raid56 reliability vs performance trade off:
- fix destructive RMW for raid5 data (raid6 still needs work): do
full checksum verification for all data during RMW cycle, this
should prevent rewriting potentially corrupted data without
notice
- stripes are cached in memory which should reduce the performance
impact but still can hurt some workloads
- checksums are verified after repair again
- this is the last option without introducing additional features
(write intent bitmap, journal, another tree), the extra checksum
read/verification was supposed to be avoided by the original
implementation exactly for performance reasons but that caused
all the reliability problems
- discard=async by default for devices that support it
- implement emergency flush reserve to avoid almost all unnecessary
transaction aborts due to ENOSPC in cases where there are too many
delayed refs or delayed allocation
- skip block group synchronization if there's no change in used
bytes, can reduce transaction commit count for some workloads
Performance improvements:
- fiemap and lseek:
- overall speedup due to skipping unnecessary or duplicate
searches (-40% run time)
- cache some data structures and sharedness of extents (-30% run
time)
- send:
- faster backref resolution when finding clones
- cached leaf to root mapping for faster backref walking
- improved clone/sharing detection
- overall run time improvements (-70%)
Core:
- module initialization converted to a table of function pointers run
in a sequence
- preparation for fscrypt, extend passing file names across calls,
dir item can store encryption status
- raid56 updates:
- more accurate error tracking of sectors within stripe
- simplify recovery path and remove dedicated endio worker kthread
- simplify scrub call paths
- refactoring to support the extra data checksum verification
during RMW cycle
- tree block parentness checks consolidated and done at metadata read
time
- improved error handling
- cleanups:
- move a lot of code for better synchronization between kernel and
user space sources, split big files
- enum cleanups
- GFP flag cleanups
- header file cleanups, prototypes, dependencies
- redundant parameter cleanups
- inline extent handling simplifications
- inode parameter conversion
- data structure cleanups, reductions, renames, merges"
* tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits)
btrfs: print transaction aborted messages with an error level
btrfs: sync some cleanups from progs into uapi/btrfs.h
btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
btrfs: fix extent map use-after-free when handling missing device in read_one_chunk
btrfs: remove outdated logic from overwrite_item() and add assertion
btrfs: unify overwrite_item() and do_overwrite_item()
btrfs: replace strncpy() with strscpy()
btrfs: fix uninitialized variable in find_first_clear_extent_bit
btrfs: fix uninitialized parent in insert_state
btrfs: add might_sleep() annotations
btrfs: add stack helpers for a few btrfs items
btrfs: add nr_global_roots to the super block definition
btrfs: remove BTRFS_LEAF_DATA_OFFSET
btrfs: add helpers for manipulating leaf items and data
btrfs: add eb to btrfs_node_key_ptr_offset
btrfs: pass the extent buffer for the btrfs_item_nr helpers
btrfs: move the csum helpers into ctree.h
btrfs: move eb offset helpers into extent_io.h
btrfs: move file_extent_item helpers into file-item.h
btrfs: move leaf_data_end into ctree.c
...
Diffstat (limited to 'fs/btrfs/tree-defrag.c')
-rw-r--r-- | fs/btrfs/tree-defrag.c | 132 |
1 files changed, 0 insertions, 132 deletions
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c deleted file mode 100644 index b6cf39f4e7e4..000000000000 --- a/fs/btrfs/tree-defrag.c +++ /dev/null @@ -1,132 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2007 Oracle. All rights reserved. - */ - -#include <linux/sched.h> -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "transaction.h" -#include "locking.h" - -/* - * Defrag all the leaves in a given btree. - * Read all the leaves and try to get key order to - * better reflect disk order - */ - -int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_path *path = NULL; - struct btrfs_key key; - int ret = 0; - int wret; - int level; - int next_key_ret = 0; - u64 last_ret = 0; - - if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) - goto out; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - level = btrfs_header_level(root->node); - - if (level == 0) - goto out; - - if (root->defrag_progress.objectid == 0) { - struct extent_buffer *root_node; - u32 nritems; - - root_node = btrfs_lock_root_node(root); - nritems = btrfs_header_nritems(root_node); - root->defrag_max.objectid = 0; - /* from above we know this is not a leaf */ - btrfs_node_key_to_cpu(root_node, &root->defrag_max, - nritems - 1); - btrfs_tree_unlock(root_node); - free_extent_buffer(root_node); - memset(&key, 0, sizeof(key)); - } else { - memcpy(&key, &root->defrag_progress, sizeof(key)); - } - - path->keep_locks = 1; - - ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION); - if (ret < 0) - goto out; - if (ret > 0) { - ret = 0; - goto out; - } - btrfs_release_path(path); - /* - * We don't need a lock on a leaf. btrfs_realloc_node() will lock all - * leafs from path->nodes[1], so set lowest_level to 1 to avoid later - * a deadlock (attempting to write lock an already write locked leaf). - */ - path->lowest_level = 1; - wret = btrfs_search_slot(trans, root, &key, path, 0, 1); - - if (wret < 0) { - ret = wret; - goto out; - } - if (!path->nodes[1]) { - ret = 0; - goto out; - } - /* - * The node at level 1 must always be locked when our path has - * keep_locks set and lowest_level is 1, regardless of the value of - * path->slots[1]. - */ - BUG_ON(path->locks[1] == 0); - ret = btrfs_realloc_node(trans, root, - path->nodes[1], 0, - &last_ret, - &root->defrag_progress); - if (ret) { - WARN_ON(ret == -EAGAIN); - goto out; - } - /* - * Now that we reallocated the node we can find the next key. Note that - * btrfs_find_next_key() can release our path and do another search - * without COWing, this is because even with path->keep_locks = 1, - * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a - * node when path->slots[node_level - 1] does not point to the last - * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore - * we search for the next key after reallocating our node. - */ - path->slots[1] = btrfs_header_nritems(path->nodes[1]); - next_key_ret = btrfs_find_next_key(root, path, &key, 1, - BTRFS_OLDEST_GENERATION); - if (next_key_ret == 0) { - memcpy(&root->defrag_progress, &key, sizeof(key)); - ret = -EAGAIN; - } -out: - btrfs_free_path(path); - if (ret == -EAGAIN) { - if (root->defrag_max.objectid > root->defrag_progress.objectid) - goto done; - if (root->defrag_max.type > root->defrag_progress.type) - goto done; - if (root->defrag_max.offset > root->defrag_progress.offset) - goto done; - ret = 0; - } -done: - if (ret != -EAGAIN) - memset(&root->defrag_progress, 0, - sizeof(root->defrag_progress)); - - return ret; -} |