diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:47:51 -0800 |
commit | 149c51f876322d9bfbd5e2d6ffae7aff3d794384 (patch) | |
tree | a61c7dd828356e307fca06fc66dbdbf9b109c18f /fs/btrfs/volumes.h | |
parent | 97971df811b8854882c0f6c6631e23ab8cdcc44f (diff) | |
parent | b7af0635c87ff78d6bd523298ab7471f9ffd3ce5 (diff) |
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This round there are a lot of cleanups and moved code so the diffstat
looks huge, otherwise there are some nice performance improvements and
an update to raid56 reliability.
User visible features:
- raid56 reliability vs performance trade off:
- fix destructive RMW for raid5 data (raid6 still needs work): do
full checksum verification for all data during RMW cycle, this
should prevent rewriting potentially corrupted data without
notice
- stripes are cached in memory which should reduce the performance
impact but still can hurt some workloads
- checksums are verified after repair again
- this is the last option without introducing additional features
(write intent bitmap, journal, another tree), the extra checksum
read/verification was supposed to be avoided by the original
implementation exactly for performance reasons but that caused
all the reliability problems
- discard=async by default for devices that support it
- implement emergency flush reserve to avoid almost all unnecessary
transaction aborts due to ENOSPC in cases where there are too many
delayed refs or delayed allocation
- skip block group synchronization if there's no change in used
bytes, can reduce transaction commit count for some workloads
Performance improvements:
- fiemap and lseek:
- overall speedup due to skipping unnecessary or duplicate
searches (-40% run time)
- cache some data structures and sharedness of extents (-30% run
time)
- send:
- faster backref resolution when finding clones
- cached leaf to root mapping for faster backref walking
- improved clone/sharing detection
- overall run time improvements (-70%)
Core:
- module initialization converted to a table of function pointers run
in a sequence
- preparation for fscrypt, extend passing file names across calls,
dir item can store encryption status
- raid56 updates:
- more accurate error tracking of sectors within stripe
- simplify recovery path and remove dedicated endio worker kthread
- simplify scrub call paths
- refactoring to support the extra data checksum verification
during RMW cycle
- tree block parentness checks consolidated and done at metadata read
time
- improved error handling
- cleanups:
- move a lot of code for better synchronization between kernel and
user space sources, split big files
- enum cleanups
- GFP flag cleanups
- header file cleanups, prototypes, dependencies
- redundant parameter cleanups
- inline extent handling simplifications
- inode parameter conversion
- data structure cleanups, reductions, renames, merges"
* tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits)
btrfs: print transaction aborted messages with an error level
btrfs: sync some cleanups from progs into uapi/btrfs.h
btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
btrfs: fix extent map use-after-free when handling missing device in read_one_chunk
btrfs: remove outdated logic from overwrite_item() and add assertion
btrfs: unify overwrite_item() and do_overwrite_item()
btrfs: replace strncpy() with strscpy()
btrfs: fix uninitialized variable in find_first_clear_extent_bit
btrfs: fix uninitialized parent in insert_state
btrfs: add might_sleep() annotations
btrfs: add stack helpers for a few btrfs items
btrfs: add nr_global_roots to the super block definition
btrfs: remove BTRFS_LEAF_DATA_OFFSET
btrfs: add helpers for manipulating leaf items and data
btrfs: add eb to btrfs_node_key_ptr_offset
btrfs: pass the extent buffer for the btrfs_item_nr helpers
btrfs: move the csum helpers into ctree.h
btrfs: move eb offset helpers into extent_io.h
btrfs: move file_extent_item helpers into file-item.h
btrfs: move leaf_data_end into ctree.c
...
Diffstat (limited to 'fs/btrfs/volumes.h')
-rw-r--r-- | fs/btrfs/volumes.h | 116 |
1 files changed, 27 insertions, 89 deletions
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 099def5613b8..6b7a05f6cf82 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -6,10 +6,12 @@ #ifndef BTRFS_VOLUMES_H #define BTRFS_VOLUMES_H -#include <linux/bio.h> #include <linux/sort.h> #include <linux/btrfs.h> #include "async-thread.h" +#include "messages.h" +#include "tree-checker.h" +#include "rcu-string.h" #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) @@ -354,6 +356,8 @@ struct btrfs_fs_devices { * nonrot flag set */ bool rotating; + /* Devices support TRIM/discard commands */ + bool discardable; struct btrfs_fs_info *fs_info; /* sysfs kobjects */ @@ -368,8 +372,6 @@ struct btrfs_fs_devices { enum btrfs_read_policy read_policy; }; -#define BTRFS_BIO_INLINE_CSUM_SIZE 64 - #define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ - sizeof(struct btrfs_chunk)) \ / sizeof(struct btrfs_stripe) + 1) @@ -379,89 +381,6 @@ struct btrfs_fs_devices { - 2 * sizeof(struct btrfs_chunk)) \ / sizeof(struct btrfs_stripe) + 1) -/* - * Maximum number of sectors for a single bio to limit the size of the - * checksum array. This matches the number of bio_vecs per bio and thus the - * I/O size for buffered I/O. - */ -#define BTRFS_MAX_BIO_SECTORS (256) - -typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio); - -/* - * Additional info to pass along bio. - * - * Mostly for btrfs specific features like csum and mirror_num. - */ -struct btrfs_bio { - unsigned int mirror_num; - struct bvec_iter iter; - - /* for direct I/O */ - u64 file_offset; - - /* @device is for stripe IO submission. */ - struct btrfs_device *device; - u8 *csum; - u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; - - /* End I/O information supplied to btrfs_bio_alloc */ - btrfs_bio_end_io_t end_io; - void *private; - - /* For read end I/O handling */ - struct work_struct end_io_work; - - /* - * This member must come last, bio_alloc_bioset will allocate enough - * bytes for entire btrfs_bio but relies on bio being last. - */ - struct bio bio; -}; - -static inline struct btrfs_bio *btrfs_bio(struct bio *bio) -{ - return container_of(bio, struct btrfs_bio, bio); -} - -int __init btrfs_bioset_init(void); -void __cold btrfs_bioset_exit(void); - -struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf, - btrfs_bio_end_io_t end_io, void *private); -struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size, - btrfs_bio_end_io_t end_io, void *private); - -static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) -{ - bbio->bio.bi_status = status; - bbio->end_io(bbio); -} - -static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio) -{ - if (bbio->csum != bbio->csum_inline) { - kfree(bbio->csum); - bbio->csum = NULL; - } -} - -/* - * Iterate through a btrfs_bio (@bbio) on a per-sector basis. - * - * bvl - struct bio_vec - * bbio - struct btrfs_bio - * iters - struct bvec_iter - * bio_offset - unsigned int - */ -#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, iter, bio_offset) \ - for ((iter) = (bbio)->iter, (bio_offset) = 0; \ - (iter).bi_size && \ - (((bvl) = bio_iter_iovec((&(bbio)->bio), (iter))), 1); \ - (bio_offset) += fs_info->sectorsize, \ - bio_advance_iter_single(&(bbio)->bio, &(iter), \ - (fs_info)->sectorsize)) - struct btrfs_io_stripe { struct btrfs_device *dev; union { @@ -603,6 +522,13 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) } } +static inline unsigned long btrfs_chunk_item_size(int num_stripes) +{ + ASSERT(num_stripes); + return sizeof(struct btrfs_chunk) + + sizeof(struct btrfs_stripe) * (num_stripes - 1); +} + void btrfs_get_bioc(struct btrfs_io_context *bioc); void btrfs_put_bioc(struct btrfs_io_context *bioc); int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, @@ -611,6 +537,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_io_context **bioc_ret); +int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, + u64 logical, u64 *length, + struct btrfs_io_context **bioc_ret, + struct btrfs_io_stripe *smap, int *mirror_num_ret, + int need_raid_map); struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, u64 logical, u64 *length_ret, u32 *num_stripes); @@ -622,7 +553,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type); void btrfs_mapping_tree_free(struct extent_map_tree *tree); -void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); struct btrfs_device *btrfs_scan_one_device(const char *path, @@ -639,8 +569,8 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, const char *path); struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, - const u64 *devid, - const u8 *uuid); + const u64 *devid, const u8 *uuid, + const char *path); void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info, @@ -741,6 +671,14 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev, atomic_inc(&dev->dev_stats_ccnt); } +static inline const char *btrfs_dev_name(const struct btrfs_device *device) +{ + if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + return "<missing disk>"; + else + return rcu_str_deref(device->name); +} + void btrfs_commit_device_sizes(struct btrfs_transaction *trans); struct list_head * __attribute_const__ btrfs_get_fs_uuids(void); |