From 7dc66abb5a47778d7db327783a0ba172b8cff0b5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 21 Nov 2023 13:38:38 +0000 Subject: btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 279a7a0c90c0..4a95097ab590 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -21,7 +21,7 @@ struct btrfs_delayed_data_ref; struct btrfs_delayed_ref_head; struct btrfs_block_group; struct btrfs_free_cluster; -struct map_lookup; +struct btrfs_chunk_map; struct extent_buffer; struct btrfs_work; struct btrfs_workqueue; @@ -277,8 +277,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, { (1 << EXTENT_FLAG_COMPRESSED), "COMPRESSED" },\ { (1 << EXTENT_FLAG_PREALLOC), "PREALLOC" },\ { (1 << EXTENT_FLAG_LOGGING), "LOGGING" },\ - { (1 << EXTENT_FLAG_FILLING), "FILLING" },\ - { (1 << EXTENT_FLAG_FS_MAPPING), "FS_MAPPING" }) + { (1 << EXTENT_FLAG_FILLING), "FILLING" }) TRACE_EVENT_CONDITION(btrfs_get_extent, @@ -1061,7 +1060,7 @@ DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, DECLARE_EVENT_CLASS(btrfs__chunk, TP_PROTO(const struct btrfs_fs_info *fs_info, - const struct map_lookup *map, u64 offset, u64 size), + const struct btrfs_chunk_map *map, u64 offset, u64 size), TP_ARGS(fs_info, map, offset, size), @@ -1095,7 +1094,7 @@ DECLARE_EVENT_CLASS(btrfs__chunk, DEFINE_EVENT(btrfs__chunk, btrfs_chunk_alloc, TP_PROTO(const struct btrfs_fs_info *fs_info, - const struct map_lookup *map, u64 offset, u64 size), + const struct btrfs_chunk_map *map, u64 offset, u64 size), TP_ARGS(fs_info, map, offset, size) ); @@ -1103,7 +1102,7 @@ DEFINE_EVENT(btrfs__chunk, btrfs_chunk_alloc, DEFINE_EVENT(btrfs__chunk, btrfs_chunk_free, TP_PROTO(const struct btrfs_fs_info *fs_info, - const struct map_lookup *map, u64 offset, u64 size), + const struct btrfs_chunk_map *map, u64 offset, u64 size), TP_ARGS(fs_info, map, offset, size) ); -- cgit v1.2.3 From 738290c056e28d83177ecbed3894e094e161939e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 21 Nov 2023 14:20:24 +0100 Subject: btrfs: always set extent_io_tree::inode and drop fs_info The extent_io_tree is embedded in several structures, notably in struct btrfs_inode. The fs_info is only used for reporting errors and for reference in trace points. We can get to the pointer through the inode, but not all io trees set it. However, we always know the owner and can recognize if inode is valid. For access helpers are provided, const variant for the trace points. This reduces size of extent_io_tree by 8 bytes and following structures in turn: - btrfs_inode 1104 -> 1088 - btrfs_device 520 -> 512 - btrfs_root 1360 -> 1344 - btrfs_transaction 456 -> 440 - btrfs_fs_info 3600 -> 3592 - reloc_control 1520 -> 1512 Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 47 +++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 4a95097ab590..856109048999 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2098,17 +2098,12 @@ TRACE_EVENT(btrfs_set_extent_bit, __field( unsigned, set_bits) ), - TP_fast_assign_btrfs(tree->fs_info, - __entry->owner = tree->owner; - if (tree->inode) { - const struct btrfs_inode *inode = tree->inode; - - __entry->ino = btrfs_ino(inode); - __entry->rootid = inode->root->root_key.objectid; - } else { - __entry->ino = 0; - __entry->rootid = 0; - } + TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); + + __entry->owner = tree->owner; + __entry->ino = inode ? btrfs_ino(inode) : 0; + __entry->rootid = inode ? inode->root->root_key.objectid : 0; __entry->start = start; __entry->len = len; __entry->set_bits = set_bits; @@ -2136,17 +2131,12 @@ TRACE_EVENT(btrfs_clear_extent_bit, __field( unsigned, clear_bits) ), - TP_fast_assign_btrfs(tree->fs_info, - __entry->owner = tree->owner; - if (tree->inode) { - const struct btrfs_inode *inode = tree->inode; + TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); - __entry->ino = btrfs_ino(inode); - __entry->rootid = inode->root->root_key.objectid; - } else { - __entry->ino = 0; - __entry->rootid = 0; - } + __entry->owner = tree->owner; + __entry->ino = inode ? btrfs_ino(inode) : 0; + __entry->rootid = inode ? inode->root->root_key.objectid : 0; __entry->start = start; __entry->len = len; __entry->clear_bits = clear_bits; @@ -2175,17 +2165,12 @@ TRACE_EVENT(btrfs_convert_extent_bit, __field( unsigned, clear_bits) ), - TP_fast_assign_btrfs(tree->fs_info, - __entry->owner = tree->owner; - if (tree->inode) { - const struct btrfs_inode *inode = tree->inode; + TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); - __entry->ino = btrfs_ino(inode); - __entry->rootid = inode->root->root_key.objectid; - } else { - __entry->ino = 0; - __entry->rootid = 0; - } + __entry->owner = tree->owner; + __entry->ino = inode ? btrfs_ino(inode) : 0; + __entry->rootid = inode ? inode->root->root_key.objectid : 0; __entry->start = start; __entry->len = len; __entry->set_bits = set_bits; -- cgit v1.2.3 From 3c0e918b8fb3a6a7da1558913302a3e89cf87343 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 23 Nov 2023 23:53:51 +0000 Subject: btrfs: remove no longer used EXTENT_MAP_DELALLOC block start value After commit ac3c0d36a2a2 ("btrfs: make fiemap more efficient and accurate reporting extent sharedness") we no longer need to create special extent maps during fiemap that have a block start with the EXTENT_MAP_DELALLOC value. So this block start value for extent maps is no longer used since then, therefore remove it. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 856109048999..31da1456f953 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -265,8 +265,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, __print_symbolic_u64(type, \ { EXTENT_MAP_LAST_BYTE, "LAST_BYTE" }, \ { EXTENT_MAP_HOLE, "HOLE" }, \ - { EXTENT_MAP_INLINE, "INLINE" }, \ - { EXTENT_MAP_DELALLOC, "DELALLOC" }) + { EXTENT_MAP_INLINE, "INLINE" }) #define show_map_type(type) \ type, (type >= EXTENT_MAP_LAST_BYTE) ? "-" : __show_map_type(type) -- cgit v1.2.3 From f86f7a75e2fb5fd7d31d00eab8a392f97ba42ce9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Dec 2023 16:20:33 +0000 Subject: btrfs: use the flags of an extent map to identify the compression type Currently, in struct extent_map, we use an unsigned int (32 bits) to identify the compression type of an extent and an unsigned long (64 bits on a 64 bits platform, 32 bits otherwise) for flags. We are only using 6 different flags, so an unsigned long is excessive and we can use flags to identify the compression type instead of using a dedicated 32 bits field. We can easily have tens or hundreds of thousands (or more) of extent maps on busy and large filesystems, specially with compression enabled or many or large files with tons of small extents. So it's convenient to have the extent_map structure as small as possible in order to use less memory. So remove the compression type field from struct extent_map, use flags to identify the compression type and shorten the flags field from an unsigned long to a u32. This saves 8 bytes (on 64 bits platforms) and reduces the size of the structure from 136 bytes down to 128 bytes, using now only two cache lines, and increases the number of extent maps we can have per 4K page from 30 to 32. By using a u32 for the flags instead of an unsigned long, we no longer use test_bit(), set_bit() and clear_bit(), but that level of atomicity is not needed as most flags are never cleared once set (before adding an extent map to the tree), and the ones that can be cleared or set after an extent map is added to the tree, are always performed while holding the write lock on the extent map tree, while the reader holds a lock on the tree or tests for a flag that never changes once the extent map is in the tree (such as compression flags). Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 31da1456f953..90b0222390e5 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -272,11 +272,13 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, #define show_map_flags(flag) \ __print_flags(flag, "|", \ - { (1 << EXTENT_FLAG_PINNED), "PINNED" },\ - { (1 << EXTENT_FLAG_COMPRESSED), "COMPRESSED" },\ - { (1 << EXTENT_FLAG_PREALLOC), "PREALLOC" },\ - { (1 << EXTENT_FLAG_LOGGING), "LOGGING" },\ - { (1 << EXTENT_FLAG_FILLING), "FILLING" }) + { EXTENT_FLAG_PINNED, "PINNED" },\ + { EXTENT_FLAG_COMPRESS_ZLIB, "COMPRESS_ZLIB" },\ + { EXTENT_FLAG_COMPRESS_LZO, "COMPRESS_LZO" },\ + { EXTENT_FLAG_COMPRESS_ZSTD, "COMPRESS_ZSTD" },\ + { EXTENT_FLAG_PREALLOC, "PREALLOC" },\ + { EXTENT_FLAG_LOGGING, "LOGGING" },\ + { EXTENT_FLAG_FILLING, "FILLING" }) TRACE_EVENT_CONDITION(btrfs_get_extent, @@ -295,9 +297,8 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __field( u64, orig_start ) __field( u64, block_start ) __field( u64, block_len ) - __field( unsigned long, flags ) + __field( u32, flags ) __field( int, refs ) - __field( unsigned int, compress_type ) ), TP_fast_assign_btrfs(root->fs_info, @@ -310,13 +311,11 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->block_len = map->block_len; __entry->flags = map->flags; __entry->refs = refcount_read(&map->refs); - __entry->compress_type = map->compress_type; ), TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu " "orig_start=%llu block_start=%llu(%s) " - "block_len=%llu flags=%s refs=%u " - "compress_type=%u", + "block_len=%llu flags=%s refs=%u", show_root_type(__entry->root_objectid), __entry->ino, __entry->start, @@ -325,7 +324,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, show_map_type(__entry->block_start), __entry->block_len, show_map_flags(__entry->flags), - __entry->refs, __entry->compress_type) + __entry->refs) ); TRACE_EVENT(btrfs_handle_em_exist, -- cgit v1.2.3