summaryrefslogtreecommitdiff
path: root/fs/btrfs/accessors.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 20:47:51 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 20:47:51 -0800
commit149c51f876322d9bfbd5e2d6ffae7aff3d794384 (patch)
treea61c7dd828356e307fca06fc66dbdbf9b109c18f /fs/btrfs/accessors.c
parent97971df811b8854882c0f6c6631e23ab8cdcc44f (diff)
parentb7af0635c87ff78d6bd523298ab7471f9ffd3ce5 (diff)
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This round there are a lot of cleanups and moved code so the diffstat looks huge, otherwise there are some nice performance improvements and an update to raid56 reliability. User visible features: - raid56 reliability vs performance trade off: - fix destructive RMW for raid5 data (raid6 still needs work): do full checksum verification for all data during RMW cycle, this should prevent rewriting potentially corrupted data without notice - stripes are cached in memory which should reduce the performance impact but still can hurt some workloads - checksums are verified after repair again - this is the last option without introducing additional features (write intent bitmap, journal, another tree), the extra checksum read/verification was supposed to be avoided by the original implementation exactly for performance reasons but that caused all the reliability problems - discard=async by default for devices that support it - implement emergency flush reserve to avoid almost all unnecessary transaction aborts due to ENOSPC in cases where there are too many delayed refs or delayed allocation - skip block group synchronization if there's no change in used bytes, can reduce transaction commit count for some workloads Performance improvements: - fiemap and lseek: - overall speedup due to skipping unnecessary or duplicate searches (-40% run time) - cache some data structures and sharedness of extents (-30% run time) - send: - faster backref resolution when finding clones - cached leaf to root mapping for faster backref walking - improved clone/sharing detection - overall run time improvements (-70%) Core: - module initialization converted to a table of function pointers run in a sequence - preparation for fscrypt, extend passing file names across calls, dir item can store encryption status - raid56 updates: - more accurate error tracking of sectors within stripe - simplify recovery path and remove dedicated endio worker kthread - simplify 
scrub call paths - refactoring to support the extra data checksum verification during RMW cycle - tree block parentness checks consolidated and done at metadata read time - improved error handling - cleanups: - move a lot of code for better synchronization between kernel and user space sources, split big files - enum cleanups - GFP flag cleanups - header file cleanups, prototypes, dependencies - redundant parameter cleanups - inline extent handling simplifications - inode parameter conversion - data structure cleanups, reductions, renames, merges" * tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits) btrfs: print transaction aborted messages with an error level btrfs: sync some cleanups from progs into uapi/btrfs.h btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range btrfs: fix extent map use-after-free when handling missing device in read_one_chunk btrfs: remove outdated logic from overwrite_item() and add assertion btrfs: unify overwrite_item() and do_overwrite_item() btrfs: replace strncpy() with strscpy() btrfs: fix uninitialized variable in find_first_clear_extent_bit btrfs: fix uninitialized parent in insert_state btrfs: add might_sleep() annotations btrfs: add stack helpers for a few btrfs items btrfs: add nr_global_roots to the super block definition btrfs: remove BTRFS_LEAF_DATA_OFFSET btrfs: add helpers for manipulating leaf items and data btrfs: add eb to btrfs_node_key_ptr_offset btrfs: pass the extent buffer for the btrfs_item_nr helpers btrfs: move the csum helpers into ctree.h btrfs: move eb offset helpers into extent_io.h btrfs: move file_extent_item helpers into file-item.h btrfs: move leaf_data_end into ctree.c ...
Diffstat (limited to 'fs/btrfs/accessors.c')
-rw-r--r--fs/btrfs/accessors.c174
1 files changed, 174 insertions, 0 deletions
diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c
new file mode 100644
index 000000000000..206cf1612c1d
--- /dev/null
+++ b/fs/btrfs/accessors.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ */
+
+#include <asm/unaligned.h>
+#include "messages.h"
+#include "ctree.h"
+#include "accessors.h"
+
+/*
+ * Verify that a member of @size bytes located at byte position
+ * (unsigned long)@ptr + @off lies entirely inside the extent buffer @eb,
+ * i.e. does not extend past eb->len.
+ *
+ * @off is unsigned long, matching the accessors that pass it in, so the
+ * offset that is validated is never a truncated copy of the one that is
+ * actually accessed. The end of the member is tested by subtracting from
+ * eb->len instead of computing member_offset + size, because that sum could
+ * wrap around and slip past the check.
+ *
+ * On failure a warning is logged that tells whether already the "start" of
+ * the member or only its "end" is out of range.
+ *
+ * Return true if the access is in bounds, false otherwise.
+ */
+static bool check_setget_bounds(const struct extent_buffer *eb,
+				const void *ptr, unsigned long off, int size)
+{
+	const unsigned long member_offset = (unsigned long)ptr + off;
+
+	/*
+	 * The subtraction is only evaluated when member_offset <= eb->len, so
+	 * it cannot underflow.
+	 */
+	if (unlikely(member_offset > eb->len ||
+		     (unsigned long)size > eb->len - member_offset)) {
+		/*
+		 * Offset eb->len is the first byte past the buffer, so a member
+		 * beginning there already has its start out of range.
+		 */
+		btrfs_warn(eb->fs_info,
+		"bad eb member %s: ptr 0x%lx start %llu member offset %lu size %d",
+			(member_offset >= eb->len ? "start" : "end"),
+			(unsigned long)ptr, eb->start, member_offset, size);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Set up @token for accesses to @eb: record the extent buffer and start out
+ * with the address of its first page, which corresponds to offset 0.
+ */
+void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb)
+{
+	/* The cached page is pages[0], hence the cached window begins at 0. */
+	token->offset = 0;
+	token->kaddr = page_address(eb->pages[0]);
+	token->eb = eb;
+}
+
+/*
+ * Macro templates that define helpers to read/write extent buffer data of a
+ * given size, that are also used via ctree.h for access to item members by
+ * specialized helpers.
+ *
+ * Generic helpers:
+ * - btrfs_set_8 (for 8/16/32/64)
+ * - btrfs_get_8 (for 8/16/32/64)
+ *
+ * Generic helpers with a token (cached address of the most recently accessed
+ * page):
+ * - btrfs_set_token_8 (for 8/16/32/64)
+ * - btrfs_get_token_8 (for 8/16/32/64)
+ *
+ * The set/get functions handle data spanning two pages transparently, in case
+ * metadata block size is larger than page. Every pointer to metadata items is
+ * an offset into the extent buffer page array, cast to a specific type. This
+ * gives us all the type checking.
+ *
+ * The extent buffer pages stored in the array pages do not form a contiguous
+ * physical range, but the API functions assume the linear offset to the range
+ * from 0 to metadata node size.
+ */
+
+/*
+ * DEFINE_BTRFS_SETGET_BITS(bits) - emit the four accessors for a little-endian
+ * u<bits> value stored in an extent buffer:
+ *
+ *   btrfs_get_token_<bits>()  btrfs_get_<bits>()
+ *   btrfs_set_token_<bits>()  btrfs_set_<bits>()
+ *
+ * All four compute the byte position of the value as (unsigned long)ptr + off
+ * and derive from it the page index (idx) and the offset inside that page
+ * (oip). The position and size are checked with
+ * ASSERT(check_setget_bounds(...)).
+ *
+ * Single page access: when INLINE_EXTENT_BUFFER_PAGES == 1 or
+ * oip + size <= PAGE_SIZE, the value is read or written in place with
+ * get_unaligned_le<bits>() / put_unaligned_le<bits>().
+ *
+ * Split access: otherwise the value straddles pages idx and idx + 1. The
+ * little-endian bytes are staged in the local lebytes[] buffer and moved with
+ * two memcpy() calls; 'part' is the number of bytes that live in page idx and
+ * the remaining size - part bytes start at the beginning of page idx + 1.
+ *
+ * Token variants: if the member lies completely within
+ * [token->offset, token->offset + PAGE_SIZE), token->kaddr is used directly.
+ * Otherwise token->kaddr and token->offset are updated to page idx, and after
+ * a split access they are left pointing at page idx + 1.
+ *
+ * NOTE(review): in the token variants token->eb is dereferenced in the
+ * variable initializers, before ASSERT(token) is evaluated, so that assertion
+ * cannot catch a NULL token.
+ *
+ * The macro is instantiated below for 8, 16, 32 and 64 bits.
+ */
+#define DEFINE_BTRFS_SETGET_BITS(bits) \
+u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
+ const void *ptr, unsigned long off) \
+{ \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long idx = get_eb_page_index(member_offset); \
+ const unsigned long oip = get_eb_offset_in_page(token->eb, \
+ member_offset); \
+ const int size = sizeof(u##bits); \
+ u8 lebytes[sizeof(u##bits)]; \
+ const int part = PAGE_SIZE - oip; \
+ \
+ ASSERT(token); \
+ ASSERT(token->kaddr); \
+ ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
+ if (token->offset <= member_offset && \
+ member_offset + size <= token->offset + PAGE_SIZE) { \
+ return get_unaligned_le##bits(token->kaddr + oip); \
+ } \
+ token->kaddr = page_address(token->eb->pages[idx]); \
+ token->offset = idx << PAGE_SHIFT; \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE ) \
+ return get_unaligned_le##bits(token->kaddr + oip); \
+ \
+ memcpy(lebytes, token->kaddr + oip, part); \
+ token->kaddr = page_address(token->eb->pages[idx + 1]); \
+ token->offset = (idx + 1) << PAGE_SHIFT; \
+ memcpy(lebytes + part, token->kaddr, size - part); \
+ return get_unaligned_le##bits(lebytes); \
+} \
+u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
+ const void *ptr, unsigned long off) \
+{ \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
+ const unsigned long idx = get_eb_page_index(member_offset); \
+ char *kaddr = page_address(eb->pages[idx]); \
+ const int size = sizeof(u##bits); \
+ const int part = PAGE_SIZE - oip; \
+ u8 lebytes[sizeof(u##bits)]; \
+ \
+ ASSERT(check_setget_bounds(eb, ptr, off, size)); \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) \
+ return get_unaligned_le##bits(kaddr + oip); \
+ \
+ memcpy(lebytes, kaddr + oip, part); \
+ kaddr = page_address(eb->pages[idx + 1]); \
+ memcpy(lebytes + part, kaddr, size - part); \
+ return get_unaligned_le##bits(lebytes); \
+} \
+void btrfs_set_token_##bits(struct btrfs_map_token *token, \
+ const void *ptr, unsigned long off, \
+ u##bits val) \
+{ \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long idx = get_eb_page_index(member_offset); \
+ const unsigned long oip = get_eb_offset_in_page(token->eb, \
+ member_offset); \
+ const int size = sizeof(u##bits); \
+ u8 lebytes[sizeof(u##bits)]; \
+ const int part = PAGE_SIZE - oip; \
+ \
+ ASSERT(token); \
+ ASSERT(token->kaddr); \
+ ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
+ if (token->offset <= member_offset && \
+ member_offset + size <= token->offset + PAGE_SIZE) { \
+ put_unaligned_le##bits(val, token->kaddr + oip); \
+ return; \
+ } \
+ token->kaddr = page_address(token->eb->pages[idx]); \
+ token->offset = idx << PAGE_SHIFT; \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
+ put_unaligned_le##bits(val, token->kaddr + oip); \
+ return; \
+ } \
+ put_unaligned_le##bits(val, lebytes); \
+ memcpy(token->kaddr + oip, lebytes, part); \
+ token->kaddr = page_address(token->eb->pages[idx + 1]); \
+ token->offset = (idx + 1) << PAGE_SHIFT; \
+ memcpy(token->kaddr, lebytes + part, size - part); \
+} \
+void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
+ unsigned long off, u##bits val) \
+{ \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
+ const unsigned long idx = get_eb_page_index(member_offset); \
+ char *kaddr = page_address(eb->pages[idx]); \
+ const int size = sizeof(u##bits); \
+ const int part = PAGE_SIZE - oip; \
+ u8 lebytes[sizeof(u##bits)]; \
+ \
+ ASSERT(check_setget_bounds(eb, ptr, off, size)); \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
+ put_unaligned_le##bits(val, kaddr + oip); \
+ return; \
+ } \
+ \
+ put_unaligned_le##bits(val, lebytes); \
+ memcpy(kaddr + oip, lebytes, part); \
+ kaddr = page_address(eb->pages[idx + 1]); \
+ memcpy(kaddr, lebytes + part, size - part); \
+}
+
+DEFINE_BTRFS_SETGET_BITS(8)
+DEFINE_BTRFS_SETGET_BITS(16)
+DEFINE_BTRFS_SETGET_BITS(32)
+DEFINE_BTRFS_SETGET_BITS(64)
+
+/*
+ * Copy the key member of the struct btrfs_key_ptr in slot @nr of @eb into
+ * @disk_key.
+ */
+void btrfs_node_key(const struct extent_buffer *eb,
+		    struct btrfs_disk_key *disk_key, int nr)
+{
+	struct btrfs_key_ptr *key_ptr;
+
+	/* The "pointer" is really a byte offset into the extent buffer. */
+	key_ptr = (struct btrfs_key_ptr *)btrfs_node_key_ptr_offset(eb, nr);
+	read_eb_member(eb, key_ptr, struct btrfs_key_ptr, key, disk_key);
+}