From fe11ac191ce0ad910f6fda0c628bcff19fcff47d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 4 Dec 2025 23:04:54 -0800 Subject: btrfs: switch to library APIs for checksums Make btrfs use the library APIs instead of crypto_shash, for all checksum computations. This has many benefits: - Allows future checksum types, e.g. XXH3 or CRC64, to be more easily supported. Only a library API will be needed, not crypto_shash too. - Eliminates the overhead of the generic crypto layer, including an indirect call for every function call and other API overhead. A microbenchmark of btrfs_check_read_bio() with crc32c checksums shows a speedup from 658 cycles to 608 cycles per 4096-byte block. - Decreases the stack usage of btrfs by reducing the size of checksum contexts from 384 bytes to 240 bytes, and by eliminating the need for some functions to declare a checksum context at all. - Increases reliability. The library functions always succeed and return void. In contrast, crypto_shash can fail and return errors. Also, the library functions are guaranteed to be available when btrfs is loaded; there's no longer any need to use module softdeps to try to work around the crypto modules sometimes not being loaded. - Fixes a bug where blake2b checksums didn't work on kernels booted with fips=1. Since btrfs checksums are for integrity only, it's fine for them to use non-FIPS-approved algorithms. Note that with having to handle 4 algorithms instead of just 1-2, this commit does result in a slightly positive diffstat. That being said, this wouldn't have been the case if btrfs had actually checked for errors from crypto_shash, which technically it should have been doing. 
Reviewed-by: Ard Biesheuvel Reviewed-by: Neal Gompa Signed-off-by: Eric Biggers Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/Kconfig | 8 ++-- fs/btrfs/compression.c | 1 - fs/btrfs/disk-io.c | 68 +++++++++------------------------ fs/btrfs/file-item.c | 4 -- fs/btrfs/fs.c | 100 ++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/fs.h | 23 +++++++++--- fs/btrfs/inode.c | 10 ++--- fs/btrfs/scrub.c | 16 ++++---- fs/btrfs/super.c | 4 -- fs/btrfs/sysfs.c | 6 +-- 10 files changed, 137 insertions(+), 103 deletions(-) diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 6d6fc85835d4..d88eb836a193 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -4,11 +4,8 @@ config BTRFS_FS tristate "Btrfs filesystem support" select BLK_CGROUP_PUNT_BIO select CRC32 - select CRYPTO - select CRYPTO_CRC32C - select CRYPTO_XXHASH - select CRYPTO_SHA256 - select CRYPTO_BLAKE2B + select CRYPTO_LIB_BLAKE2B + select CRYPTO_LIB_SHA256 select ZLIB_INFLATE select ZLIB_DEFLATE select LZO_COMPRESS @@ -18,6 +15,7 @@ config BTRFS_FS select FS_IOMAP select RAID6_PQ select XOR_BLOCKS + select XXHASH depends on PAGE_SIZE_LESS_THAN_256KB help diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6b3357287b42..4323d4172c7b 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "misc.h" #include "ctree.h" #include "fs.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2833b44f4b4f..12d91407bb60 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -62,12 +61,6 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info); static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info); -static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) -{ - if (fs_info->csum_shash) - crypto_free_shash(fs_info->csum_shash); -} - /* * Compute the 
csum of a btree block and store the result to provided buffer. */ @@ -76,12 +69,11 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) struct btrfs_fs_info *fs_info = buf->fs_info; int num_pages; u32 first_page_part; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + struct btrfs_csum_ctx csum; char *kaddr; int i; - shash->tfm = fs_info->csum_shash; - crypto_shash_init(shash); + btrfs_csum_init(&csum, fs_info->csum_type); if (buf->addr) { /* Pages are contiguous, handle them as a big one. */ @@ -94,21 +86,21 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) num_pages = num_extent_pages(buf); } - crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, - first_page_part - BTRFS_CSUM_SIZE); + btrfs_csum_update(&csum, kaddr + BTRFS_CSUM_SIZE, + first_page_part - BTRFS_CSUM_SIZE); /* * Multiple single-page folios case would reach here. * * nodesize <= PAGE_SIZE and large folio all handled by above - * crypto_shash_update() already. + * btrfs_csum_update() already. */ for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) { kaddr = folio_address(buf->folios[i]); - crypto_shash_update(shash, kaddr, PAGE_SIZE); + btrfs_csum_update(&csum, kaddr, PAGE_SIZE); } memset(result, 0, BTRFS_CSUM_SIZE); - crypto_shash_final(shash, result); + btrfs_csum_final(&csum, result); } /* @@ -160,18 +152,15 @@ static bool btrfs_supported_super_csum(u16 csum_type) int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, const struct btrfs_super_block *disk_sb) { - char result[BTRFS_CSUM_SIZE]; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); - - shash->tfm = fs_info->csum_shash; + u8 result[BTRFS_CSUM_SIZE]; /* * The super_block structure does not span the whole * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is * filled with zeros and is included in the checksum. 
*/ - crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result); + btrfs_csum(fs_info->csum_type, (const u8 *)disk_sb + BTRFS_CSUM_SIZE, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result); if (memcmp(disk_sb->csum, result, fs_info->csum_size)) return 1; @@ -1229,7 +1218,6 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) ASSERT(percpu_counter_sum_positive(em_counter) == 0); percpu_counter_destroy(em_counter); percpu_counter_destroy(&fs_info->dev_replace.bio_counter); - btrfs_free_csum_hash(fs_info); btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); kfree(fs_info->balance_ctl); @@ -1983,21 +1971,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) return 0; } -static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) +static void btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) { - struct crypto_shash *csum_shash; - const char *csum_driver = btrfs_super_csum_driver(csum_type); - - csum_shash = crypto_alloc_shash(csum_driver, 0, 0); - - if (IS_ERR(csum_shash)) { - btrfs_err(fs_info, "error allocating %s hash for checksum", - csum_driver); - return PTR_ERR(csum_shash); - } - - fs_info->csum_shash = csum_shash; - /* Check if the checksum implementation is a fast accelerated one. 
*/ switch (csum_type) { case BTRFS_CSUM_TYPE_CRC32: @@ -2011,10 +1986,8 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) break; } - btrfs_info(fs_info, "using %s (%s) checksum algorithm", - btrfs_super_csum_name(csum_type), - crypto_shash_driver_name(csum_shash)); - return 0; + btrfs_info(fs_info, "using %s checksum algorithm", + btrfs_super_csum_name(csum_type)); } static int btrfs_replay_log(struct btrfs_fs_info *fs_info, @@ -3302,12 +3275,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } fs_info->csum_size = btrfs_super_csum_size(disk_super); + fs_info->csum_type = csum_type; - ret = btrfs_init_csum_hash(fs_info, csum_type); - if (ret) { - btrfs_release_disk_super(disk_super); - goto fail_alloc; - } + btrfs_init_csum_hash(fs_info, csum_type); /* * We want to check superblock checksum, the type is stored inside. @@ -3709,7 +3679,6 @@ static int write_dev_supers(struct btrfs_device *device, { struct btrfs_fs_info *fs_info = device->fs_info; struct address_space *mapping = device->bdev->bd_mapping; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); int i; int ret; u64 bytenr, bytenr_orig; @@ -3719,8 +3688,6 @@ static int write_dev_supers(struct btrfs_device *device, if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; - shash->tfm = fs_info->csum_shash; - for (i = 0; i < max_mirrors; i++) { struct folio *folio; struct bio *bio; @@ -3744,9 +3711,8 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_set_super_bytenr(sb, bytenr_orig); - crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, - sb->csum); + btrfs_csum(fs_info->csum_type, (const u8 *)sb + BTRFS_CSUM_SIZE, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, sb->csum); folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 14e5257f0f04..568f0e0ebdf6 100644 --- 
a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -8,7 +8,6 @@ #include #include #include -#include #include "messages.h" #include "ctree.h" #include "disk-io.h" @@ -769,7 +768,6 @@ static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src) { struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct bio *bio = &bbio->bio; struct btrfs_ordered_sum *sums = bbio->sums; struct bvec_iter iter = *src; @@ -781,8 +779,6 @@ static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src) u32 offset = 0; int index = 0; - shash->tfm = fs_info->csum_shash; - btrfs_bio_for_each_block(paddr, bio, &iter, step) { paddrs[(offset / step) % nr_steps] = paddr; offset += step; diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c index feb0a2faa837..14d83565cdee 100644 --- a/fs/btrfs/fs.c +++ b/fs/btrfs/fs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include "messages.h" #include "fs.h" #include "accessors.h" @@ -8,13 +9,11 @@ static const struct btrfs_csums { u16 size; const char name[10]; - const char driver[12]; } btrfs_csums[] = { [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" }, [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" }, [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" }, - [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b", - .driver = "blake2b-256" }, + [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b" }, }; /* This exists for btrfs-progs usages. */ @@ -37,21 +36,94 @@ const char *btrfs_super_csum_name(u16 csum_type) return btrfs_csums[csum_type].name; } -/* - * Return driver name if defined, otherwise the name that's also a valid driver - * name. - */ -const char *btrfs_super_csum_driver(u16 csum_type) +size_t __attribute_const__ btrfs_get_num_csums(void) { - /* csum type is validated at mount time */ - return btrfs_csums[csum_type].driver[0] ? 
- btrfs_csums[csum_type].driver : - btrfs_csums[csum_type].name; + return ARRAY_SIZE(btrfs_csums); } -size_t __attribute_const__ btrfs_get_num_csums(void) +void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out) { - return ARRAY_SIZE(btrfs_csums); + switch (csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + put_unaligned_le32(~crc32c(~0, data, len), out); + break; + case BTRFS_CSUM_TYPE_XXHASH: + put_unaligned_le64(xxh64(data, len, 0), out); + break; + case BTRFS_CSUM_TYPE_SHA256: + sha256(data, len, out); + break; + case BTRFS_CSUM_TYPE_BLAKE2: + blake2b(NULL, 0, data, len, out, 32); + break; + default: + /* Checksum type is validated at mount time. */ + BUG(); + } +} + +void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type) +{ + ctx->csum_type = csum_type; + switch (ctx->csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + ctx->crc32 = ~0; + break; + case BTRFS_CSUM_TYPE_XXHASH: + xxh64_reset(&ctx->xxh64, 0); + break; + case BTRFS_CSUM_TYPE_SHA256: + sha256_init(&ctx->sha256); + break; + case BTRFS_CSUM_TYPE_BLAKE2: + blake2b_init(&ctx->blake2b, 32); + break; + default: + /* Checksum type is validated at mount time. */ + BUG(); + } +} + +void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len) +{ + switch (ctx->csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + ctx->crc32 = crc32c(ctx->crc32, data, len); + break; + case BTRFS_CSUM_TYPE_XXHASH: + xxh64_update(&ctx->xxh64, data, len); + break; + case BTRFS_CSUM_TYPE_SHA256: + sha256_update(&ctx->sha256, data, len); + break; + case BTRFS_CSUM_TYPE_BLAKE2: + blake2b_update(&ctx->blake2b, data, len); + break; + default: + /* Checksum type is validated at mount time. 
*/ + BUG(); + } +} + +void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out) +{ + switch (ctx->csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + put_unaligned_le32(~ctx->crc32, out); + break; + case BTRFS_CSUM_TYPE_XXHASH: + put_unaligned_le64(xxh64_digest(&ctx->xxh64), out); + break; + case BTRFS_CSUM_TYPE_SHA256: + sha256_final(&ctx->sha256, out); + break; + case BTRFS_CSUM_TYPE_BLAKE2: + blake2b_final(&ctx->blake2b, out); + break; + default: + /* Checksum type is validated at mount time. */ + BUG(); + } } /* diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 8ffbc40ebe45..458a24206935 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -3,6 +3,8 @@ #ifndef BTRFS_FS_H #define BTRFS_FS_H +#include +#include #include #include #include @@ -24,6 +26,7 @@ #include #include #include +#include #include #include #include "extent-io-tree.h" @@ -35,7 +38,6 @@ struct inode; struct super_block; struct kobject; struct reloc_control; -struct crypto_shash; struct ulist; struct btrfs_device; struct btrfs_block_group; @@ -850,9 +852,10 @@ struct btrfs_fs_info { u32 sectorsize_bits; u32 block_min_order; u32 block_max_order; + u32 stripesize; u32 csum_size; u32 csums_per_leaf; - u32 stripesize; + u32 csum_type; /* * Maximum size of an extent. 
BTRFS_MAX_EXTENT_SIZE on regular @@ -864,8 +867,6 @@ struct btrfs_fs_info { spinlock_t swapfile_pins_lock; struct rb_root swapfile_pins; - struct crypto_shash *csum_shash; - /* Type of exclusive operation running, protected by super_lock */ enum btrfs_exclusive_operation exclusive_operation; @@ -1057,8 +1058,20 @@ int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args) u16 btrfs_csum_type_size(u16 type); int btrfs_super_csum_size(const struct btrfs_super_block *s); const char *btrfs_super_csum_name(u16 csum_type); -const char *btrfs_super_csum_driver(u16 csum_type); size_t __attribute_const__ btrfs_get_num_csums(void); +struct btrfs_csum_ctx { + u16 csum_type; + union { + u32 crc32; + struct xxh64_state xxh64; + struct sha256_ctx sha256; + struct blake2b_ctx blake2b; + }; +}; +void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out); +void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type); +void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len); +void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out); static inline bool btrfs_is_empty_uuid(const u8 *uuid) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a2b5b440637e..5dceb03bee0a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3,7 +3,6 @@ * Copyright (C) 2007 Oracle. All rights reserved. 
*/ -#include #include #include #include @@ -3417,20 +3416,19 @@ void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info, const u32 blocksize = fs_info->sectorsize; const u32 step = min(blocksize, PAGE_SIZE); const u32 nr_steps = blocksize / step; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + struct btrfs_csum_ctx csum; - shash->tfm = fs_info->csum_shash; - crypto_shash_init(shash); + btrfs_csum_init(&csum, fs_info->csum_type); for (int i = 0; i < nr_steps; i++) { const phys_addr_t paddr = paddrs[i]; void *kaddr; ASSERT(offset_in_page(paddr) + step <= PAGE_SIZE); kaddr = kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr); - crypto_shash_update(shash, kaddr, step); + btrfs_csum_update(&csum, kaddr, step); kunmap_local(kaddr); } - crypto_shash_final(shash, dest); + btrfs_csum_final(&csum, dest); } /* diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a40ee41f42c6..1a60e631d801 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -6,7 +6,6 @@ #include #include #include -#include #include "ctree.h" #include "discard.h" #include "volumes.h" @@ -718,7 +717,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr const u64 logical = stripe->logical + (sector_nr << fs_info->sectorsize_bits); void *first_kaddr = scrub_stripe_get_kaddr(stripe, sector_nr); struct btrfs_header *header = first_kaddr; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + struct btrfs_csum_ctx csum; u8 on_disk_csum[BTRFS_CSUM_SIZE]; u8 calculated_csum[BTRFS_CSUM_SIZE]; @@ -760,17 +759,16 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr } /* Now check tree block csum. 
*/ - shash->tfm = fs_info->csum_shash; - crypto_shash_init(shash); - crypto_shash_update(shash, first_kaddr + BTRFS_CSUM_SIZE, - fs_info->sectorsize - BTRFS_CSUM_SIZE); + btrfs_csum_init(&csum, fs_info->csum_type); + btrfs_csum_update(&csum, first_kaddr + BTRFS_CSUM_SIZE, + fs_info->sectorsize - BTRFS_CSUM_SIZE); for (int i = sector_nr + 1; i < sector_nr + sectors_per_tree; i++) { - crypto_shash_update(shash, scrub_stripe_get_kaddr(stripe, i), - fs_info->sectorsize); + btrfs_csum_update(&csum, scrub_stripe_get_kaddr(stripe, i), + fs_info->sectorsize); } - crypto_shash_final(shash, calculated_csum); + btrfs_csum_final(&csum, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, fs_info->csum_size) != 0) { scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index af56fdbba65d..0a931555e6dc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2700,7 +2700,3 @@ module_exit(exit_btrfs_fs) MODULE_DESCRIPTION("B-Tree File System (BTRFS)"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); -MODULE_SOFTDEP("pre: xxhash64"); -MODULE_SOFTDEP("pre: sha256"); -MODULE_SOFTDEP("pre: blake2b-256"); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 4b3c2acac51a..f0974f4c0ae4 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "messages.h" #include "ctree.h" #include "discard.h" @@ -1253,10 +1252,9 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); u16 csum_type = btrfs_super_csum_type(fs_info->super_copy); + const char *csum_name = btrfs_super_csum_name(csum_type); - return sysfs_emit(buf, "%s (%s)\n", - btrfs_super_csum_name(csum_type), - crypto_shash_driver_name(fs_info->csum_shash)); + return sysfs_emit(buf, "%s (%s-lib)\n", csum_name, csum_name); } BTRFS_ATTR(, checksum, btrfs_checksum_show); -- cgit v1.2.3