diff options
| author | Nick Terrell <terrelln@meta.com> | 2025-03-08 12:09:33 -0800 |
|---|---|---|
| committer | Nick Terrell <terrelln@meta.com> | 2025-03-13 13:25:58 -0700 |
| commit | 65d1f5507ed2c78c64fce40e44e5574a9419eb09 (patch) | |
| tree | 4a1b819db2ea7f2a322e9bcc7582d946d0a4ea29 /lib/zstd/decompress/zstd_decompress_internal.h | |
| parent | 7eb172143d5508b4da468ed59ee857c6e5e01da6 (diff) | |
zstd: Import upstream v1.5.7
In addition to keeping the kernel's copy of zstd up to date, this update
was requested by Intel to expose upstream's APIs that allow QAT to accelerate
the LZ match finding stage of Zstd.
This patch is imported from the upstream tag v1.5.7-kernel [0], which is signed
with upstream's signing key EF8FE99528B52FFD [1]. It was imported from upstream
using this command:
export ZSTD=/path/to/repo/zstd/
export LINUX=/path/to/repo/linux/
cd "$ZSTD/contrib/linux-kernel"
git checkout v1.5.7-kernel
make import LINUX="$LINUX"
This patch has been tested on x86-64, and has been boot tested with
a zstd compressed kernel & initramfs on i386 and aarch64. I benchmarked
the patch on x86-64 with gcc-14.2.1 on an Intel i9-9900K by measruing the
performance of compressed filesystem reads and writes.
Component, Level, Size delta, C. time delta, D. time delta
Btrfs , 1, +0.00%, -6.1%, +1.4%
Btrfs , 3, +0.00%, -9.8%, +3.0%
Btrfs , 5, +0.00%, +1.7%, +1.4%
Btrfs , 7, +0.00%, -1.9%, +2.7%
Btrfs , 9, +0.00%, -3.4%, +3.7%
Btrfs , 15, +0.00%, -0.3%, +3.6%
SquashFS , 1, +0.00%, N/A, +1.9%
The major changes that impact the kernel use cases for each version are:
v1.5.7: https://github.com/facebook/zstd/releases/tag/v1.5.7
* Add zstd_compress_sequences_and_literals() for use by Intel's QAT driver
to implement Zstd compression acceleration in the kernel.
* Fix an underflow bug in 32-bit builds that can cause data corruption when
processing more than 4GB of data with a single `ZSTD_CCtx` object, when an
input crosses the 4GB boundry. I don't believe this impacts any current kernel
use cases, because the `ZSTD_CCtx` is typically reconstructed between
compressions.
* Levels 1-4 see 5-10% compression speed improvements for inputs smaller than
128KB.
v1.5.6: https://github.com/facebook/zstd/releases/tag/v1.5.6
* Improved compression ratio for the highest compression levels. I don't expect
these see much use however, due to their slow speeds.
v1.5.5: https://github.com/facebook/zstd/releases/tag/v1.5.5
* Fix a rare corruption bug that can trigger on levels 13 and above.
* Improve compression speed of levels 5-11 on incompressible data.
v1.5.4: https://github.com/facebook/zstd/releases/tag/v1.5.4
* Improve copmression speed of levels 5-11 on ARM.
* Improve dictionary compression speed.
Signed-off-by: Nick Terrell <terrelln@fb.com>
Diffstat (limited to 'lib/zstd/decompress/zstd_decompress_internal.h')
| -rw-r--r-- | lib/zstd/decompress/zstd_decompress_internal.h | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h index 98102edb6a83..2a225d1811c4 100644 --- a/lib/zstd/decompress/zstd_decompress_internal.h +++ b/lib/zstd/decompress/zstd_decompress_internal.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -75,12 +76,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = { #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) +#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 typedef struct { ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ U32 rep[ZSTD_REP_NUM]; U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; } ZSTD_entropyDTables_t; @@ -135,7 +137,7 @@ struct ZSTD_DCtx_s const void* virtualStart; /* virtual start of previous segment if it was just before current one */ const void* dictEnd; /* end of previous segment */ size_t expected; - ZSTD_frameHeader fParams; + ZSTD_FrameHeader fParams; U64 processedCSize; U64 decodedSize; blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ @@ -152,7 +154,8 @@ struct ZSTD_DCtx_s size_t litSize; size_t rleSize; size_t staticSize; -#if DYNAMIC_BMI2 != 0 + int isFrameDecompression; +#if DYNAMIC_BMI2 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ #endif @@ -164,6 +167,8 @@ struct ZSTD_DCtx_s ZSTD_dictUses_e dictUses; ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ + int disableHufAsm; + int maxBlockSizeParam; /* streaming */ ZSTD_dStreamStage streamStage; @@ -199,11 +204,11 @@ struct ZSTD_DCtx_s }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { -#if DYNAMIC_BMI2 != 0 - return dctx->bmi2; +#if DYNAMIC_BMI2 + return dctx->bmi2; #else (void)dctx; - return 0; + return 0; #endif } |
