summaryrefslogtreecommitdiff
path: root/lib/zstd
diff options
context:
space:
mode:
Diffstat (limited to 'lib/zstd')
-rw-r--r--lib/zstd/Makefile36
-rw-r--r--lib/zstd/common/bitstream.h446
-rw-r--r--lib/zstd/common/compiler.h184
-rw-r--r--lib/zstd/common/cpu.h194
-rw-r--r--lib/zstd/common/debug.c24
-rw-r--r--lib/zstd/common/debug.h92
-rw-r--r--lib/zstd/common/entropy_common.c357
-rw-r--r--lib/zstd/common/error_private.c56
-rw-r--r--lib/zstd/common/error_private.h145
-rw-r--r--lib/zstd/common/fse.h711
-rw-r--r--lib/zstd/common/fse_decompress.c390
-rw-r--r--lib/zstd/common/huf.h358
-rw-r--r--lib/zstd/common/mem.h262
-rw-r--r--lib/zstd/common/portability_macros.h93
-rw-r--r--lib/zstd/common/zstd_common.c83
-rw-r--r--lib/zstd/common/zstd_deps.h125
-rw-r--r--lib/zstd/common/zstd_internal.h443
-rw-r--r--lib/zstd/decompress/huf_decompress.c1740
-rw-r--r--lib/zstd/decompress/zstd_ddict.c241
-rw-r--r--lib/zstd/decompress/zstd_ddict.h44
-rw-r--r--lib/zstd/decompress/zstd_decompress.c2131
-rw-r--r--lib/zstd/decompress/zstd_decompress_block.c2072
-rw-r--r--lib/zstd/decompress/zstd_decompress_block.h68
-rw-r--r--lib/zstd/decompress/zstd_decompress_internal.h228
-rw-r--r--lib/zstd/decompress_sources.h34
-rw-r--r--lib/zstd/zstd.c61
-rw-r--r--lib/zstd/zstd_common_module.c32
-rw-r--r--lib/zstd/zstd_decompress_module.c105
28 files changed, 10755 insertions, 0 deletions
diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
new file mode 100644
index 00000000000..2003e184258
--- /dev/null
+++ b/lib/zstd/Makefile
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
+obj-y += zstd_decompress.o
+obj-y += zstd_common.o
+
+ifeq ($(CONFIG_ZSTD_LIB_MINIFY),y)
+ccflags-y += -DHUF_FORCE_DECOMPRESS_X1
+ccflags-y += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+ccflags-y += -DZSTD_NO_INLINE
+ccflags-y += -DZSTD_STRIP_ERROR_STRINGS
+ccflags-y += -DDYNAMIC_BMI2=0
+endif
+
+zstd_decompress-y := \
+ zstd_decompress_module.o \
+ decompress/huf_decompress.o \
+ decompress/zstd_ddict.o \
+ decompress/zstd_decompress.o \
+ decompress/zstd_decompress_block.o \
+ zstd.o \
+
+zstd_common-y := \
+ zstd_common_module.o \
+ common/debug.o \
+ common/entropy_common.o \
+ common/error_private.o \
+ common/fse_decompress.o \
+ common/zstd_common.o \
diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
new file mode 100644
index 00000000000..feef3a1b1d6
--- /dev/null
+++ b/lib/zstd/common/bitstream.h
@@ -0,0 +1,446 @@
+/* ******************************************************************
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+/*
+* This API consists of small unitary functions, which must be inlined for best performance.
+* Since link-time-optimization is not available for all compilers,
+* these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include "mem.h" /* unaligned access routines */
+#include "compiler.h" /* UNLIKELY() */
+#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
+#include "error_private.h" /* error codes and messages */
+
+
+/*=========================================
+* Target specific
+=========================================*/
+
+#define STREAM_ACCUMULATOR_MIN_32 25
+#define STREAM_ACCUMULATOR_MIN_64 57
+#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+
+/*-******************************************
+* bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
+typedef struct {
+ size_t bitContainer;
+ unsigned bitPos;
+ char* startPtr;
+ char* ptr;
+ char* endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+* bitStream will never write outside of this buffer.
+* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
+*
+* bits are first added to a local register.
+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+* Writing data into memory is an explicit operation, performed by the flushBits function.
+* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+* After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+* Last operation is to close the bitStream.
+* The function returns the final size of CStream in bytes.
+* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
+
+
+/*-********************************************
+* bitStream decoding API (read backward)
+**********************************************/
+typedef struct {
+ size_t bitContainer;
+ unsigned bitsConsumed;
+ const char* ptr;
+ const char* start;
+ const char* limitPtr;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+ BIT_DStream_endOfBuffer = 1,
+ BIT_DStream_completed = 2,
+ BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
+
+
+/*-****************************************
+* unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+* Internal functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (U32 val)
+{
+ assert(val != 0);
+ {
+# if (__GNUC__ >= 3) /* Use GCC Intrinsic */
+ return __builtin_clz (val) ^ 31;
+# else /* Software version */
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
+ 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7,
+ 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+# endif
+ }
+}
+
+/*===== Local Constants =====*/
+static const unsigned BIT_mask[] = {
+ 0, 1, 3, 7, 0xF, 0x1F,
+ 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+ 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
+ 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
+ 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
+ 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
+#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
+
+/*-**************************************************************
+* bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ * `dstCapacity` must be > sizeof(size_t)
+ * @return : 0 if success,
+ * otherwise an error code (can be tested using ERR_isError()) */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+ void* startPtr, size_t dstCapacity)
+{
+ bitC->bitContainer = 0;
+ bitC->bitPos = 0;
+ bitC->startPtr = (char*)startPtr;
+ bitC->ptr = bitC->startPtr;
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+ if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
+ return 0;
+}
+
+/*! BIT_addBits() :
+ * can add up to 31 bits into `bitC`.
+ * Note : does not check for register overflow ! */
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+ size_t value, unsigned nbBits)
+{
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
+ assert(nbBits < BIT_MASK_SIZE);
+ assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+ bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() :
+ * works only if `value` is _clean_,
+ * meaning all high bits above nbBits are 0 */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+ size_t value, unsigned nbBits)
+{
+ assert((value>>nbBits) == 0);
+ assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ bitC->bitContainer |= value << bitC->bitPos;
+ bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ * assumption : bitContainer has not overflowed
+ * unsafe version; does not check buffer overflow */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+ size_t const nbBytes = bitC->bitPos >> 3;
+ assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ assert(bitC->ptr <= bitC->endPtr);
+ MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+ bitC->ptr += nbBytes;
+ bitC->bitPos &= 7;
+ bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_flushBits() :
+ * assumption : bitContainer has not overflowed
+ * safe version; check for buffer overflow, and prevents it.
+ * note : does not signal buffer overflow.
+ * overflow will be revealed later on using BIT_closeCStream() */
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+ size_t const nbBytes = bitC->bitPos >> 3;
+ assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ assert(bitC->ptr <= bitC->endPtr);
+ MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+ bitC->ptr += nbBytes;
+ if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+ bitC->bitPos &= 7;
+ bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_closeCStream() :
+ * @return : size of CStream, in bytes,
+ * or 0 if it could not fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+ BIT_addBitsFast(bitC, 1, 1); /* endMark */
+ BIT_flushBits(bitC);
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+ return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
+}
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+ * Initialize a BIT_DStream_t.
+ * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+ * `srcSize` must be the *exact* size of the bitStream, in bytes.
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected
+ */
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+ bitD->start = (const char*)srcBuffer;
+ bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
+ if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+ bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
+ if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+ } else {
+ bitD->ptr = bitD->start;
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
+ switch(srcSize)
+ {
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+ ZSTD_FALLTHROUGH;
+
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+ ZSTD_FALLTHROUGH;
+
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+ ZSTD_FALLTHROUGH;
+
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+ ZSTD_FALLTHROUGH;
+
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+ ZSTD_FALLTHROUGH;
+
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
+ ZSTD_FALLTHROUGH;
+
+ default: break;
+ }
+ { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+ bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+ if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
+ }
+ bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+ }
+
+ return srcSize;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+{
+ return bitContainer >> start;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+{
+ U32 const regMask = sizeof(bitContainer)*8 - 1;
+ /* if start > regMask, bitstream is corrupted, and result is undefined */
+ assert(nbBits < BIT_MASK_SIZE);
+ /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
+ * than accessing memory. When bmi2 instruction is not present, we consider
+ * such cpus old (pre-Haswell, 2013) and their performance is not of that
+ * importance.
+ */
+#if defined(__x86_64__) || defined(_M_X86)
+ return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
+#else
+ return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
+#endif
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+ assert(nbBits < BIT_MASK_SIZE);
+ return bitContainer & BIT_mask[nbBits];
+}
+
+/*! BIT_lookBits() :
+ * Provides next n bits from local register.
+ * local register is not modified.
+ * On 32-bits, maxNbBits==24.
+ * On 64-bits, maxNbBits==56.
+ * @return : value extracted */
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+{
+ /* arbitrate between double-shift and shift+mask */
+#if 1
+ /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
+ * bitstream is likely corrupted, and result is undefined */
+ return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
+#else
+ /* this code path is slower on my os-x laptop */
+ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
+#endif
+}
+
+/*! BIT_lookBitsFast() :
+ * unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+ assert(nbBits >= 1);
+ return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
+}
+
+MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ bitD->bitsConsumed += nbBits;
+}
+
+/*! BIT_readBits() :
+ * Read (consume) next n bits from local register and update.
+ * Pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value. */
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+{
+ size_t const value = BIT_lookBits(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*! BIT_readBitsFast() :
+ * unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+{
+ size_t const value = BIT_lookBitsFast(bitD, nbBits);
+ assert(nbBits >= 1);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*! BIT_reloadDStreamFast() :
+ * Similar to BIT_reloadDStream(), but with two differences:
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
+ * point you must use BIT_reloadDStream() to reload.
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+{
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
+ return BIT_DStream_overflow;
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+}
+
+/*! BIT_reloadDStream() :
+ * Refill `bitD` from buffer previously set in BIT_initDStream() .
+ * This function is safe, it guarantees it will not read beyond src buffer.
+ * @return : status of `BIT_DStream_t` internal register.
+ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
+ return BIT_DStream_overflow;
+
+ if (bitD->ptr >= bitD->limitPtr) {
+ return BIT_reloadDStreamFast(bitD);
+ }
+ if (bitD->ptr == bitD->start) {
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+ return BIT_DStream_completed;
+ }
+ /* start < ptr < limitPtr */
+ { U32 nbBytes = bitD->bitsConsumed >> 3;
+ BIT_DStream_status result = BIT_DStream_unfinished;
+ if (bitD->ptr - nbBytes < bitD->start) {
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+ result = BIT_DStream_endOfBuffer;
+ }
+ bitD->ptr -= nbBytes;
+ bitD->bitsConsumed -= nbBytes*8;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
+ return result;
+ }
+}
+
+/*! BIT_endOfDStream() :
+ * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
+ */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+
+#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
new file mode 100644
index 00000000000..c42d39faf9b
--- /dev/null
+++ b/lib/zstd/common/compiler.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+#include "portability_macros.h"
+
+/*-*******************************************************
+* Compiler specifics
+*********************************************************/
+/* force inlining */
+
+#if !defined(ZSTD_NO_INLINE)
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# define INLINE_KEYWORD inline
+#else
+# define INLINE_KEYWORD
+#endif
+
+#define FORCE_INLINE_ATTR __attribute__((always_inline))
+
+#else
+
+#define INLINE_KEYWORD
+#define FORCE_INLINE_ATTR
+
+#endif
+
+/*
+ On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
+ This explicitly marks such functions as __cdecl so that the code will still compile
+ if a CC other than __cdecl has been made the default.
+*/
+#define WIN_CDECL
+
+/*
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/*
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compilers
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
+ * attribute.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+# define HINT_INLINE static INLINE_KEYWORD
+#else
+# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#endif
+
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#define UNUSED_ATTR __attribute__((unused))
+
+/* force no inlining */
+#define FORCE_NOINLINE static __attribute__((__noinline__))
+
+
+/* target attribute */
+#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+
+/* Target attribute for BMI2 dynamic dispatch.
+ * Enable lzcnt, bmi, and bmi2.
+ * We test for bmi1 & bmi2. lzcnt is included in bmi1.
+ */
+#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
+
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro */
+#if ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#elif defined(__aarch64__)
+# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+#else
+# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
+# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
+#endif /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s) { \
+ const char* const _ptr = (const char*)(p); \
+ size_t const _size = (size_t)(s); \
+ size_t _pos; \
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+ PREFETCH_L2(_ptr + _pos); \
+ } \
+}
+
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
+ * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
+# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# else
+# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+# endif
+#else
+# define DONT_VECTORIZE
+#endif
+
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+
+#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+#else
+# define ZSTD_UNREACHABLE { assert(0); }
+#endif
+
+/* disable warnings */
+
+/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
+
+
+/* compile time determination of SIMD support */
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define ZSTD_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+/* Only use C++ attributes in C++. Some compilers report support for C++
+ * attributes when compiling with C.
+ */
+#define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
+
+/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
+ * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * - Else: __attribute__((__fallthrough__))
+ */
+#define ZSTD_FALLTHROUGH fallthrough
+
+/*-**************************************************************
+* Alignment check
+*****************************************************************/
+
+/* this test was initially positioned in mem.h,
+ * but this file is removed (or replaced) for linux kernel
+ * so it's now hosted in compiler.h,
+ * which remains valid for both user & kernel spaces.
+ */
+
+#ifndef ZSTD_ALIGNOF
+/* covers gcc, clang & MSVC */
+/* note : this section must come first, before C11,
+ * due to a limitation in the kernel source generator */
+# define ZSTD_ALIGNOF(T) __alignof(T)
+
+#endif /* ZSTD_ALIGNOF */
+
+/*-**************************************************************
+* Sanitizer
+*****************************************************************/
+
+
+
+#endif /* ZSTD_COMPILER_H */
diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h
new file mode 100644
index 00000000000..0db7b42407e
--- /dev/null
+++ b/lib/zstd/common/cpu.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/*
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include "mem.h"
+
+
+typedef struct {
+ U32 f1c;
+ U32 f1d;
+ U32 f7b;
+ U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+ U32 f1c = 0;
+ U32 f1d = 0;
+ U32 f7b = 0;
+ U32 f7c = 0;
+#if defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+ /* The following block like the normal cpuid branch below, but gcc
+ * reserves ebx for use of its pic register so we must specially
+ * handle the save and restore to avoid clobbering the register
+ */
+ U32 n;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1));
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %%eax\n\t"
+ "popl %%ebx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+ U32 n;
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+ }
+ if (n >= 7) {
+ U32 f7a;
+ __asm__("cpuid"
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#endif
+ {
+ ZSTD_cpuid_t cpuid;
+ cpuid.f1c = f1c;
+ cpuid.f1d = f1d;
+ cpuid.f7b = f7b;
+ cpuid.f7c = f7c;
+ return cpuid;
+ }
+}
+
+#define X(name, r, bit) \
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
+ return ((cpuid.r) & (1U << bit)) != 0; \
+ }
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)
+ C(sse3, 0)
+ C(pclmuldq, 1)
+ C(dtes64, 2)
+ C(monitor, 3)
+ C(dscpl, 4)
+ C(vmx, 5)
+ C(smx, 6)
+ C(eist, 7)
+ C(tm2, 8)
+ C(ssse3, 9)
+ C(cnxtid, 10)
+ C(fma, 12)
+ C(cx16, 13)
+ C(xtpr, 14)
+ C(pdcm, 15)
+ C(pcid, 17)
+ C(dca, 18)
+ C(sse41, 19)
+ C(sse42, 20)
+ C(x2apic, 21)
+ C(movbe, 22)
+ C(popcnt, 23)
+ C(tscdeadline, 24)
+ C(aes, 25)
+ C(xsave, 26)
+ C(osxsave, 27)
+ C(avx, 28)
+ C(f16c, 29)
+ C(rdrand, 30)
+#undef C
+#define D(name, bit) X(name, f1d, bit)
+ D(fpu, 0)
+ D(vme, 1)
+ D(de, 2)
+ D(pse, 3)
+ D(tsc, 4)
+ D(msr, 5)
+ D(pae, 6)
+ D(mce, 7)
+ D(cx8, 8)
+ D(apic, 9)
+ D(sep, 11)
+ D(mtrr, 12)
+ D(pge, 13)
+ D(mca, 14)
+ D(cmov, 15)
+ D(pat, 16)
+ D(pse36, 17)
+ D(psn, 18)
+ D(clfsh, 19)
+ D(ds, 21)
+ D(acpi, 22)
+ D(mmx, 23)
+ D(fxsr, 24)
+ D(sse, 25)
+ D(sse2, 26)
+ D(ss, 27)
+ D(htt, 28)
+ D(tm, 29)
+ D(pbe, 31)
+#undef D
+
+/* cpuid(7): Extended Features. */
+#define B(name, bit) X(name, f7b, bit)
+ B(bmi1, 3)
+ B(hle, 4)
+ B(avx2, 5)
+ B(smep, 7)
+ B(bmi2, 8)
+ B(erms, 9)
+ B(invpcid, 10)
+ B(rtm, 11)
+ B(mpx, 14)
+ B(avx512f, 16)
+ B(avx512dq, 17)
+ B(rdseed, 18)
+ B(adx, 19)
+ B(smap, 20)
+ B(avx512ifma, 21)
+ B(pcommit, 22)
+ B(clflushopt, 23)
+ B(clwb, 24)
+ B(avx512pf, 26)
+ B(avx512er, 27)
+ B(avx512cd, 28)
+ B(sha, 29)
+ B(avx512bw, 30)
+ B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c, bit)
+ C(prefetchwt1, 0)
+ C(avx512vbmi, 1)
+#undef C
+
+#undef X
+
+#endif /* ZSTD_COMMON_CPU_H */
diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c
new file mode 100644
index 00000000000..bb863c9ea61
--- /dev/null
+++ b/lib/zstd/common/debug.c
@@ -0,0 +1,24 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * This module only hosts one global variable
+ * which can be used to dynamically influence the verbosity of traces,
+ * such as DEBUGLOG and RAWLOG
+ */
+
+#include "debug.h"
+
+int g_debuglevel = DEBUGLEVEL;
diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h
new file mode 100644
index 00000000000..7f43dd3c063
--- /dev/null
+++ b/lib/zstd/common/debug.h
@@ -0,0 +1,92 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * The purpose of this header is to enable debug functions.
+ * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
+ * and DEBUG_STATIC_ASSERT() for compile-time.
+ *
+ * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
+ *
+ * Level 1 enables assert() only.
+ * Starting level 2, traces can be generated and pushed to stderr.
+ * The higher the level, the more verbose the traces.
+ *
+ * It's possible to dynamically adjust level using variable g_debug_level,
+ * which is only declared if DEBUGLEVEL>=2,
+ * and is a global variable, not multi-thread protected (use with care)
+ */
+
+#ifndef DEBUG_H_12987983217
+#define DEBUG_H_12987983217
+
+
+
+/* static assert is triggered at compile time, leaving no runtime artefact.
+ * static assert only works with compile-time constants.
+ * Also, this variant can only be used inside a function. */
+#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
+
+
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number. */
+#ifndef DEBUGLEVEL
+# define DEBUGLEVEL 0
+#endif
+
+
+/* recommended values for DEBUGLEVEL :
+ * 0 : release mode, no debug, all run-time checks disabled
+ * 1 : enables assert() only, no display
+ * 2 : reserved, for currently active debug path
+ * 3 : events once per object lifetime (CCtx, CDict, etc.)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (verbose)
+ * 7+: events at every position (*very* verbose)
+ *
+ * It's generally inconvenient to output traces > 5.
+ * In which case, it's possible to selectively trigger high verbosity levels
+ * by modifying g_debug_level.
+ */
+
+#if (DEBUGLEVEL>=2)
+# define ZSTD_DEPS_NEED_IO
+# include "zstd_deps.h"
+extern int g_debuglevel; /* the variable is only declared,
+ it actually lives in debug.c,
+ and is shared by the whole process.
+ It's not thread-safe.
+ It's useful when enabling very verbose levels
+ on selective conditions (such as position in src) */
+
+# define RAWLOG(l, ...) { \
+ if (l<=g_debuglevel) { \
+ ZSTD_DEBUG_PRINT(__VA_ARGS__); \
+ } }
+# define DEBUGLOG(l, ...) { \
+ if (l<=g_debuglevel) { \
+ ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+ ZSTD_DEBUG_PRINT(" \n"); \
+ } }
+#else
+# define RAWLOG(l, ...) {} /* disabled */
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
+
+
+#endif /* DEBUG_H_12987983217 */
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
new file mode 100644
index 00000000000..fef67056f05
--- /dev/null
+++ b/lib/zstd/common/entropy_common.c
@@ -0,0 +1,357 @@
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* *************************************
+* Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h" /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
+#include "huf.h"
+
+
+/*=== Version ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*=== Error Management ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+static U32 FSE_ctz(U32 val)
+{
+ assert(val != 0);
+ {
+# if (__GNUC__ >= 3) /* GCC Intrinsic */
+ return __builtin_ctz(val);
+# else /* Software version */
+ U32 count = 0;
+ while ((val & 1) == 0) {
+ val >>= 1;
+ ++count;
+ }
+ return count;
+# endif
+ }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ unsigned const maxSV1 = *maxSVPtr + 1;
+ int previous0 = 0;
+
+ if (hbSize < 8) {
+ /* This function only works when hbSize >= 8 */
+ char buffer[8] = {0};
+ ZSTD_memcpy(buffer, headerBuffer, hbSize);
+ { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+ buffer, sizeof(buffer));
+ if (FSE_isError(countSize)) return countSize;
+ if (countSize > hbSize) return ERROR(corruption_detected);
+ return countSize;
+ } }
+ assert(hbSize >= 8);
+
+ /* init */
+ ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ for (;;) {
+ if (previous0) {
+ /* Count the number of repeats. Each time the
+ * 2-bit repeat code is 0b11 there is another
+ * repeat.
+ * Avoid UB by setting the high bit to 1.
+ */
+ int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+ while (repeats >= 12) {
+ charnum += 3 * 12;
+ if (LIKELY(ip <= iend-7)) {
+ ip += 3;
+ } else {
+ bitCount -= (int)(8 * (iend - 7 - ip));
+ bitCount &= 31;
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+ }
+ charnum += 3 * repeats;
+ bitStream >>= 2 * repeats;
+ bitCount += 2 * repeats;
+
+ /* Add the final repeat which isn't 0b11. */
+ assert((bitStream & 3) < 3);
+ charnum += bitStream & 3;
+ bitCount += 2;
+
+ /* This is an error, but break and return an error
+ * at the end, because returning out of a loop makes
+ * it harder for the compiler to optimize.
+ */
+ if (charnum >= maxSV1) break;
+
+ /* We don't need to set the normalized count to 0
+ * because we already memset the whole buffer to 0.
+ */
+
+ if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+ assert((bitCount >> 3) <= 3); /* For first condition to work */
+ ip += bitCount>>3;
+ bitCount &= 7;
+ } else {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ bitCount &= 31;
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ {
+ int const max = (2*threshold-1) - remaining;
+ int count;
+
+ if ((bitStream & (threshold-1)) < (U32)max) {
+ count = bitStream & (threshold-1);
+ bitCount += nbBits-1;
+ } else {
+ count = bitStream & (2*threshold-1);
+ if (count >= threshold) count -= max;
+ bitCount += nbBits;
+ }
+
+ count--; /* extra accuracy */
+ /* When it matters (small blocks), this is a
+ * predictable branch, because we don't use -1.
+ */
+ if (count >= 0) {
+ remaining -= count;
+ } else {
+ assert(count == -1);
+ remaining += count;
+ }
+ normalizedCounter[charnum++] = (short)count;
+ previous0 = !count;
+
+ assert(threshold > 1);
+ if (remaining < threshold) {
+ /* This branch can be folded into the
+ * threshold update condition because we
+ * know that threshold > 1.
+ */
+ if (remaining <= 1) break;
+ nbBits = BIT_highbit32(remaining) + 1;
+ threshold = 1 << (nbBits - 1);
+ }
+ if (charnum >= maxSV1) break;
+
+ if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ } else {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ bitCount &= 31;
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ } }
+ if (remaining != 1) return ERROR(corruption_detected);
+ /* Only possible when there are too many zeros. */
+ if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
+ if (bitCount > 32) return ERROR(corruption_detected);
+ *maxSVPtr = charnum-1;
+
+ ip += (bitCount+7)>>3;
+ return ip-istart;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_readNCount_body_default(
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+#if DYNAMIC_BMI2
+BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+#endif
+
+size_t FSE_readNCount_bmi2(
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+ return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+ }
+#endif
+ (void)bmi2;
+ return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+size_t FSE_readNCount(
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
+}
+
+
+/*! HUF_readStats() :
+ Read compact Huffman tree, saved by HUF_writeCTable().
+ `huffWeight` is destination buffer.
+ `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
+ @return : size read from `src` , or an error Code .
+ Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
+*/
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize)
+{
+ U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+ return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize,
+ int bmi2)
+{
+ U32 weightTotal;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize;
+ size_t oSize;
+
+ if (!srcSize) return ERROR(srcSize_wrong);
+ iSize = ip[0];
+ /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
+
+ if (iSize >= 128) { /* special header */
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ if (oSize >= hwSize) return ERROR(corruption_detected);
+ ip += 1;
+ { U32 n;
+ for (n=0; n<oSize; n+=2) {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ } } }
+ else { /* header compressed with FSE (normal case) */
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ /* max (hwSize-1) values decoded, as last one is implied */
+ oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
+ if (FSE_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+ weightTotal = 0;
+ { U32 n; for (n=0; n<oSize; n++) {
+ if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ } }
+ if (weightTotal == 0) return ERROR(corruption_detected);
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+ if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+ *tableLogPtr = tableLog;
+ /* determine last weight */
+ { U32 const total = 1 << tableLog;
+ U32 const rest = total - weightTotal;
+ U32 const verif = 1 << BIT_highbit32(rest);
+ U32 const lastWeight = BIT_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ } }
+
+ /* check tree construction validity */
+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* results */
+ *nbSymbolsPtr = (U32)(oSize+1);
+ return iSize+1;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize,
+ int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+ return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+ }
+#endif
+ (void)bmi2;
+ return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+}
diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c
new file mode 100644
index 00000000000..6d1135f8c37
--- /dev/null
+++ b/lib/zstd/common/error_private.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "error_private.h"
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+#ifdef ZSTD_STRIP_ERROR_STRINGS
+ (void)code;
+ return "Error strings stripped";
+#else
+ static const char* const notErrorCode = "Unspecified error code";
+ switch( code )
+ {
+ case PREFIX(no_error): return "No error detected";
+ case PREFIX(GENERIC): return "Error (generic)";
+ case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+ case PREFIX(version_unsupported): return "Version not supported";
+ case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+ case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+ case PREFIX(corruption_detected): return "Corrupted block detected";
+ case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+ case PREFIX(parameter_unsupported): return "Unsupported parameter";
+ case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
+ case PREFIX(init_missing): return "Context should be init first";
+ case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+ case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
+ case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+ case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+ case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+ case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+ case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+ case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+ case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+ case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+ case PREFIX(srcSize_wrong): return "Src size is incorrect";
+ case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
+ /* following error codes are not stable and may be removed or changed in a future version */
+ case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+ case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+ case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+ case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
+ case PREFIX(maxCode):
+ default: return notErrorCode;
+ }
+#endif
+}
diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
new file mode 100644
index 00000000000..ca5101e542f
--- /dev/null
+++ b/lib/zstd/common/error_private.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+
+
+/* ****************************************
+* Dependencies
+******************************************/
+#include <linux/zstd_errors.h> /* enum list */
+#include "compiler.h"
+#include "debug.h"
+#include "zstd_deps.h" /* size_t */
+
+
+/* ****************************************
+* Compiler-specific
+******************************************/
+#define ERR_STATIC static __attribute__((unused))
+
+
+/*-****************************************
+* Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+* Error codes handling
+******************************************/
+#undef ERROR /* already defined on Visual Studio */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+/* check and forward error code */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
+
+
+/*-****************************************
+* Error Strings
+******************************************/
+
+const char* ERR_getErrorString(ERR_enum code); /* error_private.c */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+ return ERR_getErrorString(ERR_getErrorCode(code));
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+ (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+ if (0) { \
+ _force_has_format_string(__VA_ARGS__); \
+ }
+
+#define ERR_QUOTE(str) #str
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+ if (cond) { \
+ RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+ __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+ RAWLOG(3, ": " __VA_ARGS__); \
+ RAWLOG(3, "\n"); \
+ return ERROR(err); \
+ }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+ do { \
+ RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+ __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+ RAWLOG(3, ": " __VA_ARGS__); \
+ RAWLOG(3, "\n"); \
+ return ERROR(err); \
+ } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+ do { \
+ size_t const err_code = (err); \
+ if (ERR_isError(err_code)) { \
+ RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+ __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+ RAWLOG(3, ": " __VA_ARGS__); \
+ RAWLOG(3, "\n"); \
+ return err_code; \
+ } \
+ } while(0);
+
+
+#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
new file mode 100644
index 00000000000..4507043b228
--- /dev/null
+++ b/lib/zstd/common/fse.h
@@ -0,0 +1,711 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef FSE_H
+#define FSE_H
+
+
+/*-*****************************************
+* Dependencies
+******************************************/
+#include "zstd_deps.h" /* size_t, ptrdiff_t */
+
+
+/*-*****************************************
+* FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+# define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
+# define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+# define FSE_PUBLIC_API
+#endif
+
+/*------ Version ------*/
+#define FSE_VERSION_MAJOR 0
+#define FSE_VERSION_MINOR 9
+#define FSE_VERSION_RELEASE 0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */
+
+
+/*-****************************************
+* FSE simple functions
+******************************************/
+/*! FSE_compress() :
+ Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+ 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
+ @return : size of compressed data (<= dstCapacity).
+ Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+ if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+ Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+ into already allocated destination buffer 'dst', of size 'dstCapacity'.
+ @return : size of regenerated data (<= maxDstSize),
+ or an error code, which can be tested using FSE_isError() .
+
+ ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+ Why ? : making this distinction requires a header.
+ Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
+ const void* cSrc, size_t cSrcSize);
+
+
+/*-*****************************************
+* Tool functions
+******************************************/
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
+
+/* Error Management */
+FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+* FSE advanced functions
+******************************************/
+/*! FSE_compress2() :
+ Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+ Both parameters can be defined as '0' to mean : use default value
+ @return : size of compressed data
+ Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
+ if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+ if FSE_isError(return), it's an error code.
+*/
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*-*****************************************
+* FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[] (see hist.h)
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*! FSE_optimalTableLog():
+ dynamically downsize 'tableLog' when conditions are met.
+ It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+ @return : recommended tableLog (necessarily <= 'maxTableLog') */
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+ normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+ 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+ useLowProbCount is a boolean parameter which trades off compressed size for
+ faster header decoding. When it is set to 1, the compressed data will be slightly
+ smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
+ faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
+ is a good default, since header deserialization makes a big speed difference.
+ Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
+ @return : tableLog,
+ or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
+
+/*! FSE_NCountWriteBound():
+ Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+ Typically useful for allocation purpose. */
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+ Compactly save 'normalizedCounter' into 'buffer'.
+ @return : size of the compressed table,
+ or an errorCode, which can be tested using FSE_isError(). */
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+ const short* normalizedCounter,
+ unsigned maxSymbolValue, unsigned tableLog);
+
+/*! Constructor and Destructor of FSE_CTable.
+ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
+
+/*! FSE_buildCTable():
+ Builds `ct`, which must be already allocated, using FSE_createCTable().
+ @return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_compress_usingCTable():
+ Compress `src` using `ct` into `dst` which must be already allocated.
+ @return : size of compressed data (<= `dstCapacity`),
+ or 0 if compressed data could not fit into `dst`,
+ or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrence of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_headerBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+ Read compactly saved 'normalizedCounter' from 'rBuffer'.
+ @return : size read from 'rBuffer',
+ or an errorCode, which can be tested using FSE_isError().
+ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+ const void* rBuffer, size_t rBuffSize);
+
+/*! FSE_readNCount_bmi2():
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
+ */
+FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+ const void* rBuffer, size_t rBuffSize, int bmi2);
+
+/*! Constructor and Destructor of FSE_DTable.
+ Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+ Builds 'dt', which must be already allocated, using FSE_createDTable().
+ return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+ Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+ into `dst` which must be already allocated.
+ @return : size of regenerated data (necessarily <= `dstCapacity`),
+ or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+#endif /* FSE_H */
+
+#if !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY
+
+/* *** Dependency *** */
+#include "bitstream.h"
+
+
+/* *****************************************
+* Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
+
+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
+
+/* *****************************************
+ * FSE advanced API
+ ***************************************** */
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/*< same as FSE_optimalTableLog(), which used `minus==2` */
+
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/*< build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
+ * See FSE_buildCTable_wksp() for breakdown of workspace usage.
+ */
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
+#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+/*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
+/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
+
+typedef enum {
+ FSE_repeat_none, /*< Cannot use the previous table */
+ FSE_repeat_check, /*< Can use the previous table but it must be checked */
+ FSE_repeat_valid /*< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
+
+/* *****************************************
+* FSE symbol compression API
+*******************************************/
+/*!
+ This API consists of small unitary functions, which highly benefit from being inlined.
+ Hence their body are included in next section.
+*/
+typedef struct {
+ ptrdiff_t value;
+ const void* stateTable;
+ const void* symbolTT;
+ unsigned stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/*<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable ct; // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream; // bitStream tracking structure
+FSE_CState_t state; // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+ size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+ FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+ FSE_encodeByte(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+ BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+ BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+ FSE_flushState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+ size_t size = BIT_closeCStream(&bitStream);
+*/
+
+
+/* *****************************************
+* FSE symbol decompression API
+*******************************************/
+typedef struct {
+ size_t state;
+ const void* table; /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream; // Stream context
+FSE_DState_t DState; // State context. Multiple ones are possible
+FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+ errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+ errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+ size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+ endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+ BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+ BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+ FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+* FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+* Implementation of inlined functions
+*******************************************/
+typedef struct {
+ int deltaFindState;
+ U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+ const void* ptr = ct;
+ const U16* u16ptr = (const U16*) ptr;
+ const U32 tableLog = MEM_read16(ptr);
+ statePtr->value = (ptrdiff_t)1<<tableLog;
+ statePtr->stateTable = u16ptr+2;
+ statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
+ statePtr->stateLog = tableLog;
+}
+
+
+/*! FSE_initCState2() :
+* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
+* uses the smallest state value possible, saving the cost of this symbol */
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+ FSE_initCState(statePtr, ct);
+ { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+ const U16* stateTable = (const U16*)(statePtr->stateTable);
+ U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
+ statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
+ statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+ }
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
+{
+ FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+ const U16* const stateTable = (const U16*)(statePtr->stateTable);
+ U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+ BIT_addBits(bitC, statePtr->value, nbBitsOut);
+ statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{
+ BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+ BIT_flushBits(bitC);
+}
+
+
+/* FSE_getMaxNbBits() :
+ * Approximate maximum cost of a symbol, in bits.
+ * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
+{
+ const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+ return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
+}
+
+/* FSE_bitCost() :
+ * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
+{
+ const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+ U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
+ U32 const threshold = (minNbBits+1) << 16;
+ assert(tableLog < 16);
+ assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
+ { U32 const tableSize = 1 << tableLog;
+ U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
+ U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
+ U32 const bitMultiplier = 1 << accuracyLog;
+ assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
+ assert(normalizedDeltaFromThreshold <= bitMultiplier);
+ return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
+ }
+}
+
+
+/* ====== Decompression ====== */
+
+typedef struct {
+ U16 tableLog;
+ U16 fastMode;
+} FSE_DTableHeader; /* sizeof U32 */
+
+typedef struct
+{
+ unsigned short newState;
+ unsigned char symbol;
+ unsigned char nbBits;
+} FSE_decode_t; /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+ const void* ptr = dt;
+ const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ return DInfo.symbol;
+}
+
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+ DStatePtr->state = DInfo.newState + lowBits;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ BYTE const symbol = DInfo.symbol;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+ unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ BYTE const symbol = DInfo.symbol;
+ size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+ return DStatePtr->state == 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+* Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#ifndef FSE_MAX_MEMORY_USAGE
+# define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+# define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
+#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
+# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
+#endif
+
+/*!FSE_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+# define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+* Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
new file mode 100644
index 00000000000..a0d06095be8
--- /dev/null
+++ b/lib/zstd/common/fse_decompress.c
@@ -0,0 +1,390 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/* **************************************************************
+* Includes
+****************************************************************/
+#include "debug.h" /* assert */
+#include "bitstream.h"
+#include "compiler.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"
+
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
+
+
+/* **************************************************************
+* Templates
+****************************************************************/
+/*
+ designed to be included
+ for type-specific functions (template emulation in C)
+ Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+# error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+# error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+ if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+ return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{
+ ZSTD_free(dt);
+}
+
+static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+ void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+ U16* symbolNext = (U16*)workSpace;
+ BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
+
+ U32 const maxSV1 = maxSymbolValue + 1;
+ U32 const tableSize = 1 << tableLog;
+ U32 highThreshold = tableSize-1;
+
+ /* Sanity Checks */
+ if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+ /* Init, lay down lowprob symbols */
+ { FSE_DTableHeader DTableH;
+ DTableH.tableLog = (U16)tableLog;
+ DTableH.fastMode = 1;
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
+ U32 s;
+ for (s=0; s<maxSV1; s++) {
+ if (normalizedCounter[s]==-1) {
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+ symbolNext[s] = 1;
+ } else {
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+ symbolNext[s] = normalizedCounter[s];
+ } } }
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+ }
+
+ /* Spread symbols */
+ if (highThreshold == tableSize - 1) {
+ size_t const tableMask = tableSize-1;
+ size_t const step = FSE_TABLESTEP(tableSize);
+ /* First lay down the symbols in order.
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+ * misses since small blocks generally have small table logs, so nearly
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+ * our buffer to handle the over-write.
+ */
+ {
+ U64 const add = 0x0101010101010101ull;
+ size_t pos = 0;
+ U64 sv = 0;
+ U32 s;
+ for (s=0; s<maxSV1; ++s, sv += add) {
+ int i;
+ int const n = normalizedCounter[s];
+ MEM_write64(spread + pos, sv);
+ for (i = 8; i < n; i += 8) {
+ MEM_write64(spread + pos + i, sv);
+ }
+ pos += n;
+ }
+ }
+ /* Now we spread those positions across the table.
+ * The benefit of doing it in two stages is that we avoid the the
+ * variable size inner loop, which caused lots of branch misses.
+ * Now we can run through all the positions without any branch misses.
+ * We unroll the loop twice, since that is what emperically worked best.
+ */
+ {
+ size_t position = 0;
+ size_t s;
+ size_t const unroll = 2;
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
+ size_t u;
+ for (u = 0; u < unroll; ++u) {
+ size_t const uPosition = (position + (u * step)) & tableMask;
+ tableDecode[uPosition].symbol = spread[s + u];
+ }
+ position = (position + (unroll * step)) & tableMask;
+ }
+ assert(position == 0);
+ }
+ } else {
+ U32 const tableMask = tableSize-1;
+ U32 const step = FSE_TABLESTEP(tableSize);
+ U32 s, position = 0;
+ for (s=0; s<maxSV1; s++) {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++) {
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ } }
+ if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+ }
+
+ /* Build Decoding table */
+ { U32 u;
+ for (u=0; u<tableSize; u++) {
+ FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+ U32 const nextState = symbolNext[symbol]++;
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+ tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+ } }
+
+ return 0;
+}
+
+size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+ return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-*******************************************************
+* Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ void* dPtr = dt + 1;
+ FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->newState = 0;
+ cell->symbol = symbolValue;
+ cell->nbBits = 0;
+
+ return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ void* dPtr = dt + 1;
+ FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+ const unsigned tableSize = 1 << nbBits;
+ const unsigned tableMask = tableSize - 1;
+ const unsigned maxSV1 = tableMask+1;
+ unsigned s;
+
+ /* Sanity checks */
+ if (nbBits < 1) return ERROR(GENERIC); /* min size */
+
+ /* Build Decoding Table */
+ DTableH->tableLog = (U16)nbBits;
+ DTableH->fastMode = 1;
+ for (s=0; s<maxSV1; s++) {
+ dinfo[s].newState = 0;
+ dinfo[s].symbol = (BYTE)s;
+ dinfo[s].nbBits = (BYTE)nbBits;
+ }
+
+ return 0;
+}
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+ void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt, const unsigned fast)
+{
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* op = ostart;
+ BYTE* const omax = op + maxDstSize;
+ BYTE* const olimit = omax-3;
+
+ BIT_DStream_t bitD;
+ FSE_DState_t state1;
+ FSE_DState_t state2;
+
+ /* Init */
+ CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
+
+ FSE_initDState(&state1, &bitD, dt);
+ FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+ /* 4 symbols per loop */
+ for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
+ op[0] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[1] = FSE_GETSYMBOL(&state2);
+
+ if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+ op[2] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[3] = FSE_GETSYMBOL(&state2);
+ }
+
+ /* tail */
+ /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+ while (1) {
+ if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+ *op++ = FSE_GETSYMBOL(&state1);
+ if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+ *op++ = FSE_GETSYMBOL(&state2);
+ break;
+ }
+
+ if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+ *op++ = FSE_GETSYMBOL(&state2);
+ if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+ *op++ = FSE_GETSYMBOL(&state1);
+ break;
+ } }
+
+ return op-ostart;
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt)
+{
+ const void* ptr = dt;
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+ const U32 fastMode = DTableH->fastMode;
+
+ /* select fast mode (static) */
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+ return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+typedef struct {
+ short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+ FSE_DTable dtable[1]; /* Dynamically sized */
+} FSE_DecompressWksp;
+
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+ void* dst, size_t dstCapacity,
+ const void* cSrc, size_t cSrcSize,
+ unsigned maxLog, void* workSpace, size_t wkspSize,
+ int bmi2)
+{
+ const BYTE* const istart = (const BYTE*)cSrc;
+ const BYTE* ip = istart;
+ unsigned tableLog;
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+ FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+
+ DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+ if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
+
+ /* normal FSE decoding mode */
+ {
+ size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+ if (FSE_isError(NCountLength)) return NCountLength;
+ if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+ assert(NCountLength <= cSrcSize);
+ ip += NCountLength;
+ cSrcSize -= NCountLength;
+ }
+
+ if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
+ workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+ wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
+
+ CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+
+ {
+ const void* ptr = wksp->dtable;
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+ const U32 fastMode = DTableH->fastMode;
+
+ /* select fast mode (static) */
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+ }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+ return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+ }
+#endif
+ (void)bmi2;
+ return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+}
+
+
+typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+
+#endif /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h
new file mode 100644
index 00000000000..5042ff87030
--- /dev/null
+++ b/lib/zstd/common/huf.h
@@ -0,0 +1,358 @@
+/* ******************************************************************
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+/* *** Dependencies *** */
+#include "zstd_deps.h" /* size_t */
+
+
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ * HUF symbols remain "private" (internal symbols for library only).
+ * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
+# define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+# define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* *** simple functions *** */
+/* ========================== */
+
+/* HUF_compress() :
+ * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated.
+ * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ * if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+/* HUF_decompress() :
+ * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ * into already allocated buffer 'dst', of minimum size 'dstSize'.
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ * Note : in contrast with FSE, HUF_decompress can regenerate
+ * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ * because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ * or an error code, which can be tested using HUF_isError()
+ */
+HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize);
+
+
+/* *** Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */
+
+/* Error Management */
+HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */
+
+
+/* *** Advanced function *** */
+
+/* HUF_compress2() :
+ * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog);
+
+/* HUF_compress4X_wksp() :
+ * Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
+#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize);
+
+#endif /* HUF_H_298734234 */
+
+/* ******************************************************************
+ * WARNING !!
+ * The following section contains advanced and experimental definitions
+ * which shall never be used in the context of a dynamic library,
+ * because they are not guaranteed to remain stable in the future.
+ * Only consider them in association with static linking.
+ * *****************************************************************/
+#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY
+
+/* *** Dependencies *** */
+#include "mem.h" /* U32 */
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+
+
+/* *** Constants *** */
+#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
+#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
+#define HUF_SYMBOLVALUE_MAX 255
+
+#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
+# error "HUF_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+* Static allocation
+******************************************/
+/* HUF buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* static allocation of HUF's Compression Table */
+/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
+typedef size_t HUF_CElt; /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
+ HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUF_DTable;
+#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
+#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
+ HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+ HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
+
+
+/* ****************************************
+* Advanced decompression functions
+******************************************/
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */
+#endif
+
+
+/* ****************************************
+ * HUF detailed API
+ * ****************************************/
+
+/*! HUF_compress() does the following:
+ * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ * 2. (optional) refine tableLog using HUF_optimalTableLog()
+ * 3. build Huffman table from count using HUF_buildCTable()
+ * 4. save Huffman table to memory buffer using HUF_writeCTable()
+ * 5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ * The following API allows targeting specific sub-functions for advanced tasks.
+ * For example, it's possible to compress several blocks using the same 'CTable',
+ * or to save and regenerate 'CTable' using external methods.
+ */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+
+typedef enum {
+ HUF_repeat_none, /*< Cannot use the previous table */
+ HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+ HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */
+ } HUF_repeat;
+/* HUF_compress4X_repeat() :
+ * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ * If it uses hufTable it does not modify hufTable or repeat.
+ * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ * If preferRepeat then the old table will always be used if valid.
+ * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
+
+/* HUF_buildCTable_wksp() :
+ * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
+ */
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_buildCTable_wksp (HUF_CElt* tree,
+ const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+ void* workSpace, size_t wkspSize);
+
+/*! HUF_readStats() :
+ * Read compact Huffman tree, saved by HUF_writeCTable().
+ * `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+ U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize);
+
+/*! HUF_readStats_wksp() :
+ * Same as HUF_readStats() but takes an external workspace which must be
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
+#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
+ U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize,
+ void* workspace, size_t wkspSize,
+ int bmi2);
+
+/* HUF_readCTable() :
+ * Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
+
+/* HUF_getNbBitsFromCTable() :
+ * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
+ * Note 1 : is not inlined, as HUF_CElt definition is private */
+U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
+
+/*
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
+ */
+
+/* HUF_selectDecoder() :
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ * Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+/*
+ * The minimum workspace size for the `workSpace` used in
+ * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
+ *
+ * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ * Buffer overflow errors may potentially occur if code modifications result in
+ * a required workspace size greater than that specified in the following
+ * macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+
+/* ====================== */
+/* single stream variants */
+/* ====================== */
+
+size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
+/* HUF_compress1X_repeat() :
+ * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ * If it uses hufTable it does not modify hufTable or repeat.
+ * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ * If preferRepeat then the old table will always be used if valid.
+ * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
+
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
+#endif
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of sing or double symbol decoder, based on DTable */
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+
+#endif /* HUF_STATIC_LINKING_ONLY */
+
diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
new file mode 100644
index 00000000000..9794296285b
--- /dev/null
+++ b/lib/zstd/common/mem.h
@@ -0,0 +1,262 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include <asm/unaligned.h> /* get_unaligned, put_unaligned* */
+#include <linux/compiler.h> /* inline */
+#include <asm/byteorder.h> /* swab32, swab64 */
+#include <linux/types.h> /* size_t, ptrdiff_t */
+#include "debug.h" /* DEBUG_STATIC_ASSERT */
+#include "compiler.h" /* INLINE_KEYWORD, UNUSED_ATTR */
+
+/*-****************************************
+* Compiler specifics
+******************************************/
+#define MEM_STATIC static INLINE_KEYWORD UNUSED_ATTR
+
+/*-**************************************************************
+* Basic Types
+*****************************************************************/
+typedef uint8_t BYTE;
+typedef uint8_t U8;
+typedef int8_t S8;
+typedef uint16_t U16;
+typedef int16_t S16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef int64_t S64;
+
+/*-**************************************************************
+* Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+/*-**************************************************************
+* Memory I/O Implementation
+*****************************************************************/
+MEM_STATIC unsigned MEM_32bits(void)
+{
+ return sizeof(size_t) == 4;
+}
+
+MEM_STATIC unsigned MEM_64bits(void)
+{
+ return sizeof(size_t) == 8;
+}
+
+#if defined(__LITTLE_ENDIAN)
+#define MEM_LITTLE_ENDIAN 1
+#else
+#define MEM_LITTLE_ENDIAN 0
+#endif
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+ return MEM_LITTLE_ENDIAN;
+}
+
+MEM_STATIC U16 MEM_read16(const void *memPtr)
+{
+ return get_unaligned((const U16 *)memPtr);
+}
+
+MEM_STATIC U32 MEM_read32(const void *memPtr)
+{
+ return get_unaligned((const U32 *)memPtr);
+}
+
+MEM_STATIC U64 MEM_read64(const void *memPtr)
+{
+ return get_unaligned((const U64 *)memPtr);
+}
+
+MEM_STATIC size_t MEM_readST(const void *memPtr)
+{
+ return get_unaligned((const size_t *)memPtr);
+}
+
+MEM_STATIC void MEM_write16(void *memPtr, U16 value)
+{
+ put_unaligned(value, (U16 *)memPtr);
+}
+
+MEM_STATIC void MEM_write32(void *memPtr, U32 value)
+{
+ put_unaligned(value, (U32 *)memPtr);
+}
+
+MEM_STATIC void MEM_write64(void *memPtr, U64 value)
+{
+ put_unaligned(value, (U64 *)memPtr);
+}
+
+/*=== Little endian r/w ===*/
+
+MEM_STATIC U16 MEM_readLE16(const void *memPtr)
+{
+ return get_unaligned_le16(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
+{
+ put_unaligned_le16(val, memPtr);
+}
+
+MEM_STATIC U32 MEM_readLE24(const void *memPtr)
+{
+ return MEM_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16);
+}
+
+MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
+{
+ MEM_writeLE16(memPtr, (U16)val);
+ ((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
+}
+
+MEM_STATIC U32 MEM_readLE32(const void *memPtr)
+{
+ return get_unaligned_le32(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
+{
+ put_unaligned_le32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readLE64(const void *memPtr)
+{
+ return get_unaligned_le64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64)
+{
+ put_unaligned_le64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readLEST(const void *memPtr)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_readLE32(memPtr);
+ else
+ return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val)
+{
+ if (MEM_32bits())
+ MEM_writeLE32(memPtr, (U32)val);
+ else
+ MEM_writeLE64(memPtr, (U64)val);
+}
+
+/*=== Big endian r/w ===*/
+
+MEM_STATIC U32 MEM_readBE32(const void *memPtr)
+{
+ return get_unaligned_be32(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32)
+{
+ put_unaligned_be32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readBE64(const void *memPtr)
+{
+ return get_unaligned_be64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64)
+{
+ put_unaligned_be64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readBEST(const void *memPtr)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_readBE32(memPtr);
+ else
+ return (size_t)MEM_readBE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val)
+{
+ if (MEM_32bits())
+ MEM_writeBE32(memPtr, (U32)val);
+ else
+ MEM_writeBE64(memPtr, (U64)val);
+}
+
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+ return swab32(in);
+}
+
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+ return swab64(in);
+}
+
+MEM_STATIC size_t MEM_swapST(size_t in)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_swap32((U32)in);
+ else
+ return (size_t)MEM_swap64((U64)in);
+}
+
+#endif /* MEM_H_MODULE */
diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h
new file mode 100644
index 00000000000..0e3b2c0a527
--- /dev/null
+++ b/lib/zstd/common/portability_macros.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_PORTABILITY_MACROS_H
+#define ZSTD_PORTABILITY_MACROS_H
+
+/*
+ * This header file contains macro defintions to support portability.
+ * This header is shared between C and ASM code, so it MUST only
+ * contain macro definitions. It MUST not contain any C code.
+ *
+ * This header ONLY defines macros to detect platforms/feature support.
+ *
+ */
+
+
+/* compat. with non-clang compilers */
+#ifndef __has_attribute
+ #define __has_attribute(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
+/* detects whether we are being compiled under msan */
+
+/* detects whether we are being compiled under asan */
+
+/* detects whether we are being compiled under dfsan */
+
+/* Mark the internal assembly functions as hidden */
+#ifdef __ELF__
+# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
+#else
+# define ZSTD_HIDE_ASM_FUNCTION(func)
+#endif
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+ #if ((defined(__clang__) && __has_attribute(__target__)) \
+ || (defined(__GNUC__) \
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+ && (defined(__x86_64__) || defined(_M_X64)) \
+ && !defined(__BMI2__)
+ # define DYNAMIC_BMI2 1
+ #else
+ # define DYNAMIC_BMI2 0
+ #endif
+#endif
+
+/*
+ * Only enable assembly for GNUC comptabile compilers,
+ * because other platforms may not support GAS assembly syntax.
+ *
+ * Only enable assembly for Linux / MacOS, other platforms may
+ * work, but they haven't been tested. This could likely be
+ * extended to BSD systems.
+ *
+ * Disable assembly when MSAN is enabled, because MSAN requires
+ * 100% of code to be instrumented to work.
+ */
+#define ZSTD_ASM_SUPPORTED 1
+
+/*
+ * Determines whether we should enable assembly for x86-64
+ * with BMI2.
+ *
+ * Enable if all of the following conditions hold:
+ * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
+ * - Assembly is supported
+ * - We are compiling for x86-64 and either:
+ * - DYNAMIC_BMI2 is enabled
+ * - BMI2 is supported at compile time
+ */
+#define ZSTD_ENABLE_ASM_X86_64_BMI2 0
+
+#endif /* ZSTD_PORTABILITY_MACROS_H */
diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
new file mode 100644
index 00000000000..3d7e35b309b
--- /dev/null
+++ b/lib/zstd/common/zstd_common.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+
+/*-*************************************
+* Dependencies
+***************************************/
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
+#include "error_private.h"
+#include "zstd_internal.h"
+
+
+/*-****************************************
+* Version
+******************************************/
+unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
+
+const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
+
+
+/*-****************************************
+* ZSTD Error Management
+******************************************/
+#undef ZSTD_isError /* defined within zstd_internal.h */
+/*! ZSTD_isError() :
+ * tells if a return value is an error code
+ * symbol is required for external callers */
+unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTD_getErrorName() :
+ * provides error code string from function result (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+/*! ZSTD_getError() :
+ * convert a `size_t` function result into a proper ZSTD_errorCode enum */
+ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+
+/*! ZSTD_getErrorString() :
+ * provides error code string from enum */
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
+
+
+
+/*=**************************************************************
+* Custom allocator
+****************************************************************/
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
+{
+ if (customMem.customAlloc)
+ return customMem.customAlloc(customMem.opaque, size);
+ return ZSTD_malloc(size);
+}
+
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
+{
+ if (customMem.customAlloc) {
+ /* calloc implemented as malloc+memset;
+ * not as efficient as calloc, but next best guess for custom malloc */
+ void* const ptr = customMem.customAlloc(customMem.opaque, size);
+ ZSTD_memset(ptr, 0, size);
+ return ptr;
+ }
+ return ZSTD_calloc(1, size);
+}
+
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
+{
+ if (ptr!=NULL) {
+ if (customMem.customFree)
+ customMem.customFree(customMem.opaque, ptr);
+ else
+ ZSTD_free(ptr);
+ }
+}
diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
new file mode 100644
index 00000000000..e45ec566831
--- /dev/null
+++ b/lib/zstd/common/zstd_deps.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This file provides common libc dependencies that zstd requires.
+ * The purpose is to allow replacing this file with a custom implementation
+ * to compile zstd without libc support.
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+
+#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n))
+#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n))
+#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n))
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/*
+ * Define malloc as always failing. That means the user must
+ * either use ZSTD_customMem or statically allocate memory.
+ * Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+#define ZSTD_DEPS_MALLOC
+
+#define ZSTD_malloc(s) ({ (void)(s); NULL; })
+#define ZSTD_free(p) ((void)(p))
+#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; })
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+#define ZSTD_DEPS_MATH64
+
+#include <linux/math64.h>
+
+static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
+ return div_u64(dividend, divisor);
+}
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 1, meaning
+ * it is disabled in production.
+ * Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+#define ZSTD_DEPS_ASSERT
+
+#include <linux/kernel.h>
+
+#define assert(x) WARN_ON((x))
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 2, meaning
+ * it is disabled in production.
+ * Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+#define ZSTD_DEPS_IO
+
+#include <linux/printk.h>
+
+#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/*
+ * Only requested when MSAN is enabled.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+#define ZSTD_DEPS_STDINT
+
+/*
+ * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
+ * is an unsigned long.
+ */
+typedef long intptr_t;
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
new file mode 100644
index 00000000000..93305d9b41b
--- /dev/null
+++ b/lib/zstd/common/zstd_internal.h
@@ -0,0 +1,443 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/* this module contains definitions which must be identical
+ * across compression, decompression and dictBuilder.
+ * It also contains a few functions useful to at least 2 of them
+ * and which benefit from being inlined */
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "compiler.h"
+#include "cpu.h"
+#include "mem.h"
+#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
+#include "error_private.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include <linux/zstd.h>
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#include <linux/xxhash.h> /* XXH_reset, update, digest */
+#define ZSTD_TRACE 0
+
+
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
+#define ZSTD_isError ERR_isError /* for inlining */
+#define FSE_isError ERR_isError
+#define HUF_isError ERR_isError
+
+
+/*-*************************************
+* shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
+
+
+/*-*************************************
+* Common constants
+***************************************/
+#define ZSTD_OPT_NUM (1<<12)
+
+#define ZSTD_REP_NUM 3 /* number of repcodes */
+static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+#define BIT1 2
+#define BIT0 1
+
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
+static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
+static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+
+#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
+
+#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
+static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+
+#define ZSTD_FRAMECHECKSUMSIZE 4
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
+
+#define HufLog 12
+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+
+#define Litbits 8
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML 52
+#define MaxLL 35
+#define DefaultMaxOff 28
+#define MaxOff 31
+#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog 9
+#define LLFSELog 9
+#define OffFSELog 8
+#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
+
+#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
+/* Each table cannot take more than #symbols * FSELog bits */
+#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
+
+static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 2, 2, 3, 3,
+ 4, 6, 7, 8, 9,10,11,12,
+ 13,14,15,16
+};
+static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
+ 4, 3, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 3, 2, 1, 1, 1, 1, 1,
+ -1,-1,-1,-1
+};
+#define LL_DEFAULTNORMLOG 6 /* for static allocation */
+static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 2, 2, 3, 3,
+ 4, 4, 5, 7, 8, 9,10,11,
+ 12,13,14,15,16
+};
+static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
+ 1, 4, 3, 2, 2, 2, 2, 2,
+ 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1,-1,-1,
+ -1,-1,-1,-1,-1
+};
+#define ML_DEFAULTNORMLOG 6 /* for static allocation */
+static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
+ 1, 1, 1, 1, 1, 1, 2, 2,
+ 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ -1,-1,-1,-1,-1
+};
+#define OF_DEFAULTNORMLOG 5 /* for static allocation */
+static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
+
+
+/*-*******************************************
+* Shared functions to include for inlining
+*********************************************/
+static void ZSTD_copy8(void* dst, const void* src) {
+#if defined(ZSTD_ARCH_ARM_NEON)
+ vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
+#else
+ ZSTD_memcpy(dst, src, 8);
+#endif
+}
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/* Need to use memmove here since the literal buffer can now be located within
+ the dst buffer. In circumstances where the op "catches up" to where the
+ literal buffer is, there can be partial overlaps in this call on the final
+ copy if the literal is being shifted by less than 16 bytes. */
+static void ZSTD_copy16(void* dst, const void* src) {
+#if defined(ZSTD_ARCH_ARM_NEON)
+ vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
+#elif defined(ZSTD_ARCH_X86_SSE2)
+ _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
+#elif defined(__clang__)
+ ZSTD_memmove(dst, src, 16);
+#else
+ /* ZSTD_memmove is not inlined properly by gcc */
+ BYTE copy16_buf[16];
+ ZSTD_memcpy(copy16_buf, src, 16);
+ ZSTD_memcpy(dst, copy16_buf, 16);
+#endif
+}
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
+
+typedef enum {
+ ZSTD_no_overlap,
+ ZSTD_overlap_src_before_dst
+ /* ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
+
+/*! ZSTD_wildcopy() :
+ * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ * The src buffer must be before the dst buffer.
+ */
+MEM_STATIC FORCE_INLINE_ATTR
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
+{
+ ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+
+ if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+ /* Handle short offset copies. */
+ do {
+ COPY8(op, ip)
+ } while (op < oend);
+ } else {
+ assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+ /* Separate out the first COPY16() call because the copy length is
+ * almost certain to be short, so the branches have different
+ * probabilities. Since it is almost certain to be short, only do
+ * one COPY16() in the first call. Then, do two calls per loop since
+ * at that point it is more likely to have a high trip count.
+ */
+#ifdef __aarch64__
+ do {
+ COPY16(op, ip);
+ }
+ while (op < oend);
+#else
+ ZSTD_copy16(op, ip);
+ if (16 >= length) return;
+ op += 16;
+ ip += 16;
+ do {
+ COPY16(op, ip);
+ COPY16(op, ip);
+ }
+ while (op < oend);
+#endif
+ }
+}
+
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ size_t const length = MIN(dstCapacity, srcSize);
+ if (length > 0) {
+ ZSTD_memcpy(dst, src, length);
+ }
+ return length;
+}
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+/* Controls whether the input/output buffer is buffered or stable. */
+typedef enum {
+ ZSTD_bm_buffered = 0, /* Buffer the input/output */
+ ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
+} ZSTD_bufferMode_e;
+
+
+/*-*******************************************
+* Private declarations
+*********************************************/
+typedef struct seqDef_s {
+ U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
+ U16 litLength;
+ U16 mlBase; /* mlBase == matchLength - MINMATCH */
+} seqDef;
+
+/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
+typedef enum {
+ ZSTD_llt_none = 0, /* no longLengthType */
+ ZSTD_llt_literalLength = 1, /* represents a long literal */
+ ZSTD_llt_matchLength = 2 /* represents a long match */
+} ZSTD_longLengthType_e;
+
+typedef struct {
+ seqDef* sequencesStart;
+ seqDef* sequences; /* ptr to end of sequences */
+ BYTE* litStart;
+ BYTE* lit; /* ptr to end of literals */
+ BYTE* llCode;
+ BYTE* mlCode;
+ BYTE* ofCode;
+ size_t maxNbSeq;
+ size_t maxNbLit;
+
+ /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
+ * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
+ * the existing value of the litLength or matchLength by 0x10000.
+ */
+ ZSTD_longLengthType_e longLengthType;
+ U32 longLengthPos; /* Index of the sequence to apply long length modification to */
+} seqStore_t;
+
+typedef struct {
+ U32 litLength;
+ U32 matchLength;
+} ZSTD_sequenceLength;
+
+/*
+ * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+ ZSTD_sequenceLength seqLen;
+ seqLen.litLength = seq->litLength;
+ seqLen.matchLength = seq->mlBase + MINMATCH;
+ if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+ if (seqStore->longLengthType == ZSTD_llt_literalLength) {
+ seqLen.litLength += 0xFFFF;
+ }
+ if (seqStore->longLengthType == ZSTD_llt_matchLength) {
+ seqLen.matchLength += 0xFFFF;
+ }
+ }
+ return seqLen;
+}
+
+/*
+ * Contains the compressed frame size and an upper-bound for the decompressed frame size.
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError().
+ * similarly, before using `decompressedBound`, check for errors using:
+ * `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+ */
+typedef struct {
+ size_t compressedSize;
+ unsigned long long decompressedBound;
+} ZSTD_frameSizeInfo; /* decompress & legacy */
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+
+/* custom memory allocation functions */
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
+
+
+MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
+{
+ assert(val != 0);
+ {
+# if (__GNUC__ >= 3) /* GCC Intrinsic */
+ return __builtin_clz (val) ^ 31;
+# else /* Software version */
+ static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
+# endif
+ }
+}
+
+/*
+ * Counts the number of trailing zeros of a `size_t`.
+ * Most compilers should support CTZ as a builtin. A backup
+ * implementation is provided if the builtin isn't supported, but
+ * it may not be terribly efficient.
+ */
+MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
+{
+ if (MEM_64bits()) {
+# if (__GNUC__ >= 4)
+ return __builtin_ctzll((U64)val);
+# else
+ static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
+ 4, 25, 14, 28, 9, 34, 20, 56,
+ 5, 17, 26, 54, 15, 41, 29, 43,
+ 10, 31, 38, 35, 21, 45, 49, 57,
+ 63, 6, 12, 18, 24, 27, 33, 55,
+ 16, 53, 40, 42, 30, 37, 44, 48,
+ 62, 11, 23, 32, 52, 39, 36, 47,
+ 61, 22, 51, 46, 60, 50, 59, 58 };
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+ } else { /* 32 bits */
+# if (__GNUC__ >= 3)
+ return __builtin_ctz((U32)val);
+# else
+ static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
+ 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7,
+ 26, 12, 18, 6, 11, 5, 10, 9 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+ }
+}
+
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ * do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
+
+
+typedef struct {
+ blockType_e blockType;
+ U32 lastBlock;
+ U32 origSize;
+} blockProperties_t; /* declared here for decompress and fullbench */
+
+/*! ZSTD_getcBlockSize() :
+ * Provides the size of compressed block from block header `src` */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+ blockProperties_t* bpPtr);
+
+/*! ZSTD_decodeSeqHeaders() :
+ * decode sequence header from src */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+ const void* src, size_t srcSize);
+
+/*
+ * @returns true iff the CPU supports dynamic BMI2 dispatch.
+ */
+MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
+{
+ ZSTD_cpuid_t cpuid = ZSTD_cpuid();
+ return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
+}
+
+
+#endif /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
new file mode 100644
index 00000000000..89b269a641c
--- /dev/null
+++ b/lib/zstd/decompress/huf_decompress.c
@@ -0,0 +1,1740 @@
+/* ******************************************************************
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+* Dependencies
+****************************************************************/
+#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
+#include "../common/compiler.h"
+#include "../common/bitstream.h" /* BIT_* */
+#include "../common/fse.h" /* to compress headers */
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/error_private.h"
+#include "../common/zstd_internal.h"
+
+/* **************************************************************
+* Constants
+****************************************************************/
+
+#define HUF_DECODER_FAST_TABLELOG 11
+
+/* **************************************************************
+* Macros
+****************************************************************/
+
+/* These two optional macros force the use one way or another of the two
+ * Huffman decompression implementations. You can't force in both directions
+ * at the same time.
+ */
+#if defined(HUF_FORCE_DECOMPRESS_X1) && \
+ defined(HUF_FORCE_DECOMPRESS_X2)
+#error "Cannot force the use of the X1 and X2 decoders at the same time!"
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
+# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
+#else
+# define HUF_ASM_X86_64_BMI2_ATTRS
+#endif
+
+#define HUF_EXTERN_C
+#define HUF_ASM_DECL HUF_EXTERN_C
+
+#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
+# define HUF_NEED_BMI2_FUNCTION 1
+#else
+# define HUF_NEED_BMI2_FUNCTION 0
+#endif
+
+#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
+# define HUF_NEED_DEFAULT_FUNCTION 1
+#else
+# define HUF_NEED_DEFAULT_FUNCTION 0
+#endif
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+
+
+/* **************************************************************
+* Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+/* **************************************************************
+* BMI2 Variant Wrappers
+****************************************************************/
+#if DYNAMIC_BMI2
+
+#define HUF_DGEN(fn) \
+ \
+ static size_t fn##_default( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ if (bmi2) { \
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#else
+
+#define HUF_DGEN(fn) \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ (void)bmi2; \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#endif
+
+
+/*-***************************/
+/* generic DTableDesc */
+/*-***************************/
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+{
+ DTableDesc dtd;
+ ZSTD_memcpy(&dtd, table, sizeof(dtd));
+ return dtd;
+}
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
+static size_t HUF_initDStream(BYTE const* ip) {
+ BYTE const lastByte = ip[7];
+ size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+ size_t const value = MEM_readLEST(ip) | 1;
+ assert(bitsConsumed <= 8);
+ return value << bitsConsumed;
+}
+typedef struct {
+ BYTE const* ip[4];
+ BYTE* op[4];
+ U64 bits[4];
+ void const* dt;
+ BYTE const* ilimit;
+ BYTE* oend;
+ BYTE const* iend[4];
+} HUF_DecompressAsmArgs;
+
+/*
+ * Initializes args for the asm decoding loop.
+ * @returns 0 on success
+ * 1 if the fallback implementation should be used.
+ * Or an error code on failure.
+ */
+static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
+{
+ void const* dt = DTable + 1;
+ U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
+
+ const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
+
+ BYTE* const oend = (BYTE*)dst + dstSize;
+
+ /* The following condition is false on x32 platform,
+ * but HUF_asm is not compatible with this ABI */
+ if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
+
+ /* strict minimum : jump table + 1 byte per stream */
+ if (srcSize < 10)
+ return ERROR(corruption_detected);
+
+ /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
+ * If table log is not correct at this point, fallback to the old decoder.
+ * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
+ */
+ if (dtLog != HUF_DECODER_FAST_TABLELOG)
+ return 1;
+
+ /* Read the jump table. */
+ {
+ const BYTE* const istart = (const BYTE*)src;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
+ args->iend[0] = istart + 6; /* jumpTable */
+ args->iend[1] = args->iend[0] + length1;
+ args->iend[2] = args->iend[1] + length2;
+ args->iend[3] = args->iend[2] + length3;
+
+ /* HUF_initDStream() requires this, and this small of an input
+ * won't benefit from the ASM loop anyways.
+ * length1 must be >= 16 so that ip[0] >= ilimit before the loop
+ * starts.
+ */
+ if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
+ return 1;
+ if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
+ }
+ /* ip[] contains the position that is currently loaded into bits[]. */
+ args->ip[0] = args->iend[1] - sizeof(U64);
+ args->ip[1] = args->iend[2] - sizeof(U64);
+ args->ip[2] = args->iend[3] - sizeof(U64);
+ args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
+
+ /* op[] contains the output pointers. */
+ args->op[0] = (BYTE*)dst;
+ args->op[1] = args->op[0] + (dstSize+3)/4;
+ args->op[2] = args->op[1] + (dstSize+3)/4;
+ args->op[3] = args->op[2] + (dstSize+3)/4;
+
+ /* No point to call the ASM loop for tiny outputs. */
+ if (args->op[3] >= oend)
+ return 1;
+
+ /* bits[] is the bit container.
+ * It is read from the MSB down to the LSB.
+ * It is shifted left as it is read, and zeros are
+ * shifted in. After the lowest valid bit a 1 is
+ * set, so that CountTrailingZeros(bits[]) can be used
+ * to count how many bits we've consumed.
+ */
+ args->bits[0] = HUF_initDStream(args->ip[0]);
+ args->bits[1] = HUF_initDStream(args->ip[1]);
+ args->bits[2] = HUF_initDStream(args->ip[2]);
+ args->bits[3] = HUF_initDStream(args->ip[3]);
+
+ /* If ip[] >= ilimit, it is guaranteed to be safe to
+ * reload bits[]. It may be beyond its section, but is
+ * guaranteed to be valid (>= istart).
+ */
+ args->ilimit = ilimit;
+
+ args->oend = oend;
+ args->dt = dt;
+
+ return 0;
+}
+
+static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
+{
+ /* Validate that we haven't overwritten. */
+ if (args->op[stream] > segmentEnd)
+ return ERROR(corruption_detected);
+ /* Validate that we haven't read beyond iend[].
+ * Note that ip[] may be < iend[] because the MSB is
+ * the next bit to read, and we may have consumed 100%
+ * of the stream, so down to iend[i] - 8 is valid.
+ */
+ if (args->ip[stream] < args->iend[stream] - 8)
+ return ERROR(corruption_detected);
+
+ /* Construct the BIT_DStream_t. */
+ bit->bitContainer = MEM_readLE64(args->ip[stream]);
+ bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
+ bit->start = (const char*)args->iend[0];
+ bit->limitPtr = bit->start + sizeof(size_t);
+ bit->ptr = (const char*)args->ip[stream];
+
+ return 0;
+}
+#endif
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+
+/*-***************************/
+/* single-symbol decoding */
+/*-***************************/
+typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
+
+/*
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
+ * a time.
+ */
+static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+ U64 D4;
+ if (MEM_isLittleEndian()) {
+ D4 = (symbol << 8) + nbBits;
+ } else {
+ D4 = symbol + (nbBits << 8);
+ }
+ D4 *= 0x0001000100010001ULL;
+ return D4;
+}
+
+/*
+ * Increase the tableLog to targetTableLog and rescales the stats.
+ * If tableLog > targetTableLog this is a no-op.
+ * @returns New tableLog
+ */
+static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
+{
+ if (tableLog > targetTableLog)
+ return tableLog;
+ if (tableLog < targetTableLog) {
+ U32 const scale = targetTableLog - tableLog;
+ U32 s;
+ /* Increase the weight for all non-zero probability symbols by scale. */
+ for (s = 0; s < nbSymbols; ++s) {
+ huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
+ }
+ /* Update rankVal to reflect the new weights.
+ * All weights except 0 get moved to weight + scale.
+ * Weights [1, scale] are empty.
+ */
+ for (s = targetTableLog; s > scale; --s) {
+ rankVal[s] = rankVal[s - scale];
+ }
+ for (s = scale; s > 0; --s) {
+ rankVal[s] = 0;
+ }
+ }
+ return targetTableLog;
+}
+
+typedef struct {
+ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+ U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
+ U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+ BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
+ BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+} HUF_ReadDTableX1_Workspace;
+
+
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+{
+ return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ U32 tableLog = 0;
+ U32 nbSymbols = 0;
+ size_t iSize;
+ void* const dtPtr = DTable + 1;
+ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+ HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
+
+ DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
+ if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
+
+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+ /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
+
+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
+ if (HUF_isError(iSize)) return iSize;
+
+
+ /* Table header */
+ { DTableDesc dtd = HUF_getDTableDesc(DTable);
+ U32 const maxTableLog = dtd.maxTableLog + 1;
+ U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
+ tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
+ if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
+ dtd.tableType = 0;
+ dtd.tableLog = (BYTE)tableLog;
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+ }
+
+ /* Compute symbols and rankStart given rankVal:
+ *
+ * rankVal already contains the number of values of each weight.
+ *
+ * symbols contains the symbols ordered by weight. First are the rankVal[0]
+ * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
+ * symbols[0] is filled (but unused) to avoid a branch.
+ *
+ * rankStart contains the offset where each rank belongs in the DTable.
+ * rankStart[0] is not filled because there are no entries in the table for
+ * weight 0.
+ */
+ {
+ int n;
+ int nextRankStart = 0;
+ int const unroll = 4;
+ int const nLimit = (int)nbSymbols - unroll + 1;
+ for (n=0; n<(int)tableLog+1; n++) {
+ U32 const curr = nextRankStart;
+ nextRankStart += wksp->rankVal[n];
+ wksp->rankStart[n] = curr;
+ }
+ for (n=0; n < nLimit; n += unroll) {
+ int u;
+ for (u=0; u < unroll; ++u) {
+ size_t const w = wksp->huffWeight[n+u];
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
+ }
+ }
+ for (; n < (int)nbSymbols; ++n) {
+ size_t const w = wksp->huffWeight[n];
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
+ }
+ }
+
+ /* fill DTable
+ * We fill all entries of each weight in order.
+ * That way length is a constant for each iteration of the outer loop.
+ * We can switch based on the length to a different inner loop which is
+ * optimized for that particular case.
+ */
+ {
+ U32 w;
+ int symbol=wksp->rankVal[0];
+ int rankStart=0;
+ for (w=1; w<tableLog+1; ++w) {
+ int const symbolCount = wksp->rankVal[w];
+ int const length = (1 << w) >> 1;
+ int uStart = rankStart;
+ BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+ int s;
+ int u;
+ switch (length) {
+ case 1:
+ for (s=0; s<symbolCount; ++s) {
+ HUF_DEltX1 D;
+ D.byte = wksp->symbols[symbol + s];
+ D.nbBits = nbBits;
+ dt[uStart] = D;
+ uStart += 1;
+ }
+ break;
+ case 2:
+ for (s=0; s<symbolCount; ++s) {
+ HUF_DEltX1 D;
+ D.byte = wksp->symbols[symbol + s];
+ D.nbBits = nbBits;
+ dt[uStart+0] = D;
+ dt[uStart+1] = D;
+ uStart += 2;
+ }
+ break;
+ case 4:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ MEM_write64(dt + uStart, D4);
+ uStart += 4;
+ }
+ break;
+ case 8:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ MEM_write64(dt + uStart, D4);
+ MEM_write64(dt + uStart + 4, D4);
+ uStart += 8;
+ }
+ break;
+ default:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ for (u=0; u < length; u += 16) {
+ MEM_write64(dt + uStart + u + 0, D4);
+ MEM_write64(dt + uStart + u + 4, D4);
+ MEM_write64(dt + uStart + u + 8, D4);
+ MEM_write64(dt + uStart + u + 12, D4);
+ }
+ assert(u == length);
+ uStart += length;
+ }
+ break;
+ }
+ symbol += symbolCount;
+ rankStart += symbolCount * length;
+ }
+ }
+ return iSize;
+}
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ BYTE const c = dt[val].byte;
+ BIT_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ if ((pEnd - p) > 3) {
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+ }
+ } else {
+ BIT_reloadDStream(bitDPtr);
+ }
+
+ /* [0-3] symbols remaining */
+ if (MEM_32bits())
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, no need to reload */
+ while (p < pEnd)
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + dstSize;
+ const void* dtPtr = DTable + 1;
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+ BIT_DStream_t bitD;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ /* Check */
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ BYTE* const olimit = oend - 3;
+ const void* const dtPtr = DTable + 1;
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+ U32 endSignal = 1;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
+ if ((size_t)(oend - op4) >= sizeof(size_t)) {
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+ }
+ }
+
+ /* check corruption */
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 supposed already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+#if HUF_NEED_BMI2_FUNCTION
+static BMI2_TARGET_ATTRIBUTE
+size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
+ size_t cSrcSize, HUF_DTable const* DTable) {
+ return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if HUF_NEED_DEFAULT_FUNCTION
+static
+size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
+ size_t cSrcSize, HUF_DTable const* DTable) {
+ return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
+HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
+
+static HUF_ASM_X86_64_BMI2_ATTRS
+size_t
+HUF_decompress4X1_usingDTable_internal_bmi2_asm(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ void const* dt = DTable + 1;
+ const BYTE* const iend = (const BYTE*)cSrc + 6;
+ BYTE* const oend = (BYTE*)dst + dstSize;
+ HUF_DecompressAsmArgs args;
+ {
+ size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+ FORWARD_IF_ERROR(ret, "Failed to init asm args");
+ if (ret != 0)
+ return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+ }
+
+ assert(args.ip[0] >= args.ilimit);
+ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
+
+ /* Our loop guarantees that ip[] >= ilimit and that we haven't
+ * overwritten any op[].
+ */
+ assert(args.ip[0] >= iend);
+ assert(args.ip[1] >= iend);
+ assert(args.ip[2] >= iend);
+ assert(args.ip[3] >= iend);
+ assert(args.op[3] <= oend);
+ (void)iend;
+
+ /* finish bit streams one by one. */
+ {
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* segmentEnd = (BYTE*)dst;
+ int i;
+ for (i = 0; i < 4; ++i) {
+ BIT_DStream_t bit;
+ if (segmentSize <= (size_t)(oend - segmentEnd))
+ segmentEnd += segmentSize;
+ else
+ segmentEnd = oend;
+ FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
+ /* Decompress and validate that we've produced exactly the expected length. */
+ args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
+ if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
+ }
+ }
+
+ /* decoded size */
+ return dstSize;
+}
+#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
+
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+ const void *cSrc,
+ size_t cSrcSize,
+ const HUF_DTable *DTable);
+
+HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+
+static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+# if ZSTD_ENABLE_ASM_X86_64_BMI2
+ return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+# else
+ return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+# endif
+ }
+#else
+ (void)bmi2;
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
+ return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+#else
+ return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
+#endif
+}
+
+
+size_t HUF_decompress1X1_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 0) return ERROR(GENERIC);
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X1_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 0) return ERROR(GENERIC);
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X2 */
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X1
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
+typedef struct { BYTE symbol; } sortedSymbol_t;
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+/*
+ * Constructs a HUF_DEltX2 in a U32.
+ */
+static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
+{
+ U32 seq;
+ DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
+ DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
+ DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
+ if (MEM_isLittleEndian()) {
+ seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
+ return seq + (nbBits << 16) + ((U32)level << 24);
+ } else {
+ seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
+ return (seq << 16) + (nbBits << 8) + (U32)level;
+ }
+}
+
+/*
+ * Constructs a HUF_DEltX2.
+ */
+static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
+{
+ HUF_DEltX2 DElt;
+ U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
+ DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
+ ZSTD_memcpy(&DElt, &val, sizeof(val));
+ return DElt;
+}
+
+/*
+ * Constructs 2 HUF_DEltX2s and packs them into a U64.
+ */
+static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
+{
+ U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
+ return (U64)DElt + ((U64)DElt << 32);
+}
+
+/*
+ * Fills the DTable rank with all the symbols from [begin, end) that are each
+ * nbBits long.
+ *
+ * @param DTableRank The start of the rank in the DTable.
+ * @param begin The first symbol to fill (inclusive).
+ * @param end The last symbol to fill (exclusive).
+ * @param nbBits Each symbol is nbBits long.
+ * @param tableLog The table log.
+ * @param baseSeq If level == 1 { 0 } else { the first level symbol }
+ * @param level The level in the table. Must be 1 or 2.
+ */
+static void HUF_fillDTableX2ForWeight(
+ HUF_DEltX2* DTableRank,
+ sortedSymbol_t const* begin, sortedSymbol_t const* end,
+ U32 nbBits, U32 tableLog,
+ U16 baseSeq, int const level)
+{
+ U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
+ const sortedSymbol_t* ptr;
+ assert(level >= 1 && level <= 2);
+ switch (length) {
+ case 1:
+ for (ptr = begin; ptr != end; ++ptr) {
+ HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
+ *DTableRank++ = DElt;
+ }
+ break;
+ case 2:
+ for (ptr = begin; ptr != end; ++ptr) {
+ HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
+ DTableRank[0] = DElt;
+ DTableRank[1] = DElt;
+ DTableRank += 2;
+ }
+ break;
+ case 4:
+ for (ptr = begin; ptr != end; ++ptr) {
+ U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
+ ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
+ DTableRank += 4;
+ }
+ break;
+ case 8:
+ for (ptr = begin; ptr != end; ++ptr) {
+ U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
+ ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
+ DTableRank += 8;
+ }
+ break;
+ default:
+ for (ptr = begin; ptr != end; ++ptr) {
+ U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
+ HUF_DEltX2* const DTableRankEnd = DTableRank + length;
+ for (; DTableRank != DTableRankEnd; DTableRank += 8) {
+ ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
+ }
+ }
+ break;
+ }
+}
+
+/* HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
+ const U32* rankVal, const int minWeight, const int maxWeight1,
+ const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
+ U32 nbBitsBaseline, U16 baseSeq)
+{
+ /* Fill skipped values (all positions up to rankVal[minWeight]).
+ * These are positions only get a single symbol because the combined weight
+ * is too large.
+ */
+ if (minWeight>1) {
+ U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
+ U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
+ int const skipSize = rankVal[minWeight];
+ assert(length > 1);
+ assert((U32)skipSize < length);
+ switch (length) {
+ case 2:
+ assert(skipSize == 1);
+ ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
+ break;
+ case 4:
+ assert(skipSize <= 4);
+ ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
+ break;
+ default:
+ {
+ int i;
+ for (i = 0; i < skipSize; i += 8) {
+ ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
+ ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
+ }
+ }
+ }
+ }
+
+ /* Fill each of the second level symbols by weight. */
+ {
+ int w;
+ for (w = minWeight; w < maxWeight1; ++w) {
+ int const begin = rankStart[w];
+ int const end = rankStart[w+1];
+ U32 const nbBits = nbBitsBaseline - w;
+ U32 const totalBits = nbBits + consumedBits;
+ HUF_fillDTableX2ForWeight(
+ DTable + rankVal[w],
+ sortedSymbols + begin, sortedSymbols + end,
+ totalBits, targetLog,
+ baseSeq, /* level */ 2);
+ }
+ }
+}
+
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline)
+{
+ U32* const rankVal = rankValOrigin[0];
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ int w;
+ int const wEnd = (int)maxWeight + 1;
+
+ /* Fill DTable in order of weight. */
+ for (w = 1; w < wEnd; ++w) {
+ int const begin = (int)rankStart[w];
+ int const end = (int)rankStart[w+1];
+ U32 const nbBits = nbBitsBaseline - w;
+
+ if (targetLog-nbBits >= minBits) {
+ /* Enough room for a second symbol. */
+ int start = rankVal[w];
+ U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
+ int minWeight = nbBits + scaleLog;
+ int s;
+ if (minWeight < 1) minWeight = 1;
+ /* Fill the DTable for every symbol of weight w.
+ * These symbols get at least 1 second symbol.
+ */
+ for (s = begin; s != end; ++s) {
+ HUF_fillDTableX2Level2(
+ DTable + start, targetLog, nbBits,
+ rankValOrigin[nbBits], minWeight, wEnd,
+ sortedList, rankStart,
+ nbBitsBaseline, sortedList[s].symbol);
+ start += length;
+ }
+ } else {
+ /* Only a single symbol. */
+ HUF_fillDTableX2ForWeight(
+ DTable + rankVal[w],
+ sortedList + begin, sortedList + end,
+ nbBits, targetLog,
+ /* baseSeq */ 0, /* level */ 1);
+ }
+ }
+}
+
+typedef struct {
+ rankValCol_t rankVal[HUF_TABLELOG_MAX];
+ U32 rankStats[HUF_TABLELOG_MAX + 1];
+ U32 rankStart0[HUF_TABLELOG_MAX + 3];
+ sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+ BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+ U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+{
+ U32 tableLog, maxW, nbSymbols;
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ U32 maxTableLog = dtd.maxTableLog;
+ size_t iSize;
+ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+ U32 *rankStart;
+
+ HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+ if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+ rankStart = wksp->rankStart0 + 1;
+ ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+ ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
+ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+ /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
+
+ iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+ if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
+
+ /* find maxWeight */
+ for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
+
+ /* Get start index of each weight */
+ { U32 w, nextRankStart = 0;
+ for (w=1; w<maxW+1; w++) {
+ U32 curr = nextRankStart;
+ nextRankStart += wksp->rankStats[w];
+ rankStart[w] = curr;
+ }
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+ rankStart[maxW+1] = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ { U32 s;
+ for (s=0; s<nbSymbols; s++) {
+ U32 const w = wksp->weightList[s];
+ U32 const r = rankStart[w]++;
+ wksp->sortedSymbol[r].symbol = (BYTE)s;
+ }
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ { U32* const rankVal0 = wksp->rankVal[0];
+ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
+ U32 nextRankVal = 0;
+ U32 w;
+ for (w=1; w<maxW+1; w++) {
+ U32 curr = nextRankVal;
+ nextRankVal += wksp->rankStats[w] << (w+rescale);
+ rankVal0[w] = curr;
+ } }
+ { U32 const minBits = tableLog+1 - maxW;
+ U32 consumed;
+ for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+ U32* const rankValPtr = wksp->rankVal[consumed];
+ U32 w;
+ for (w = 1; w < maxW+1; w++) {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ } } } }
+
+ HUF_fillDTableX2(dt, maxTableLog,
+ wksp->sortedSymbol,
+ wksp->rankStart0, wksp->rankVal, maxW,
+ tableLog+1);
+
+ dtd.tableLog = (BYTE)maxTableLog;
+ dtd.tableType = 1;
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+ return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ ZSTD_memcpy(op, &dt[val].sequence, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ ZSTD_memcpy(op, &dt[val].sequence, 1);
+ if (dt[val].length==1) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ } else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+ }
+ }
+ return 1;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+ const HUF_DEltX2* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
+ if (dtLog <= 11 && MEM_64bits()) {
+ /* up to 10 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+ } else {
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+ }
+ } else {
+ BIT_reloadDStream(bitDPtr);
+ }
+
+ /* closer to end : up to 2 symbols at a time */
+ if ((size_t)(pEnd - p) >= 2) {
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+ }
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ BIT_DStream_t bitD;
+
+ /* Init */
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ /* decode */
+ { BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
+ }
+
+ /* check */
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+}
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
+ const void* const dtPtr = DTable+1;
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal = 1;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ if ((size_t)(oend - op4) >= sizeof(size_t)) {
+ for ( ; (endSignal) & (op4 < olimit); ) {
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+#else
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+ endSignal = (U32)LIKELY((U32)
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
+ }
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+#if HUF_NEED_BMI2_FUNCTION
+static BMI2_TARGET_ATTRIBUTE
+size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
+ size_t cSrcSize, HUF_DTable const* DTable) {
+ return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if HUF_NEED_DEFAULT_FUNCTION
+static
+size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
+ size_t cSrcSize, HUF_DTable const* DTable) {
+ return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
+
+static HUF_ASM_X86_64_BMI2_ATTRS size_t
+HUF_decompress4X2_usingDTable_internal_bmi2_asm(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable) {
+ void const* dt = DTable + 1;
+ const BYTE* const iend = (const BYTE*)cSrc + 6;
+ BYTE* const oend = (BYTE*)dst + dstSize;
+ HUF_DecompressAsmArgs args;
+ {
+ size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+ FORWARD_IF_ERROR(ret, "Failed to init asm args");
+ if (ret != 0)
+ return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+ }
+
+ assert(args.ip[0] >= args.ilimit);
+ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
+
+ /* note : op4 already verified within main loop */
+ assert(args.ip[0] >= iend);
+ assert(args.ip[1] >= iend);
+ assert(args.ip[2] >= iend);
+ assert(args.ip[3] >= iend);
+ assert(args.op[3] <= oend);
+ (void)iend;
+
+ /* finish bitStreams one by one */
+ {
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* segmentEnd = (BYTE*)dst;
+ int i;
+ for (i = 0; i < 4; ++i) {
+ BIT_DStream_t bit;
+ if (segmentSize <= (size_t)(oend - segmentEnd))
+ segmentEnd += segmentSize;
+ else
+ segmentEnd = oend;
+ FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
+ args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
+ if (args.op[i] != segmentEnd)
+ return ERROR(corruption_detected);
+ }
+ }
+
+ /* decoded size */
+ return dstSize;
+}
+#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
+
+static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+# if ZSTD_ENABLE_ASM_X86_64_BMI2
+ return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+# else
+ return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+# endif
+ }
+#else
+ (void)bmi2;
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
+ return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+#else
+ return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
+#endif
+}
+
+HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+
+size_t HUF_decompress1X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 1) return ERROR(GENERIC);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
+ workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 1) return ERROR(GENERIC);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+ workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X1 */
+
+
+/* ***********************************/
+/* Universal decompression selectors */
+/* ***********************************/
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
+{
+ /* single, double, quad */
+ {{0,0}, {1,1}}, /* Q==0 : impossible */
+ {{0,0}, {1,1}}, /* Q==1 : impossible */
+ {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
+ {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
+ {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
+ {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
+ {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
+ {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
+ {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
+ {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
+ {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
+ {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
+ {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
+ {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
+ {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
+ {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
+};
+#endif
+
+/* HUF_selectDecoder() :
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ * Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+ assert(dstSize > 0);
+ assert(dstSize <= 128*1024);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dstSize;
+ (void)cSrcSize;
+ return 0;
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dstSize;
+ (void)cSrcSize;
+ return 1;
+#else
+ /* decoder timing evaluation */
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
+ U32 const D256 = (U32)(dstSize >> 8);
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+ DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
+ return DTime1 < DTime0;
+ }
+#endif
+}
+
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+ size_t dstSize, const void* cSrc,
+ size_t cSrcSize, void* workSpace,
+ size_t wkspSize)
+{
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize == 0) return ERROR(corruption_detected);
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#else
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize):
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#endif
+ }
+}
+
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize);
+#else
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize):
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize);
+#endif
+ }
+}
+
+
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+#endif
+
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize == 0) return ERROR(corruption_detected);
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#else
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#endif
+ }
+}
+
diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c
new file mode 100644
index 00000000000..dbbc7919de5
--- /dev/null
+++ b/lib/zstd/decompress/zstd_ddict.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_ddict.c :
+ * concentrates all logic that needs to know the internals of ZSTD_DDict object */
+
+/*-*******************************************************
+* Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h" /* bmi2 */
+#include "../common/mem.h" /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "zstd_decompress_internal.h"
+#include "zstd_ddict.h"
+
+
+
+
+/*-*******************************************************
+* Types
+*********************************************************/
+struct ZSTD_DDict_s {
+ void* dictBuffer;
+ const void* dictContent;
+ size_t dictSize;
+ ZSTD_entropyDTables_t entropy;
+ U32 dictID;
+ U32 entropyPresent;
+ ZSTD_customMem cMem;
+}; /* typedef'd to ZSTD_DDict within "zstd.h" */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
+{
+ assert(ddict != NULL);
+ return ddict->dictContent;
+}
+
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
+{
+ assert(ddict != NULL);
+ return ddict->dictSize;
+}
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+ DEBUGLOG(4, "ZSTD_copyDDictParameters");
+ assert(dctx != NULL);
+ assert(ddict != NULL);
+ dctx->dictID = ddict->dictID;
+ dctx->prefixStart = ddict->dictContent;
+ dctx->virtualStart = ddict->dictContent;
+ dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
+ dctx->previousDstEnd = dctx->dictEnd;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+ dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+ if (ddict->entropyPresent) {
+ dctx->litEntropy = 1;
+ dctx->fseEntropy = 1;
+ dctx->LLTptr = ddict->entropy.LLTable;
+ dctx->MLTptr = ddict->entropy.MLTable;
+ dctx->OFTptr = ddict->entropy.OFTable;
+ dctx->HUFptr = ddict->entropy.hufTable;
+ dctx->entropy.rep[0] = ddict->entropy.rep[0];
+ dctx->entropy.rep[1] = ddict->entropy.rep[1];
+ dctx->entropy.rep[2] = ddict->entropy.rep[2];
+ } else {
+ dctx->litEntropy = 0;
+ dctx->fseEntropy = 0;
+ }
+}
+
+
+static size_t
+ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
+ ZSTD_dictContentType_e dictContentType)
+{
+ ddict->dictID = 0;
+ ddict->entropyPresent = 0;
+ if (dictContentType == ZSTD_dct_rawContent) return 0;
+
+ if (ddict->dictSize < 8) {
+ if (dictContentType == ZSTD_dct_fullDict)
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
+ return 0; /* pure content mode */
+ }
+ { U32 const magic = MEM_readLE32(ddict->dictContent);
+ if (magic != ZSTD_MAGIC_DICTIONARY) {
+ if (dictContentType == ZSTD_dct_fullDict)
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
+ return 0; /* pure content mode */
+ }
+ }
+ ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
+
+ /* load entropy tables */
+ RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
+ &ddict->entropy, ddict->dictContent, ddict->dictSize)),
+ dictionary_corrupted, "");
+ ddict->entropyPresent = 1;
+ return 0;
+}
+
+
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
+{
+ if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
+ ddict->dictBuffer = NULL;
+ ddict->dictContent = dict;
+ if (!dict) dictSize = 0;
+ } else {
+ void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
+ ddict->dictBuffer = internalBuffer;
+ ddict->dictContent = internalBuffer;
+ if (!internalBuffer) return ERROR(memory_allocation);
+ ZSTD_memcpy(internalBuffer, dict, dictSize);
+ }
+ ddict->dictSize = dictSize;
+ ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
+
+ /* parse dictionary content */
+ FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
+
+ return 0;
+}
+
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_customMem customMem)
+{
+ if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+ { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
+ if (ddict == NULL) return NULL;
+ ddict->cMem = customMem;
+ { size_t const initResult = ZSTD_initDDict_internal(ddict,
+ dict, dictSize,
+ dictLoadMethod, dictContentType);
+ if (ZSTD_isError(initResult)) {
+ ZSTD_freeDDict(ddict);
+ return NULL;
+ } }
+ return ddict;
+ }
+}
+
+/*! ZSTD_createDDict() :
+* Create a digested dictionary, to start decompression without startup delay.
+* `dict` content is copied inside DDict.
+* Consequently, `dict` can be released after `ZSTD_DDict` creation */
+ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
+{
+ ZSTD_customMem const allocator = { NULL, NULL, NULL };
+ return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
+}
+
+/*! ZSTD_createDDict_byReference() :
+ * Create a digested dictionary, to start decompression without startup delay.
+ * Dictionary content is simply referenced, it will be accessed during decompression.
+ * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+ ZSTD_customMem const allocator = { NULL, NULL, NULL };
+ return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
+}
+
+
+const ZSTD_DDict* ZSTD_initStaticDDict(
+ void* sBuffer, size_t sBufferSize,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
+{
+ size_t const neededSpace = sizeof(ZSTD_DDict)
+ + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+ ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
+ assert(sBuffer != NULL);
+ assert(dict != NULL);
+ if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
+ if (sBufferSize < neededSpace) return NULL;
+ if (dictLoadMethod == ZSTD_dlm_byCopy) {
+ ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
+ dict = ddict+1;
+ }
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
+ dict, dictSize,
+ ZSTD_dlm_byRef, dictContentType) ))
+ return NULL;
+ return ddict;
+}
+
+
+size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
+{
+ if (ddict==NULL) return 0; /* support free on NULL */
+ { ZSTD_customMem const cMem = ddict->cMem;
+ ZSTD_customFree(ddict->dictBuffer, cMem);
+ ZSTD_customFree(ddict, cMem);
+ return 0;
+ }
+}
+
+/*! ZSTD_estimateDDictSize() :
+ * Estimate amount of memory that will be needed to create a dictionary for decompression.
+ * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
+size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+ return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+}
+
+size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
+{
+ if (ddict==NULL) return 0; /* support sizeof on NULL */
+ return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ * Provides the dictID of the dictionary loaded into `ddict`.
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+ if (ddict==NULL) return 0;
+ return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
+}
diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h
new file mode 100644
index 00000000000..8c1a79d666f
--- /dev/null
+++ b/lib/zstd/decompress/zstd_ddict.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DDICT_H
+#define ZSTD_DDICT_H
+
+/*-*******************************************************
+ * Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h" /* size_t */
+#include <linux/zstd.h> /* ZSTD_DDict, and several public functions */
+
+
+/*-*******************************************************
+ * Interface
+ *********************************************************/
+
+/* note: several prototypes are already published in `zstd.h` :
+ * ZSTD_createDDict()
+ * ZSTD_createDDict_byReference()
+ * ZSTD_createDDict_advanced()
+ * ZSTD_freeDDict()
+ * ZSTD_initStaticDDict()
+ * ZSTD_sizeof_DDict()
+ * ZSTD_estimateDDictSize()
+ * ZSTD_getDictID_fromDict()
+ */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+
+
+#endif /* ZSTD_DDICT_H */
diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
new file mode 100644
index 00000000000..b9b935a9f5c
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -0,0 +1,2131 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ***************************************************************
+* Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
+ */
+#ifndef ZSTD_HEAPMODE
+# define ZSTD_HEAPMODE 1
+#endif
+
+/*!
+* LEGACY_SUPPORT :
+* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
+*/
+
+/*!
+ * MAXWINDOWSIZE_DEFAULT :
+ * maximum window size accepted by DStream __by default__.
+ * Frames requiring more memory will be rejected.
+ * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ */
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+/*!
+ * NO_FORWARD_PROGRESS_MAX :
+ * maximum allowed nb of calls to ZSTD_decompressStream()
+ * without any forward progress
+ * (defined as: no byte read from input, and no byte flushed to output)
+ * before triggering an error.
+ */
+#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
+# define ZSTD_NO_FORWARD_PROGRESS_MAX 16
+#endif
+
+
+/*-*******************************************************
+* Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/mem.h" /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
+#include "../common/zstd_internal.h" /* blockProperties_t */
+#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
+#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
+
+
+
+
+/* ***********************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
+
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+ * Currently, that means a 0.75 load factor.
+ * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+ * the load factor of the ddict hash set.
+ */
+
+#define DDICT_HASHSET_TABLE_BASE_SIZE 64
+#define DDICT_HASHSET_RESIZE_FACTOR 2
+
+/* Hash function to determine starting position of dict insertion within the table
+ * Returns an index between [0, hashSet->ddictPtrTableSize]
+ */
+static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
+ const U64 hash = xxh64(&dictID, sizeof(U32), 0);
+ /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
+ return hash & (hashSet->ddictPtrTableSize - 1);
+}
+
+/* Adds DDict to a hashset without resizing it.
+ * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
+ * Returns 0 if successful, or a zstd error code if something went wrong.
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+ const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+ size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+ const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+ RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+ DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+ while (hashSet->ddictPtrTable[idx] != NULL) {
+ /* Replace existing ddict if inserting ddict with same dictID */
+ if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+ DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+ hashSet->ddictPtrTable[idx] = ddict;
+ return 0;
+ }
+ idx &= idxRangeMask;
+ idx++;
+ }
+ DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+ hashSet->ddictPtrTable[idx] = ddict;
+ hashSet->ddictPtrCount++;
+ return 0;
+}
+
+/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
+ * rehashes all values, allocates new table, frees old table.
+ * Returns 0 on success, otherwise a zstd error code.
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+ size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+ const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+ const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+ size_t oldTableSize = hashSet->ddictPtrTableSize;
+ size_t i;
+
+ DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+ RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+ hashSet->ddictPtrTable = newTable;
+ hashSet->ddictPtrTableSize = newTableSize;
+ hashSet->ddictPtrCount = 0;
+ for (i = 0; i < oldTableSize; ++i) {
+ if (oldTable[i] != NULL) {
+ FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+ }
+ }
+ ZSTD_customFree((void*)oldTable, customMem);
+ DEBUGLOG(4, "Finished re-hash");
+ return 0;
+}
+
+/* Fetches a DDict with the given dictID
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+ size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+ const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+ DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+ for (;;) {
+ size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
+ if (currDictID == dictID || currDictID == 0) {
+ /* currDictID == 0 implies a NULL ddict entry */
+ break;
+ } else {
+ idx &= idxRangeMask; /* Goes to start of table when we reach the end */
+ idx++;
+ }
+ }
+ DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+ return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates space for and returns a ddict hash set
+ * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
+ * Returns NULL if allocation failed.
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+ ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+ DEBUGLOG(4, "Allocating new hash set");
+ if (!ret)
+ return NULL;
+ ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+ if (!ret->ddictPtrTable) {
+ ZSTD_customFree(ret, customMem);
+ return NULL;
+ }
+ ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+ ret->ddictPtrCount = 0;
+ return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+ DEBUGLOG(4, "Freeing ddict hash set");
+ if (hashSet && hashSet->ddictPtrTable) {
+ ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+ }
+ if (hashSet) {
+ ZSTD_customFree(hashSet, customMem);
+ }
+}
+
+/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
+ */
+static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
+ DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
+ if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
+ FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
+ }
+ FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
+ return 0;
+}
+
+/*-*************************************************************
+* Context management
+***************************************************************/
+size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
+{
+ if (dctx==NULL) return 0; /* support sizeof NULL */
+ return sizeof(*dctx)
+ + ZSTD_sizeof_DDict(dctx->ddictLocal)
+ + dctx->inBuffSize + dctx->outBuffSize;
+}
+
+size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
+
+
+static size_t ZSTD_startingInputLength(ZSTD_format_e format)
+{
+ size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
+ /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
+ assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
+ return startingInputLength;
+}
+
+static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
+{
+ assert(dctx->streamStage == zdss_init);
+ dctx->format = ZSTD_f_zstd1;
+ dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+ dctx->outBufferMode = ZSTD_bm_buffered;
+ dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+ dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
+}
+
+static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+{
+ dctx->staticSize = 0;
+ dctx->ddict = NULL;
+ dctx->ddictLocal = NULL;
+ dctx->dictEnd = NULL;
+ dctx->ddictIsCold = 0;
+ dctx->dictUses = ZSTD_dont_use;
+ dctx->inBuff = NULL;
+ dctx->inBuffSize = 0;
+ dctx->outBuffSize = 0;
+ dctx->streamStage = zdss_init;
+ dctx->noForwardProgress = 0;
+ dctx->oversizedDuration = 0;
+#if DYNAMIC_BMI2
+ dctx->bmi2 = ZSTD_cpuSupportsBmi2();
+#endif
+ dctx->ddictSet = NULL;
+ ZSTD_DCtx_resetParameters(dctx);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ dctx->dictContentEndForFuzzing = NULL;
+#endif
+}
+
+ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
+{
+ ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
+
+ if ((size_t)workspace & 7) return NULL; /* 8-aligned */
+ if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */
+
+ ZSTD_initDCtx_internal(dctx);
+ dctx->staticSize = workspaceSize;
+ dctx->inBuff = (char*)(dctx+1);
+ return dctx;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
+ if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+ { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
+ if (!dctx) return NULL;
+ dctx->customMem = customMem;
+ ZSTD_initDCtx_internal(dctx);
+ return dctx;
+ }
+}
+
+ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+ return ZSTD_createDCtx_internal(customMem);
+}
+
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+ DEBUGLOG(3, "ZSTD_createDCtx");
+ return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
+}
+
+static void ZSTD_clearDict(ZSTD_DCtx* dctx)
+{
+ ZSTD_freeDDict(dctx->ddictLocal);
+ dctx->ddictLocal = NULL;
+ dctx->ddict = NULL;
+ dctx->dictUses = ZSTD_dont_use;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+ if (dctx==NULL) return 0; /* support free on NULL */
+ RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
+ { ZSTD_customMem const cMem = dctx->customMem;
+ ZSTD_clearDict(dctx);
+ ZSTD_customFree(dctx->inBuff, cMem);
+ dctx->inBuff = NULL;
+ if (dctx->ddictSet) {
+ ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
+ dctx->ddictSet = NULL;
+ }
+ ZSTD_customFree(dctx, cMem);
+ return 0;
+ }
+}
+
+/* no longer useful */
+void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
+{
+ size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
+ ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
+}
+
+/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
+ * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
+ * accordingly sets the ddict to be used to decompress the frame.
+ *
+ * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
+ *
+ * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
+ */
+static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
+ assert(dctx->refMultipleDDicts && dctx->ddictSet);
+ DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
+ if (dctx->ddict) {
+ const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
+ if (frameDDict) {
+ DEBUGLOG(4, "DDict found!");
+ ZSTD_clearDict(dctx);
+ dctx->dictID = dctx->fParams.dictID;
+ dctx->ddict = frameDDict;
+ dctx->dictUses = ZSTD_use_indefinitely;
+ }
+ }
+}
+
+
+/*-*************************************************************
+ * Frame header decoding
+ ***************************************************************/
+
+/*! ZSTD_isFrame() :
+ * Tells if the content of `buffer` starts with a valid Frame Identifier.
+ * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ * Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+ if (size < ZSTD_FRAMEIDSIZE) return 0;
+ { U32 const magic = MEM_readLE32(buffer);
+ if (magic == ZSTD_MAGICNUMBER) return 1;
+ if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+ }
+ return 0;
+}
+
+/*! ZSTD_isSkippableFrame() :
+ * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
+ * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ */
+unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
+{
+ if (size < ZSTD_FRAMEIDSIZE) return 0;
+ { U32 const magic = MEM_readLE32(buffer);
+ if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+ }
+ return 0;
+}
+
+/* ZSTD_frameHeaderSize_internal() :
+ * srcSize must be large enough to reach header size fields.
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
+ * @return : size of the Frame Header
+ * or an error code, which can be tested with ZSTD_isError() */
+static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
+{
+ size_t const minInputSize = ZSTD_startingInputLength(format);
+ RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
+
+ { BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
+ U32 const dictID= fhd & 3;
+ U32 const singleSegment = (fhd >> 5) & 1;
+ U32 const fcsId = fhd >> 6;
+ return minInputSize + !singleSegment
+ + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
+ + (singleSegment && !fcsId);
+ }
+}
+
+/* ZSTD_frameHeaderSize() :
+ * srcSize must be >= ZSTD_frameHeaderSize_prefix.
+ * @return : size of the Frame Header,
+ * or an error code (if srcSize is too small) */
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+{
+ return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/* ZSTD_getFrameHeader_advanced() :
+ * decode Frame Header, or require larger `srcSize`.
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ * @return : 0, `zfhPtr` is correctly filled,
+ * >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ * or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
+{
+ const BYTE* ip = (const BYTE*)src;
+ size_t const minInputSize = ZSTD_startingInputLength(format);
+
+ ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
+ if (srcSize < minInputSize) return minInputSize;
+ RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
+
+ if ( (format != ZSTD_f_zstd1_magicless)
+ && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
+ if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+ /* skippable frame */
+ if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
+ return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
+ ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
+ zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
+ zfhPtr->frameType = ZSTD_skippableFrame;
+ return 0;
+ }
+ RETURN_ERROR(prefix_unknown, "");
+ }
+
+ /* ensure there is enough `srcSize` to fully read/decode frame header */
+ { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
+ if (srcSize < fhsize) return fhsize;
+ zfhPtr->headerSize = (U32)fhsize;
+ }
+
+ { BYTE const fhdByte = ip[minInputSize-1];
+ size_t pos = minInputSize;
+ U32 const dictIDSizeCode = fhdByte&3;
+ U32 const checksumFlag = (fhdByte>>2)&1;
+ U32 const singleSegment = (fhdByte>>5)&1;
+ U32 const fcsID = fhdByte>>6;
+ U64 windowSize = 0;
+ U32 dictID = 0;
+ U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
+ RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
+ "reserved bits, must be zero");
+
+ if (!singleSegment) {
+ BYTE const wlByte = ip[pos++];
+ U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+ RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
+ windowSize = (1ULL << windowLog);
+ windowSize += (windowSize >> 3) * (wlByte&7);
+ }
+ switch(dictIDSizeCode)
+ {
+ default:
+ assert(0); /* impossible */
+ ZSTD_FALLTHROUGH;
+ case 0 : break;
+ case 1 : dictID = ip[pos]; pos++; break;
+ case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
+ case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
+ }
+ switch(fcsID)
+ {
+ default:
+ assert(0); /* impossible */
+ ZSTD_FALLTHROUGH;
+ case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
+ case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
+ case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
+ case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
+ }
+ if (singleSegment) windowSize = frameContentSize;
+
+ zfhPtr->frameType = ZSTD_frame;
+ zfhPtr->frameContentSize = frameContentSize;
+ zfhPtr->windowSize = windowSize;
+ zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+ zfhPtr->dictID = dictID;
+ zfhPtr->checksumFlag = checksumFlag;
+ }
+ return 0;
+}
+
+/* ZSTD_getFrameHeader() :
+ * decode Frame Header, or require larger `srcSize`.
+ * note : this function does not consume input, it only reads it.
+ * @return : 0, `zfhPtr` is correctly filled,
+ * >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ * or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
+{
+ return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
+}
+
+/* ZSTD_getFrameContentSize() :
+ * compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
+ * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+ { ZSTD_frameHeader zfh;
+ if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+ return ZSTD_CONTENTSIZE_ERROR;
+ if (zfh.frameType == ZSTD_skippableFrame) {
+ return 0;
+ } else {
+ return zfh.frameContentSize;
+ } }
+}
+
+static size_t readSkippableFrameSize(void const* src, size_t srcSize)
+{
+ size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
+ U32 sizeU32;
+
+ RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+
+ sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
+ RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+ frameParameter_unsupported, "");
+ {
+ size_t const skippableSize = skippableHeaderSize + sizeU32;
+ RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+ return skippableSize;
+ }
+}
+
+/*! ZSTD_readSkippableFrame() :
+ * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+ * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
+ * in the magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
+ const void* src, size_t srcSize)
+{
+ U32 const magicNumber = MEM_readLE32(src);
+ size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
+ size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
+
+ /* check input validity */
+ RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
+ RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
+ RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+
+ /* deliver payload */
+ if (skippableContentSize > 0 && dst != NULL)
+ ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
+ if (magicVariant != NULL)
+ *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
+ return skippableContentSize;
+}
+
+/* ZSTD_findDecompressedSize() :
+ * compatible with legacy mode
+ * `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ * skippable frames
+ * @return : decompressed size of the frames contained */
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+{
+ unsigned long long totalDstSize = 0;
+
+ while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
+ U32 const magicNumber = MEM_readLE32(src);
+
+ if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+ size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+ if (ZSTD_isError(skippableSize)) {
+ return ZSTD_CONTENTSIZE_ERROR;
+ }
+ assert(skippableSize <= srcSize);
+
+ src = (const BYTE *)src + skippableSize;
+ srcSize -= skippableSize;
+ continue;
+ }
+
+ { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+ if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+
+ /* check for overflow */
+ if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+ totalDstSize += ret;
+ }
+ { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+ if (ZSTD_isError(frameSrcSize)) {
+ return ZSTD_CONTENTSIZE_ERROR;
+ }
+
+ src = (const BYTE *)src + frameSrcSize;
+ srcSize -= frameSrcSize;
+ }
+ } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
+
+ if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
+
+ return totalDstSize;
+}
+
+/* ZSTD_getDecompressedSize() :
+ * compatible with legacy mode
+ * @return : decompressed size if known, 0 otherwise
+ note : 0 can mean any of the following :
+ - frame content is empty
+ - decompressed size field is not present in frame header
+ - frame header unknown / not supported
+ - frame header not complete (`srcSize` too small) */
+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
+{
+ unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+ ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
+ return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
+}
+
+
+/* ZSTD_decodeFrameHeader() :
+ * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+ * If multiple DDict references are enabled, also will choose the correct DDict to use.
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
+{
+ size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
+ if (ZSTD_isError(result)) return result; /* invalid header */
+ RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+
+ /* Reference DDict requested by frame if dctx references multiple ddicts */
+ if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
+ ZSTD_DCtx_selectFrameDDict(dctx);
+ }
+
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ /* Skip the dictID check in fuzzing mode, because it makes the search
+ * harder.
+ */
+ RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
+ dictionary_wrong, "");
+#endif
+ dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
+ if (dctx->validateChecksum) xxh64_reset(&dctx->xxhState, 0);
+ dctx->processedCSize += headerSize;
+ return 0;
+}
+
+static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
+{
+ ZSTD_frameSizeInfo frameSizeInfo;
+ frameSizeInfo.compressedSize = ret;
+ frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+ return frameSizeInfo;
+}
+
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+{
+ ZSTD_frameSizeInfo frameSizeInfo;
+ ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+
+
+ if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+ && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+ frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+ assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+ frameSizeInfo.compressedSize <= srcSize);
+ return frameSizeInfo;
+ } else {
+ const BYTE* ip = (const BYTE*)src;
+ const BYTE* const ipstart = ip;
+ size_t remainingSize = srcSize;
+ size_t nbBlocks = 0;
+ ZSTD_frameHeader zfh;
+
+ /* Extract Frame Header */
+ { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+ if (ZSTD_isError(ret))
+ return ZSTD_errorFrameSizeInfo(ret);
+ if (ret > 0)
+ return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+ }
+
+ ip += zfh.headerSize;
+ remainingSize -= zfh.headerSize;
+
+ /* Iterate over each block */
+ while (1) {
+ blockProperties_t blockProperties;
+ size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+ if (ZSTD_isError(cBlockSize))
+ return ZSTD_errorFrameSizeInfo(cBlockSize);
+
+ if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+ return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+
+ ip += ZSTD_blockHeaderSize + cBlockSize;
+ remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+ nbBlocks++;
+
+ if (blockProperties.lastBlock) break;
+ }
+
+ /* Final frame content checksum */
+ if (zfh.checksumFlag) {
+ if (remainingSize < 4)
+ return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+ ip += 4;
+ }
+
+ frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
+ frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
+ ? zfh.frameContentSize
+ : nbBlocks * zfh.blockSizeMax;
+ return frameSizeInfo;
+ }
+}
+
+/* ZSTD_findFrameCompressedSize() :
+ * compatible with legacy mode
+ * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ * `srcSize` must be at least as large as the frame contained
+ * @return : the compressed size of the frame starting at `src` */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+ return frameSizeInfo.compressedSize;
+}
+
+/* ZSTD_decompressBound() :
+ * compatible with legacy mode
+ * `src` must point to the start of a ZSTD frame or a skippeable frame
+ * `srcSize` must be at least as large as the frame contained
+ * @return : the maximum decompressed size of the compressed source
+ */
+unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+{
+ unsigned long long bound = 0;
+ /* Iterate over each frame */
+ while (srcSize > 0) {
+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+ size_t const compressedSize = frameSizeInfo.compressedSize;
+ unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+ if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+ return ZSTD_CONTENTSIZE_ERROR;
+ assert(srcSize >= compressedSize);
+ src = (const BYTE*)src + compressedSize;
+ srcSize -= compressedSize;
+ bound += decompressedBound;
+ }
+ return bound;
+}
+
+
+/*-*************************************************************
+ * Frame decoding
+ ***************************************************************/
+
+/* ZSTD_insertBlock() :
+ * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+ DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
+ ZSTD_checkContinuity(dctx, blockStart, blockSize);
+ dctx->previousDstEnd = (const char*)blockStart + blockSize;
+ return blockSize;
+}
+
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ DEBUGLOG(5, "ZSTD_copyRawBlock");
+ RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
+ if (dst == NULL) {
+ if (srcSize == 0) return 0;
+ RETURN_ERROR(dstBuffer_null, "");
+ }
+ ZSTD_memcpy(dst, src, srcSize);
+ return srcSize;
+}
+
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+ BYTE b,
+ size_t regenSize)
+{
+ RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
+ if (dst == NULL) {
+ if (regenSize == 0) return 0;
+ RETURN_ERROR(dstBuffer_null, "");
+ }
+ ZSTD_memset(dst, b, regenSize);
+ return regenSize;
+}
+
+static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
+{
+ (void)dctx;
+ (void)uncompressedSize;
+ (void)compressedSize;
+ (void)streaming;
+}
+
+
+/*! ZSTD_decompressFrame() :
+ * @dctx must be properly initialized
+ * will update *srcPtr and *srcSizePtr,
+ * to make *srcPtr progress by one frame. */
+static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void** srcPtr, size_t *srcSizePtr)
+{
+ const BYTE* const istart = (const BYTE*)(*srcPtr);
+ const BYTE* ip = istart;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
+ BYTE* op = ostart;
+ size_t remainingSrcSize = *srcSizePtr;
+
+ DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
+
+ /* check */
+ RETURN_ERROR_IF(
+ remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
+ srcSize_wrong, "");
+
+ /* Frame Header */
+ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+ ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
+ if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+ RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
+ srcSize_wrong, "");
+ FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
+ ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
+ }
+
+ /* Loop on each block */
+ while (1) {
+ size_t decodedSize;
+ blockProperties_t blockProperties;
+ size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
+ if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+ ip += ZSTD_blockHeaderSize;
+ remainingSrcSize -= ZSTD_blockHeaderSize;
+ RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
+
+ switch(blockProperties.blockType)
+ {
+ case bt_compressed:
+ decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
+ break;
+ case bt_raw :
+ decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
+ break;
+ case bt_rle :
+ decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
+ break;
+ case bt_reserved :
+ default:
+ RETURN_ERROR(corruption_detected, "invalid block type");
+ }
+
+ if (ZSTD_isError(decodedSize)) return decodedSize;
+ if (dctx->validateChecksum)
+ xxh64_update(&dctx->xxhState, op, decodedSize);
+ if (decodedSize != 0)
+ op += decodedSize;
+ assert(ip != NULL);
+ ip += cBlockSize;
+ remainingSrcSize -= cBlockSize;
+ if (blockProperties.lastBlock) break;
+ }
+
+ if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+ RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
+ corruption_detected, "");
+ }
+ if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
+ RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
+ if (!dctx->forceIgnoreChecksum) {
+ U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
+ U32 checkRead;
+ checkRead = MEM_readLE32(ip);
+ RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
+ }
+ ip += 4;
+ remainingSrcSize -= 4;
+ }
+ ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
+ /* Allow caller to get size read */
+ *srcPtr = ip;
+ *srcSizePtr = remainingSrcSize;
+ return (size_t)(op-ostart);
+}
+
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ const ZSTD_DDict* ddict)
+{
+ void* const dststart = dst;
+ int moreThan1Frame = 0;
+
+ DEBUGLOG(5, "ZSTD_decompressMultiFrame");
+ assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
+
+ if (ddict) {
+ dict = ZSTD_DDict_dictContent(ddict);
+ dictSize = ZSTD_DDict_dictSize(ddict);
+ }
+
+ while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
+
+
+ { U32 const magicNumber = MEM_readLE32(src);
+ DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
+ (unsigned)magicNumber, ZSTD_MAGICNUMBER);
+ if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+ size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+ FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+ assert(skippableSize <= srcSize);
+
+ src = (const BYTE *)src + skippableSize;
+ srcSize -= skippableSize;
+ continue;
+ } }
+
+ if (ddict) {
+ /* we were called from ZSTD_decompress_usingDDict */
+ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
+ } else {
+ /* this will initialize correctly with no dict if dict == NULL, so
+ * use this in all cases but ddict */
+ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
+ }
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+ { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
+ &src, &srcSize);
+ RETURN_ERROR_IF(
+ (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
+ && (moreThan1Frame==1),
+ srcSize_wrong,
+ "At least one frame successfully completed, "
+ "but following bytes are garbage: "
+ "it's more likely to be a srcSize error, "
+ "specifying more input bytes than size of frame(s). "
+ "Note: one could be unlucky, it might be a corruption error instead, "
+ "happening right at the place where we expect zstd magic bytes. "
+ "But this is _much_ less likely than a srcSize field error.");
+ if (ZSTD_isError(res)) return res;
+ assert(res <= dstCapacity);
+ if (res != 0)
+ dst = (BYTE*)dst + res;
+ dstCapacity -= res;
+ }
+ moreThan1Frame = 1;
+ } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
+
+ RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
+
+ return (size_t)((BYTE*)dst - (BYTE*)dststart);
+}
+
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize)
+{
+ return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
+}
+
+
+static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
+{
+ switch (dctx->dictUses) {
+ default:
+ assert(0 /* Impossible */);
+ ZSTD_FALLTHROUGH;
+ case ZSTD_dont_use:
+ ZSTD_clearDict(dctx);
+ return NULL;
+ case ZSTD_use_indefinitely:
+ return dctx->ddict;
+ case ZSTD_use_once:
+ dctx->dictUses = ZSTD_dont_use;
+ return dctx->ddict;
+ }
+}
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
+}
+
+
+size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
+ size_t regenSize;
+ ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem);
+ RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
+ regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+ ZSTD_freeDCtx(dctx);
+ return regenSize;
+#else /* stack mode */
+ ZSTD_DCtx dctx;
+ ZSTD_initDCtx_internal(&dctx);
+ return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
+
+
+/*-**************************************
+* Advanced Streaming Decompression API
+* Bufferless and synchronous
+****************************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
+
+/*
+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
+ * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
+ * be streamed.
+ *
+ * For blocks that can be streamed, this allows us to reduce the latency until we produce
+ * output, and avoid copying the input.
+ *
+ * @param inputSize - The total amount of input that the caller currently has.
+ */
+static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
+ if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
+ return dctx->expected;
+ if (dctx->bType != bt_raw)
+ return dctx->expected;
+ return BOUNDED(1, inputSize, dctx->expected);
+}
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
+ switch(dctx->stage)
+ {
+ default: /* should not happen */
+ assert(0);
+ ZSTD_FALLTHROUGH;
+ case ZSTDds_getFrameHeaderSize:
+ ZSTD_FALLTHROUGH;
+ case ZSTDds_decodeFrameHeader:
+ return ZSTDnit_frameHeader;
+ case ZSTDds_decodeBlockHeader:
+ return ZSTDnit_blockHeader;
+ case ZSTDds_decompressBlock:
+ return ZSTDnit_block;
+ case ZSTDds_decompressLastBlock:
+ return ZSTDnit_lastBlock;
+ case ZSTDds_checkChecksum:
+ return ZSTDnit_checksum;
+ case ZSTDds_decodeSkippableHeader:
+ ZSTD_FALLTHROUGH;
+ case ZSTDds_skipFrame:
+ return ZSTDnit_skippableFrame;
+ }
+}
+
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
+
+/* ZSTD_decompressContinue() :
+ * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
+ * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
+ * or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
+ /* Sanity check */
+ RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+ dctx->processedCSize += srcSize;
+
+ switch (dctx->stage)
+ {
+ case ZSTDds_getFrameHeaderSize :
+ assert(src != NULL);
+ if (dctx->format == ZSTD_f_zstd1) { /* allows header */
+ assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
+ if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
+ ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+ dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */
+ dctx->stage = ZSTDds_decodeSkippableHeader;
+ return 0;
+ } }
+ dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
+ if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
+ ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+ dctx->expected = dctx->headerSize - srcSize;
+ dctx->stage = ZSTDds_decodeFrameHeader;
+ return 0;
+
+ case ZSTDds_decodeFrameHeader:
+ assert(src != NULL);
+ ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
+ FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
+ dctx->expected = ZSTD_blockHeaderSize;
+ dctx->stage = ZSTDds_decodeBlockHeader;
+ return 0;
+
+ case ZSTDds_decodeBlockHeader:
+ { blockProperties_t bp;
+ size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+ if (ZSTD_isError(cBlockSize)) return cBlockSize;
+ RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
+ dctx->expected = cBlockSize;
+ dctx->bType = bp.blockType;
+ dctx->rleSize = bp.origSize;
+ if (cBlockSize) {
+ dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+ return 0;
+ }
+ /* empty block */
+ if (bp.lastBlock) {
+ if (dctx->fParams.checksumFlag) {
+ dctx->expected = 4;
+ dctx->stage = ZSTDds_checkChecksum;
+ } else {
+ dctx->expected = 0; /* end of frame */
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ }
+ } else {
+ dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */
+ dctx->stage = ZSTDds_decodeBlockHeader;
+ }
+ return 0;
+ }
+
+ case ZSTDds_decompressLastBlock:
+ case ZSTDds_decompressBlock:
+ DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
+ { size_t rSize;
+ switch(dctx->bType)
+ {
+ case bt_compressed:
+ DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
+ rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
+ dctx->expected = 0; /* Streaming not supported */
+ break;
+ case bt_raw :
+ assert(srcSize <= dctx->expected);
+ rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
+ FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
+ assert(rSize == srcSize);
+ dctx->expected -= rSize;
+ break;
+ case bt_rle :
+ rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
+ dctx->expected = 0; /* Streaming not supported */
+ break;
+ case bt_reserved : /* should never happen */
+ default:
+ RETURN_ERROR(corruption_detected, "invalid block type");
+ }
+ FORWARD_IF_ERROR(rSize, "");
+ RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
+ DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
+ dctx->decodedSize += rSize;
+ if (dctx->validateChecksum) xxh64_update(&dctx->xxhState, dst, rSize);
+ dctx->previousDstEnd = (char*)dst + rSize;
+
+ /* Stay on the same stage until we are finished streaming the block. */
+ if (dctx->expected > 0) {
+ return rSize;
+ }
+
+ if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
+ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
+ RETURN_ERROR_IF(
+ dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+ && dctx->decodedSize != dctx->fParams.frameContentSize,
+ corruption_detected, "");
+ if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
+ dctx->expected = 4;
+ dctx->stage = ZSTDds_checkChecksum;
+ } else {
+ ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+ dctx->expected = 0; /* ends here */
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ }
+ } else {
+ dctx->stage = ZSTDds_decodeBlockHeader;
+ dctx->expected = ZSTD_blockHeaderSize;
+ }
+ return rSize;
+ }
+
+ case ZSTDds_checkChecksum:
+ assert(srcSize == 4); /* guaranteed by dctx->expected */
+ {
+ if (dctx->validateChecksum) {
+ U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
+ U32 const check32 = MEM_readLE32(src);
+ DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
+ RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
+ }
+ ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+ dctx->expected = 0;
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ return 0;
+ }
+
+ case ZSTDds_decodeSkippableHeader:
+ assert(src != NULL);
+ assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
+ ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
+ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
+ dctx->stage = ZSTDds_skipFrame;
+ return 0;
+
+ case ZSTDds_skipFrame:
+ dctx->expected = 0;
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ return 0;
+
+ default:
+ assert(0); /* impossible */
+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
+ }
+}
+
+
+static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+ dctx->prefixStart = dict;
+ dctx->previousDstEnd = (const char*)dict + dictSize;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+ dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+ return 0;
+}
+
+/*! ZSTD_loadDEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of entropy tables read */
+size_t
+ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+ const void* const dict, size_t const dictSize)
+{
+ const BYTE* dictPtr = (const BYTE*)dict;
+ const BYTE* const dictEnd = dictPtr + dictSize;
+
+ RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
+ assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
+ dictPtr += 8; /* skip header = magic + dictID */
+
+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
+ ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
+ { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
+ size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
+#ifdef HUF_FORCE_DECOMPRESS_X1
+ /* in minimal huffman, we always use X1 variants */
+ size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
+ dictPtr, dictEnd - dictPtr,
+ workspace, workspaceSize);
+#else
+ size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
+ dictPtr, (size_t)(dictEnd - dictPtr),
+ workspace, workspaceSize);
+#endif
+ RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
+ dictPtr += hSize;
+ }
+
+ { short offcodeNCount[MaxOff+1];
+ unsigned offcodeMaxValue = MaxOff, offcodeLog;
+ size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
+ RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
+ RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+ ZSTD_buildFSETable( entropy->OFTable,
+ offcodeNCount, offcodeMaxValue,
+ OF_base, OF_bits,
+ offcodeLog,
+ entropy->workspace, sizeof(entropy->workspace),
+ /* bmi2 */0);
+ dictPtr += offcodeHeaderSize;
+ }
+
+ { short matchlengthNCount[MaxML+1];
+ unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+ size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+ RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
+ RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+ ZSTD_buildFSETable( entropy->MLTable,
+ matchlengthNCount, matchlengthMaxValue,
+ ML_base, ML_bits,
+ matchlengthLog,
+ entropy->workspace, sizeof(entropy->workspace),
+ /* bmi2 */ 0);
+ dictPtr += matchlengthHeaderSize;
+ }
+
+ { short litlengthNCount[MaxLL+1];
+ unsigned litlengthMaxValue = MaxLL, litlengthLog;
+ size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+ RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
+ RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+ ZSTD_buildFSETable( entropy->LLTable,
+ litlengthNCount, litlengthMaxValue,
+ LL_base, LL_bits,
+ litlengthLog,
+ entropy->workspace, sizeof(entropy->workspace),
+ /* bmi2 */ 0);
+ dictPtr += litlengthHeaderSize;
+ }
+
+ RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+ { int i;
+ size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
+ for (i=0; i<3; i++) {
+ U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
+ RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
+ dictionary_corrupted, "");
+ entropy->rep[i] = rep;
+ } }
+
+ return (size_t)(dictPtr - (const BYTE*)dict);
+}
+
+static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
+ { U32 const magic = MEM_readLE32(dict);
+ if (magic != ZSTD_MAGIC_DICTIONARY) {
+ return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
+ } }
+ dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+
+ /* load entropy tables */
+ { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
+ RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
+ dict = (const char*)dict + eSize;
+ dictSize -= eSize;
+ }
+ dctx->litEntropy = dctx->fseEntropy = 1;
+
+ /* reference dictionary content */
+ return ZSTD_refDictContent(dctx, dict, dictSize);
+}
+
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
+{
+ assert(dctx != NULL);
+ dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ dctx->processedCSize = 0;
+ dctx->decodedSize = 0;
+ dctx->previousDstEnd = NULL;
+ dctx->prefixStart = NULL;
+ dctx->virtualStart = NULL;
+ dctx->dictEnd = NULL;
+ dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
+ dctx->litEntropy = dctx->fseEntropy = 0;
+ dctx->dictID = 0;
+ dctx->bType = bt_reserved;
+ ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+ ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
+ dctx->LLTptr = dctx->entropy.LLTable;
+ dctx->MLTptr = dctx->entropy.MLTable;
+ dctx->OFTptr = dctx->entropy.OFTable;
+ dctx->HUFptr = dctx->entropy.hufTable;
+ return 0;
+}
+
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+ if (dict && dictSize)
+ RETURN_ERROR_IF(
+ ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
+ dictionary_corrupted, "");
+ return 0;
+}
+
+
+/* ====== ZSTD_DDict ====== */
+
+size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+ DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
+ assert(dctx != NULL);
+ if (ddict) {
+ const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
+ size_t const dictSize = ZSTD_DDict_dictSize(ddict);
+ const void* const dictEnd = dictStart + dictSize;
+ dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
+ DEBUGLOG(4, "DDict is %s",
+ dctx->ddictIsCold ? "~cold~" : "hot!");
+ }
+ FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+ if (ddict) { /* NULL ddict is equivalent to no dictionary */
+ ZSTD_copyDDictParameters(dctx, ddict);
+ }
+ return 0;
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ * Provides the dictID stored within dictionary.
+ * if @return == 0, the dictionary is not conformant with Zstandard specification.
+ * It can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+ if (dictSize < 8) return 0;
+ if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
+ return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ * Provides the dictID required to decompress frame stored within `src`.
+ * If @return == 0, the dictID could not be decoded.
+ * This could for one of the following reasons :
+ * - The frame does not require a dictionary (most common case).
+ * - The frame was built with dictID intentionally removed.
+ * Needed dictionary is a hidden information.
+ * Note : this use case also happens when using a non-conformant dictionary.
+ * - `srcSize` is too small, and as a result, frame header could not be decoded.
+ * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ * - This is not a Zstandard frame.
+ * When identifying the exact failure cause, it's possible to use
+ * ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+ ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
+ size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+ if (ZSTD_isError(hError)) return 0;
+ return zfp.dictID;
+}
+
+
+/*! ZSTD_decompress_usingDDict() :
+* Decompression using a pre-digested Dictionary
+* Use dictionary without significant overhead. */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const ZSTD_DDict* ddict)
+{
+ /* pass content and size in case legacy frames are encountered */
+ return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+ NULL, 0,
+ ddict);
+}
+
+
+/*=====================================
+* Streaming decompression
+*====================================*/
+
+ZSTD_DStream* ZSTD_createDStream(void)
+{
+ DEBUGLOG(3, "ZSTD_createDStream");
+ return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+ return ZSTD_initStaticDCtx(workspace, workspaceSize);
+}
+
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
+{
+ return ZSTD_createDCtx_internal(customMem);
+}
+
+size_t ZSTD_freeDStream(ZSTD_DStream* zds)
+{
+ return ZSTD_freeDCtx(zds);
+}
+
+
+/* *** Initialization *** */
+
+size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
+{
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+ ZSTD_clearDict(dctx);
+ if (dict && dictSize != 0) {
+ dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+ RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
+ dctx->ddict = dctx->ddictLocal;
+ dctx->dictUses = ZSTD_use_indefinitely;
+ }
+ return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+ FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
+ dctx->dictUses = ZSTD_use_once;
+ return 0;
+}
+
+size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
+{
+ return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+
+/* ZSTD_initDStream_usingDict() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+{
+ DEBUGLOG(4, "ZSTD_initDStream_usingDict");
+ FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
+ return ZSTD_startingInputLength(zds->format);
+}
+
+/* note : this variant can't fail */
+size_t ZSTD_initDStream(ZSTD_DStream* zds)
+{
+ DEBUGLOG(4, "ZSTD_initDStream");
+ return ZSTD_initDStream_usingDDict(zds, NULL);
+}
+
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive decompression session
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
+{
+ FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
+ return ZSTD_startingInputLength(dctx->format);
+}
+
+/* ZSTD_resetDStream() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
+{
+ FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
+ return ZSTD_startingInputLength(dctx->format);
+}
+
+
+size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+ ZSTD_clearDict(dctx);
+ if (ddict) {
+ dctx->ddict = ddict;
+ dctx->dictUses = ZSTD_use_indefinitely;
+ if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
+ if (dctx->ddictSet == NULL) {
+ dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
+ if (!dctx->ddictSet) {
+ RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
+ }
+ }
+ assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */
+ FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
+ }
+ }
+ return 0;
+}
+
+/* ZSTD_DCtx_setMaxWindowSize() :
+ * note : no direct equivalence in ZSTD_DCtx_setParameter,
+ * since this version sets windowSize, and the other sets windowLog */
+size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
+{
+ ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
+ size_t const min = (size_t)1 << bounds.lowerBound;
+ size_t const max = (size_t)1 << bounds.upperBound;
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+ RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
+ RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
+ dctx->maxWindowSize = maxWindowSize;
+ return 0;
+}
+
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
+{
+ return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
+}
+
+ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
+{
+ ZSTD_bounds bounds = { 0, 0, 0 };
+ switch(dParam) {
+ case ZSTD_d_windowLogMax:
+ bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
+ bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+ return bounds;
+ case ZSTD_d_format:
+ bounds.lowerBound = (int)ZSTD_f_zstd1;
+ bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
+ ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+ return bounds;
+ case ZSTD_d_stableOutBuffer:
+ bounds.lowerBound = (int)ZSTD_bm_buffered;
+ bounds.upperBound = (int)ZSTD_bm_stable;
+ return bounds;
+ case ZSTD_d_forceIgnoreChecksum:
+ bounds.lowerBound = (int)ZSTD_d_validateChecksum;
+ bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
+ return bounds;
+ case ZSTD_d_refMultipleDDicts:
+ bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+ bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
+ return bounds;
+ default:;
+ }
+ bounds.error = ERROR(parameter_unsupported);
+ return bounds;
+}
+
+/* ZSTD_dParam_withinBounds:
+ * @return 1 if value is within dParam bounds,
+ * 0 otherwise */
+static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
+{
+ ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
+ if (ZSTD_isError(bounds.error)) return 0;
+ if (value < bounds.lowerBound) return 0;
+ if (value > bounds.upperBound) return 0;
+ return 1;
+}
+
+#define CHECK_DBOUNDS(p,v) { \
+ RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
+}
+
+size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
+{
+ switch (param) {
+ case ZSTD_d_windowLogMax:
+ *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
+ return 0;
+ case ZSTD_d_format:
+ *value = (int)dctx->format;
+ return 0;
+ case ZSTD_d_stableOutBuffer:
+ *value = (int)dctx->outBufferMode;
+ return 0;
+ case ZSTD_d_forceIgnoreChecksum:
+ *value = (int)dctx->forceIgnoreChecksum;
+ return 0;
+ case ZSTD_d_refMultipleDDicts:
+ *value = (int)dctx->refMultipleDDicts;
+ return 0;
+ default:;
+ }
+ RETURN_ERROR(parameter_unsupported, "");
+}
+
+size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
+{
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+ switch(dParam) {
+ case ZSTD_d_windowLogMax:
+ if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
+ CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
+ dctx->maxWindowSize = ((size_t)1) << value;
+ return 0;
+ case ZSTD_d_format:
+ CHECK_DBOUNDS(ZSTD_d_format, value);
+ dctx->format = (ZSTD_format_e)value;
+ return 0;
+ case ZSTD_d_stableOutBuffer:
+ CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
+ dctx->outBufferMode = (ZSTD_bufferMode_e)value;
+ return 0;
+ case ZSTD_d_forceIgnoreChecksum:
+ CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
+ dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
+ return 0;
+ case ZSTD_d_refMultipleDDicts:
+ CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
+ if (dctx->staticSize != 0) {
+ RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
+ }
+ dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
+ return 0;
+ default:;
+ }
+ RETURN_ERROR(parameter_unsupported, "");
+}
+
+size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
+{
+ if ( (reset == ZSTD_reset_session_only)
+ || (reset == ZSTD_reset_session_and_parameters) ) {
+ dctx->streamStage = zdss_init;
+ dctx->noForwardProgress = 0;
+ }
+ if ( (reset == ZSTD_reset_parameters)
+ || (reset == ZSTD_reset_session_and_parameters) ) {
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+ ZSTD_clearDict(dctx);
+ ZSTD_DCtx_resetParameters(dctx);
+ }
+ return 0;
+}
+
+
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
+{
+ return ZSTD_sizeof_DCtx(dctx);
+}
+
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+{
+ size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+ /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
+ unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
+ unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
+ size_t const minRBSize = (size_t) neededSize;
+ RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
+ frameParameter_windowTooLarge, "");
+ return minRBSize;
+}
+
+size_t ZSTD_estimateDStreamSize(size_t windowSize)
+{
+ size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+ size_t const inBuffSize = blockSize; /* no block can be larger */
+ size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
+ return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
+}
+
+size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
+{
+ U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
+ ZSTD_frameHeader zfh;
+ size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
+ if (ZSTD_isError(err)) return err;
+ RETURN_ERROR_IF(err>0, srcSize_wrong, "");
+ RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
+ frameParameter_windowTooLarge, "");
+ return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
+}
+
+
+/* ***** Decompression ***** */
+
+static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+ return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
+}
+
+static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+ if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
+ zds->oversizedDuration++;
+ else
+ zds->oversizedDuration = 0;
+}
+
+static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
+{
+ return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */
+static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
+{
+ ZSTD_outBuffer const expect = zds->expectedOutBuffer;
+ /* No requirement when ZSTD_obm_stable is not enabled. */
+ if (zds->outBufferMode != ZSTD_bm_stable)
+ return 0;
+ /* Any buffer is allowed in zdss_init, this must be the same for every other call until
+ * the context is reset.
+ */
+ if (zds->streamStage == zdss_init)
+ return 0;
+ /* The buffer must match our expectation exactly. */
+ if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
+ return 0;
+ RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
+}
+
+/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
+ * and updates the stage and the output buffer state. This call is extracted so it can be
+ * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
+ * NOTE: You must break after calling this function since the streamStage is modified.
+ */
+static size_t ZSTD_decompressContinueStream(
+ ZSTD_DStream* zds, char** op, char* oend,
+ void const* src, size_t srcSize) {
+ int const isSkipFrame = ZSTD_isSkipFrame(zds);
+ if (zds->outBufferMode == ZSTD_bm_buffered) {
+ size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
+ size_t const decodedSize = ZSTD_decompressContinue(zds,
+ zds->outBuff + zds->outStart, dstSize, src, srcSize);
+ FORWARD_IF_ERROR(decodedSize, "");
+ if (!decodedSize && !isSkipFrame) {
+ zds->streamStage = zdss_read;
+ } else {
+ zds->outEnd = zds->outStart + decodedSize;
+ zds->streamStage = zdss_flush;
+ }
+ } else {
+ /* Write directly into the output buffer */
+ size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
+ size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
+ FORWARD_IF_ERROR(decodedSize, "");
+ *op += decodedSize;
+ /* Flushing is not needed. */
+ zds->streamStage = zdss_read;
+ assert(*op <= oend);
+ assert(zds->outBufferMode == ZSTD_bm_stable);
+ }
+ return 0;
+}
+
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+ const char* const src = (const char*)input->src;
+ const char* const istart = input->pos != 0 ? src + input->pos : src;
+ const char* const iend = input->size != 0 ? src + input->size : src;
+ const char* ip = istart;
+ char* const dst = (char*)output->dst;
+ char* const ostart = output->pos != 0 ? dst + output->pos : dst;
+ char* const oend = output->size != 0 ? dst + output->size : dst;
+ char* op = ostart;
+ U32 someMoreWork = 1;
+
+ DEBUGLOG(5, "ZSTD_decompressStream");
+ RETURN_ERROR_IF(
+ input->pos > input->size,
+ srcSize_wrong,
+ "forbidden. in: pos: %u vs size: %u",
+ (U32)input->pos, (U32)input->size);
+ RETURN_ERROR_IF(
+ output->pos > output->size,
+ dstSize_tooSmall,
+ "forbidden. out: pos: %u vs size: %u",
+ (U32)output->pos, (U32)output->size);
+ DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
+ FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
+
+ while (someMoreWork) {
+ switch(zds->streamStage)
+ {
+ case zdss_init :
+ DEBUGLOG(5, "stage zdss_init => transparent reset ");
+ zds->streamStage = zdss_loadHeader;
+ zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+ zds->hostageByte = 0;
+ zds->expectedOutBuffer = *output;
+ ZSTD_FALLTHROUGH;
+
+ case zdss_loadHeader :
+ DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
+ { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
+ if (zds->refMultipleDDicts && zds->ddictSet) {
+ ZSTD_DCtx_selectFrameDDict(zds);
+ }
+ DEBUGLOG(5, "header size : %u", (U32)hSize);
+ if (ZSTD_isError(hSize)) {
+ return hSize; /* error */
+ }
+ if (hSize != 0) { /* need more input */
+ size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
+ size_t const remainingInput = (size_t)(iend-ip);
+ assert(iend >= ip);
+ if (toLoad > remainingInput) { /* not enough input to load full header */
+ if (remainingInput > 0) {
+ ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
+ zds->lhSize += remainingInput;
+ }
+ input->pos = input->size;
+ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
+ }
+ assert(ip != NULL);
+ ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
+ break;
+ } }
+
+ /* check for single-pass mode opportunity */
+ if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+ && zds->fParams.frameType != ZSTD_skippableFrame
+ && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+ size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
+ if (cSize <= (size_t)(iend-istart)) {
+ /* shortcut : using single-pass mode */
+ size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
+ if (ZSTD_isError(decompressedSize)) return decompressedSize;
+ DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
+ ip = istart + cSize;
+ op += decompressedSize;
+ zds->expected = 0;
+ zds->streamStage = zdss_init;
+ someMoreWork = 0;
+ break;
+ } }
+
+ /* Check output buffer is large enough for ZSTD_odm_stable. */
+ if (zds->outBufferMode == ZSTD_bm_stable
+ && zds->fParams.frameType != ZSTD_skippableFrame
+ && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+ && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
+ RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
+ }
+
+ /* Consume header (see ZSTDds_decodeFrameHeader) */
+ DEBUGLOG(4, "Consume header");
+ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
+
+ if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
+ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
+ zds->stage = ZSTDds_skipFrame;
+ } else {
+ FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
+ zds->expected = ZSTD_blockHeaderSize;
+ zds->stage = ZSTDds_decodeBlockHeader;
+ }
+
+ /* control buffer memory usage */
+ DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
+ (U32)(zds->fParams.windowSize >>10),
+ (U32)(zds->maxWindowSize >> 10) );
+ zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
+ RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
+ frameParameter_windowTooLarge, "");
+
+ /* Adapt buffer sizes to frame header instructions */
+ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
+ size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
+ ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+ : 0;
+
+ ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
+
+ { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
+ int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
+
+ if (tooSmall || tooLarge) {
+ size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
+ DEBUGLOG(4, "inBuff : from %u to %u",
+ (U32)zds->inBuffSize, (U32)neededInBuffSize);
+ DEBUGLOG(4, "outBuff : from %u to %u",
+ (U32)zds->outBuffSize, (U32)neededOutBuffSize);
+ if (zds->staticSize) { /* static DCtx */
+ DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+ assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
+ RETURN_ERROR_IF(
+ bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
+ memory_allocation, "");
+ } else {
+ ZSTD_customFree(zds->inBuff, zds->customMem);
+ zds->inBuffSize = 0;
+ zds->outBuffSize = 0;
+ zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem);
+ RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
+ }
+ zds->inBuffSize = neededInBuffSize;
+ zds->outBuff = zds->inBuff + zds->inBuffSize;
+ zds->outBuffSize = neededOutBuffSize;
+ } } }
+ zds->streamStage = zdss_read;
+ ZSTD_FALLTHROUGH;
+
+ case zdss_read:
+ DEBUGLOG(5, "stage zdss_read");
+ { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
+ DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
+ if (neededInSize==0) { /* end of frame */
+ zds->streamStage = zdss_init;
+ someMoreWork = 0;
+ break;
+ }
+ if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
+ FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
+ ip += neededInSize;
+ /* Function modifies the stage so we must break */
+ break;
+ } }
+ if (ip==iend) { someMoreWork = 0; break; } /* no more input */
+ zds->streamStage = zdss_load;
+ ZSTD_FALLTHROUGH;
+
+ case zdss_load:
+ { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+ size_t const toLoad = neededInSize - zds->inPos;
+ int const isSkipFrame = ZSTD_isSkipFrame(zds);
+ size_t loadedSize;
+ /* At this point we shouldn't be decompressing a block that we can stream. */
+ assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
+ if (isSkipFrame) {
+ loadedSize = MIN(toLoad, (size_t)(iend-ip));
+ } else {
+ RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
+ corruption_detected,
+ "should never happen");
+ loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
+ }
+ ip += loadedSize;
+ zds->inPos += loadedSize;
+ if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
+
+ /* decode loaded input */
+ zds->inPos = 0; /* input is consumed */
+ FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
+ /* Function modifies the stage so we must break */
+ break;
+ }
+ case zdss_flush:
+ { size_t const toFlushSize = zds->outEnd - zds->outStart;
+ size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
+ op += flushedSize;
+ zds->outStart += flushedSize;
+ if (flushedSize == toFlushSize) { /* flush completed */
+ zds->streamStage = zdss_read;
+ if ( (zds->outBuffSize < zds->fParams.frameContentSize)
+ && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
+ DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
+ (int)(zds->outBuffSize - zds->outStart),
+ (U32)zds->fParams.blockSizeMax);
+ zds->outStart = zds->outEnd = 0;
+ }
+ break;
+ } }
+ /* cannot complete flush */
+ someMoreWork = 0;
+ break;
+
+ default:
+ assert(0); /* impossible */
+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
+ } }
+
+ /* result */
+ input->pos = (size_t)(ip - (const char*)(input->src));
+ output->pos = (size_t)(op - (char*)(output->dst));
+
+ /* Update the expected output buffer for ZSTD_obm_stable. */
+ zds->expectedOutBuffer = *output;
+
+ if ((ip==istart) && (op==ostart)) { /* no forward progress */
+ zds->noForwardProgress ++;
+ if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
+ RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
+ RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
+ assert(0);
+ }
+ } else {
+ zds->noForwardProgress = 0;
+ }
+ { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
+ if (!nextSrcSizeHint) { /* frame fully decoded */
+ if (zds->outEnd == zds->outStart) { /* output fully flushed */
+ if (zds->hostageByte) {
+ if (input->pos >= input->size) {
+ /* can't release hostage (not present) */
+ zds->streamStage = zdss_read;
+ return 1;
+ }
+ input->pos++; /* release hostage */
+ } /* zds->hostageByte */
+ return 0;
+ } /* zds->outEnd == zds->outStart */
+ if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
+ input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */
+ zds->hostageByte=1;
+ }
+ return 1;
+ } /* nextSrcSizeHint==0 */
+ nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
+ assert(zds->inPos <= nextSrcSizeHint);
+ nextSrcSizeHint -= zds->inPos; /* part already loaded*/
+ return nextSrcSizeHint;
+ }
+}
+
+size_t ZSTD_decompressStream_simpleArgs (
+ ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity, size_t* dstPos,
+ const void* src, size_t srcSize, size_t* srcPos)
+{
+ ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+ ZSTD_inBuffer input = { src, srcSize, *srcPos };
+ /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
+ size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
+ *dstPos = output.pos;
+ *srcPos = input.pos;
+ return cErr;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c
new file mode 100644
index 00000000000..c1913b8e7c8
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_block.c
@@ -0,0 +1,2072 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_decompress_block :
+ * this module takes care of decompressing _compressed_ block */
+
+/*-*******************************************************
+* Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/compiler.h" /* prefetch */
+#include "../common/cpu.h" /* bmi2 */
+#include "../common/mem.h" /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/zstd_internal.h"
+#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
+#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"
+
+/*_*******************************************************
+* Macros
+**********************************************************/
+
+/* These two optional macros force the use one way or another of the two
+ * ZSTD_decompressSequences implementations. You can't force in both directions
+ * at the same time.
+ */
+#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+ defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
+#endif
+
+
+/*_*******************************************************
+* Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
+
+
+/*-*************************************************************
+ * Block decoding
+ ***************************************************************/
+
+/*! ZSTD_getcBlockSize() :
+ * Provides the size of compressed block from block header `src` */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+ blockProperties_t* bpPtr)
+{
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
+
+ { U32 const cBlockHeader = MEM_readLE24(src);
+ U32 const cSize = cBlockHeader >> 3;
+ bpPtr->lastBlock = cBlockHeader & 1;
+ bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
+ bpPtr->origSize = cSize; /* only useful for RLE */
+ if (bpPtr->blockType == bt_rle) return 1;
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
+ return cSize;
+ }
+}
+
+/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
+static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
+ const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
+{
+ if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
+ {
+ /* room for litbuffer to fit without read faulting */
+ dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
+ dctx->litBufferLocation = ZSTD_in_dst;
+ }
+ else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
+ {
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+ if (splitImmediately) {
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+ dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
+ }
+ else {
+ /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
+ dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
+ }
+ dctx->litBufferLocation = ZSTD_split;
+ }
+ else
+ {
+ /* fits entirely within litExtraBuffer, so no split is necessary */
+ dctx->litBuffer = dctx->litExtraBuffer;
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
+ dctx->litBufferLocation = ZSTD_not_in_dst;
+ }
+}
+
+/* Hidden declaration for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+ const void* src, size_t srcSize,
+ void* dst, size_t dstCapacity, const streaming_operation streaming);
+/*! ZSTD_decodeLiteralsBlock() :
+ * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
+ * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
+ * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
+ * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
+ *
+ * @return : nb of bytes read from src (< srcSize )
+ * note : symbol not declared but exposed for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+ const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
+ void* dst, size_t dstCapacity, const streaming_operation streaming)
+{
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
+
+ { const BYTE* const istart = (const BYTE*) src;
+ symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+
+ switch(litEncType)
+ {
+ case set_repeat:
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
+ ZSTD_FALLTHROUGH;
+
+ case set_compressed:
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
+ { size_t lhSize, litSize, litCSize;
+ U32 singleStream=0;
+ U32 const lhlCode = (istart[0] >> 2) & 3;
+ U32 const lhc = MEM_readLE32(istart);
+ size_t hufSuccess;
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+ switch(lhlCode)
+ {
+ case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
+ /* 2 - 2 - 10 - 10 */
+ singleStream = !lhlCode;
+ lhSize = 3;
+ litSize = (lhc >> 4) & 0x3FF;
+ litCSize = (lhc >> 14) & 0x3FF;
+ break;
+ case 2:
+ /* 2 - 2 - 14 - 14 */
+ lhSize = 4;
+ litSize = (lhc >> 4) & 0x3FFF;
+ litCSize = lhc >> 18;
+ break;
+ case 3:
+ /* 2 - 2 - 18 - 18 */
+ lhSize = 5;
+ litSize = (lhc >> 4) & 0x3FFFF;
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
+ break;
+ }
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+ RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
+
+ /* prefetch huffman table if cold */
+ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
+ PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
+ }
+
+ if (litEncType==set_repeat) {
+ if (singleStream) {
+ hufSuccess = HUF_decompress1X_usingDTable_bmi2(
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
+ dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
+ } else {
+ hufSuccess = HUF_decompress4X_usingDTable_bmi2(
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
+ dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
+ }
+ } else {
+ if (singleStream) {
+#if defined(HUF_FORCE_DECOMPRESS_X2)
+ hufSuccess = HUF_decompress1X_DCtx_wksp(
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
+ istart+lhSize, litCSize, dctx->workspace,
+ sizeof(dctx->workspace));
+#else
+ hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
+ istart+lhSize, litCSize, dctx->workspace,
+ sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
+#endif
+ } else {
+ hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
+ istart+lhSize, litCSize, dctx->workspace,
+ sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
+ }
+ }
+ if (dctx->litBufferLocation == ZSTD_split)
+ {
+ ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
+ ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
+ dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+ dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
+ }
+
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
+
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litSize = litSize;
+ dctx->litEntropy = 1;
+ if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
+ return litCSize + lhSize;
+ }
+
+ case set_basic:
+ { size_t litSize, lhSize;
+ U32 const lhlCode = ((istart[0]) >> 2) & 3;
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+ switch(lhlCode)
+ {
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
+ lhSize = 1;
+ litSize = istart[0] >> 3;
+ break;
+ case 1:
+ lhSize = 2;
+ litSize = MEM_readLE16(istart) >> 4;
+ break;
+ case 3:
+ lhSize = 3;
+ litSize = MEM_readLE24(istart) >> 4;
+ break;
+ }
+
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
+ if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
+ if (dctx->litBufferLocation == ZSTD_split)
+ {
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
+ ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
+ }
+ else
+ {
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
+ }
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litSize = litSize;
+ return lhSize+litSize;
+ }
+ /* direct reference into compressed stream */
+ dctx->litPtr = istart+lhSize;
+ dctx->litSize = litSize;
+ dctx->litBufferEnd = dctx->litPtr + litSize;
+ dctx->litBufferLocation = ZSTD_not_in_dst;
+ return lhSize+litSize;
+ }
+
+ case set_rle:
+ { U32 const lhlCode = ((istart[0]) >> 2) & 3;
+ size_t litSize, lhSize;
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+ switch(lhlCode)
+ {
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
+ lhSize = 1;
+ litSize = istart[0] >> 3;
+ break;
+ case 1:
+ lhSize = 2;
+ litSize = MEM_readLE16(istart) >> 4;
+ break;
+ case 3:
+ lhSize = 3;
+ litSize = MEM_readLE24(istart) >> 4;
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
+ break;
+ }
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
+ if (dctx->litBufferLocation == ZSTD_split)
+ {
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
+ ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
+ }
+ else
+ {
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
+ }
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litSize = litSize;
+ return lhSize+1;
+ }
+ default:
+ RETURN_ERROR(corruption_detected, "impossible");
+ }
+ }
+}
+
+/* Default FSE distribution tables.
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - printout the content of tables
+ * - pretify output, report below, test with fuzzer to ensure it's correct */
+
+/* Default FSE distribution table for Literal Lengths */
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
+}; /* LL_defaultDTable */
+
+/* Default FSE distribution table for Offset Codes */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
+}; /* OF_defaultDTable */
+
+
+/* Default FSE distribution table for Match Lengths */
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
+}; /* ML_defaultDTable */
+
+
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
+{
+ void* ptr = dt;
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
+ ZSTD_seqSymbol* const cell = dt + 1;
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->nbBits = 0;
+ cell->nextState = 0;
+ assert(nbAddBits < 255);
+ cell->nbAdditionalBits = nbAddBits;
+ cell->baseValue = baseValue;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * cannot fail if input is valid =>
+ * all inputs are presumed validated at this stage */
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+ const short* normalizedCounter, unsigned maxSymbolValue,
+ const U32* baseValue, const U8* nbAdditionalBits,
+ unsigned tableLog, void* wksp, size_t wkspSize)
+{
+ ZSTD_seqSymbol* const tableDecode = dt+1;
+ U32 const maxSV1 = maxSymbolValue + 1;
+ U32 const tableSize = 1 << tableLog;
+
+ U16* symbolNext = (U16*)wksp;
+ BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+ U32 highThreshold = tableSize - 1;
+
+
+ /* Sanity Checks */
+ assert(maxSymbolValue <= MaxSeq);
+ assert(tableLog <= MaxFSELog);
+ assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+ (void)wkspSize;
+ /* Init, lay down lowprob symbols */
+ { ZSTD_seqSymbol_header DTableH;
+ DTableH.tableLog = tableLog;
+ DTableH.fastMode = 1;
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
+ U32 s;
+ for (s=0; s<maxSV1; s++) {
+ if (normalizedCounter[s]==-1) {
+ tableDecode[highThreshold--].baseValue = s;
+ symbolNext[s] = 1;
+ } else {
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+ assert(normalizedCounter[s]>=0);
+ symbolNext[s] = (U16)normalizedCounter[s];
+ } } }
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+ }
+
+ /* Spread symbols */
+ assert(tableSize <= 512);
+ /* Specialized symbol spreading for the case when there are
+ * no low probability (-1 count) symbols. When compressing
+ * small blocks we avoid low probability symbols to hit this
+ * case, since header decoding speed matters more.
+ */
+ if (highThreshold == tableSize - 1) {
+ size_t const tableMask = tableSize-1;
+ size_t const step = FSE_TABLESTEP(tableSize);
+ /* First lay down the symbols in order.
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+ * misses since small blocks generally have small table logs, so nearly
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+ * our buffer to handle the over-write.
+ */
+ {
+ U64 const add = 0x0101010101010101ull;
+ size_t pos = 0;
+ U64 sv = 0;
+ U32 s;
+ for (s=0; s<maxSV1; ++s, sv += add) {
+ int i;
+ int const n = normalizedCounter[s];
+ MEM_write64(spread + pos, sv);
+ for (i = 8; i < n; i += 8) {
+ MEM_write64(spread + pos + i, sv);
+ }
+ pos += n;
+ }
+ }
+ /* Now we spread those positions across the table.
+ * The benefit of doing it in two stages is that we avoid the the
+ * variable size inner loop, which caused lots of branch misses.
+ * Now we can run through all the positions without any branch misses.
+ * We unroll the loop twice, since that is what emperically worked best.
+ */
+ {
+ size_t position = 0;
+ size_t s;
+ size_t const unroll = 2;
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
+ size_t u;
+ for (u = 0; u < unroll; ++u) {
+ size_t const uPosition = (position + (u * step)) & tableMask;
+ tableDecode[uPosition].baseValue = spread[s + u];
+ }
+ position = (position + (unroll * step)) & tableMask;
+ }
+ assert(position == 0);
+ }
+ } else {
+ U32 const tableMask = tableSize-1;
+ U32 const step = FSE_TABLESTEP(tableSize);
+ U32 s, position = 0;
+ for (s=0; s<maxSV1; s++) {
+ int i;
+ int const n = normalizedCounter[s];
+ for (i=0; i<n; i++) {
+ tableDecode[position].baseValue = s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ } }
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+ }
+
+ /* Build Decoding table */
+ {
+ U32 u;
+ for (u=0; u<tableSize; u++) {
+ U32 const symbol = tableDecode[u].baseValue;
+ U32 const nextState = symbolNext[symbol]++;
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+ assert(nbAdditionalBits[symbol] < 255);
+ tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
+ tableDecode[u].baseValue = baseValue[symbol];
+ }
+ }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+ const short* normalizedCounter, unsigned maxSymbolValue,
+ const U32* baseValue, const U8* nbAdditionalBits,
+ unsigned tableLog, void* wksp, size_t wkspSize)
+{
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+ const short* normalizedCounter, unsigned maxSymbolValue,
+ const U32* baseValue, const U8* nbAdditionalBits,
+ unsigned tableLog, void* wksp, size_t wkspSize)
+{
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+ const short* normalizedCounter, unsigned maxSymbolValue,
+ const U32* baseValue, const U8* nbAdditionalBits,
+ unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+ ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+ return;
+ }
+#endif
+ (void)bmi2;
+ ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+
+/*! ZSTD_buildSeqTable() :
+ * @return : nb bytes read from src,
+ * or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
+ symbolEncodingType_e type, unsigned max, U32 maxLog,
+ const void* src, size_t srcSize,
+ const U32* baseValue, const U8* nbAdditionalBits,
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+ int bmi2)
+{
+ switch(type)
+ {
+ case set_rle :
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
+ { U32 const symbol = *(const BYTE*)src;
+ U32 const baseline = baseValue[symbol];
+ U8 const nbBits = nbAdditionalBits[symbol];
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
+ }
+ *DTablePtr = DTableSpace;
+ return 1;
+ case set_basic :
+ *DTablePtr = defaultTable;
+ return 0;
+ case set_repeat:
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
+ /* prefetch FSE table if used */
+ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
+ const void* const pStart = *DTablePtr;
+ size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
+ PREFETCH_AREA(pStart, pSize);
+ }
+ return 0;
+ case set_compressed :
+ { unsigned tableLog;
+ S16 norm[MaxSeq+1];
+ size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
+ *DTablePtr = DTableSpace;
+ return headerSize;
+ }
+ default :
+ assert(0);
+ RETURN_ERROR(GENERIC, "impossible");
+ }
+}
+
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* ip = istart;
+ int nbSeq;
+ DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
+
+ /* check */
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
+
+ /* SeqHead */
+ nbSeq = *ip++;
+ if (!nbSeq) {
+ *nbSeqPtr=0;
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
+ return 1;
+ }
+ if (nbSeq > 0x7F) {
+ if (nbSeq == 0xFF) {
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+ ip+=2;
+ } else {
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
+ nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+ }
+ }
+ *nbSeqPtr = nbSeq;
+
+ /* FSE table descriptors */
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+ symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+ symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
+ ip++;
+
+ /* Build DTables */
+ { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
+ LLtype, MaxLL, LLFSELog,
+ ip, iend-ip,
+ LL_base, LL_bits,
+ LL_defaultDTable, dctx->fseEntropy,
+ dctx->ddictIsCold, nbSeq,
+ dctx->workspace, sizeof(dctx->workspace),
+ ZSTD_DCtx_get_bmi2(dctx));
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+ ip += llhSize;
+ }
+
+ { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
+ OFtype, MaxOff, OffFSELog,
+ ip, iend-ip,
+ OF_base, OF_bits,
+ OF_defaultDTable, dctx->fseEntropy,
+ dctx->ddictIsCold, nbSeq,
+ dctx->workspace, sizeof(dctx->workspace),
+ ZSTD_DCtx_get_bmi2(dctx));
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+ ip += ofhSize;
+ }
+
+ { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
+ MLtype, MaxML, MLFSELog,
+ ip, iend-ip,
+ ML_base, ML_bits,
+ ML_defaultDTable, dctx->fseEntropy,
+ dctx->ddictIsCold, nbSeq,
+ dctx->workspace, sizeof(dctx->workspace),
+ ZSTD_DCtx_get_bmi2(dctx));
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+ ip += mlhSize;
+ }
+ }
+
+ return ip-istart;
+}
+
+
+typedef struct {
+ size_t litLength;
+ size_t matchLength;
+ size_t offset;
+} seq_t;
+
+typedef struct {
+ size_t state;
+ const ZSTD_seqSymbol* table;
+} ZSTD_fseState;
+
+typedef struct {
+ BIT_DStream_t DStream;
+ ZSTD_fseState stateLL;
+ ZSTD_fseState stateOffb;
+ ZSTD_fseState stateML;
+ size_t prevOffset[ZSTD_REP_NUM];
+} seqState_t;
+
+/*! ZSTD_overlapCopy8() :
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ * Precondition: *ip <= *op
+ * Postcondition: *op - *op >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+ assert(*ip <= *op);
+ if (offset < 8) {
+ /* close range match, overlap */
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
+ int const sub2 = dec64table[offset];
+ (*op)[0] = (*ip)[0];
+ (*op)[1] = (*ip)[1];
+ (*op)[2] = (*ip)[2];
+ (*op)[3] = (*ip)[3];
+ *ip += dec32table[offset];
+ ZSTD_copy4(*op+4, *ip);
+ *ip -= sub2;
+ } else {
+ ZSTD_copy8(*op, *ip);
+ }
+ *ip += 8;
+ *op += 8;
+ assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
+ * should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ * The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+ ptrdiff_t const diff = op - ip;
+ BYTE* const oend = op + length;
+
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+ if (length < 8) {
+ /* Handle short lengths. */
+ while (op < oend) *op++ = *ip++;
+ return;
+ }
+ if (ovtype == ZSTD_overlap_src_before_dst) {
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+ assert(length >= 8);
+ ZSTD_overlapCopy8(&op, &ip, diff);
+ length -= 8;
+ assert(op - ip >= 8);
+ assert(op <= oend);
+ }
+
+ if (oend <= oend_w) {
+ /* No risk of overwrite. */
+ ZSTD_wildcopy(op, ip, length, ovtype);
+ return;
+ }
+ if (op <= oend_w) {
+ /* Wildcopy until we get close to the end. */
+ assert(oend > oend_w);
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+ ip += oend_w - op;
+ op += oend_w - op;
+ }
+ /* Handle the leftovers. */
+ while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_safecopyDstBeforeSrc():
+ * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
+ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
+ ptrdiff_t const diff = op - ip;
+ BYTE* const oend = op + length;
+
+ if (length < 8 || diff > -8) {
+ /* Handle short lengths, close overlaps, and dst not before src. */
+ while (op < oend) *op++ = *ip++;
+ return;
+ }
+
+ if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
+ ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
+ ip += oend - WILDCOPY_OVERLENGTH - op;
+ op += oend - WILDCOPY_OVERLENGTH - op;
+ }
+
+ /* Handle the leftovers. */
+ while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEnd(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+ BYTE* const oLitEnd = op + sequence.litLength;
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+ const BYTE* match = oLitEnd - sequence.offset;
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+
+ /* bounds checks : careful of address space overflow in 32-bit mode */
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+ assert(op < op + sequenceLength);
+ assert(oLitEnd < op + sequenceLength);
+
+ /* copy literals */
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+ op = oLitEnd;
+ *litPtr = iLitEnd;
+
+ /* copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+ /* offset beyond prefix */
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+ match = dictEnd - (prefixStart - match);
+ if (match + sequence.matchLength <= dictEnd) {
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ { size_t const length1 = dictEnd - match;
+ ZSTD_memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = prefixStart;
+ }
+ }
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+ return sequenceLength;
+}
+
+/* ZSTD_execSequenceEndSplitLitBuffer():
+ * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+ BYTE* const oLitEnd = op + sequence.litLength;
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+ const BYTE* match = oLitEnd - sequence.offset;
+
+
+ /* bounds checks : careful of address space overflow in 32-bit mode */
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+ assert(op < op + sequenceLength);
+ assert(oLitEnd < op + sequenceLength);
+
+ /* copy literals */
+ RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
+ ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
+ op = oLitEnd;
+ *litPtr = iLitEnd;
+
+ /* copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+ /* offset beyond prefix */
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+ match = dictEnd - (prefixStart - match);
+ if (match + sequence.matchLength <= dictEnd) {
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ { size_t const length1 = dictEnd - match;
+ ZSTD_memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = prefixStart;
+ }
+ }
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+ return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequence(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+ BYTE* const oLitEnd = op + sequence.litLength;
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+ const BYTE* match = oLitEnd - sequence.offset;
+
+ assert(op != NULL /* Precondition */);
+ assert(oend_w < oend /* No underflow */);
+ /* Handle edge cases in a slow path:
+ * - Read beyond end of literals
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
+ * - 32-bit mode and the match length overflows
+ */
+ if (UNLIKELY(
+ iLitEnd > litLimit ||
+ oMatchEnd > oend_w ||
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+ assert(op <= oLitEnd /* No overflow */);
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+ assert(oMatchEnd <= oend /* No underflow */);
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+ /* Copy Literals:
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+ * We likely don't need the full 32-byte wildcopy.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(op, (*litPtr));
+ if (UNLIKELY(sequence.litLength > 16)) {
+ ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
+ }
+ op = oLitEnd;
+ *litPtr = iLitEnd; /* update for next sequence */
+
+ /* Copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+ /* offset beyond prefix -> go into extDict */
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+ match = dictEnd + (match - prefixStart);
+ if (match + sequence.matchLength <= dictEnd) {
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ { size_t const length1 = dictEnd - match;
+ ZSTD_memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = prefixStart;
+ }
+ }
+ /* Match within prefix of 1 or more bytes */
+ assert(op <= oMatchEnd);
+ assert(oMatchEnd <= oend_w);
+ assert(match >= prefixStart);
+ assert(sequence.matchLength >= 1);
+
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+ * without overlap checking.
+ */
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+ /* We bet on a full wildcopy for matches, since we expect matches to be
+ * longer than literals (in general). In silesia, ~10% of matches are longer
+ * than 16 bytes.
+ */
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+ return sequenceLength;
+ }
+ assert(sequence.offset < WILDCOPY_VECLEN);
+
+ /* Copy 8 bytes and spread the offset to be >= 8. */
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
+ if (sequence.matchLength > 8) {
+ assert(op < oMatchEnd);
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
+ }
+ return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+ BYTE* const oLitEnd = op + sequence.litLength;
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+ const BYTE* match = oLitEnd - sequence.offset;
+
+ assert(op != NULL /* Precondition */);
+ assert(oend_w < oend /* No underflow */);
+ /* Handle edge cases in a slow path:
+ * - Read beyond end of literals
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
+ * - 32-bit mode and the match length overflows
+ */
+ if (UNLIKELY(
+ iLitEnd > litLimit ||
+ oMatchEnd > oend_w ||
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+ return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+ assert(op <= oLitEnd /* No overflow */);
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+ assert(oMatchEnd <= oend /* No underflow */);
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+ /* Copy Literals:
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+ * We likely don't need the full 32-byte wildcopy.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(op, (*litPtr));
+ if (UNLIKELY(sequence.litLength > 16)) {
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+ }
+ op = oLitEnd;
+ *litPtr = iLitEnd; /* update for next sequence */
+
+ /* Copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+ /* offset beyond prefix -> go into extDict */
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+ match = dictEnd + (match - prefixStart);
+ if (match + sequence.matchLength <= dictEnd) {
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ { size_t const length1 = dictEnd - match;
+ ZSTD_memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = prefixStart;
+ } }
+ /* Match within prefix of 1 or more bytes */
+ assert(op <= oMatchEnd);
+ assert(oMatchEnd <= oend_w);
+ assert(match >= prefixStart);
+ assert(sequence.matchLength >= 1);
+
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+ * without overlap checking.
+ */
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+ /* We bet on a full wildcopy for matches, since we expect matches to be
+ * longer than literals (in general). In silesia, ~10% of matches are longer
+ * than 16 bytes.
+ */
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+ return sequenceLength;
+ }
+ assert(sequence.offset < WILDCOPY_VECLEN);
+
+ /* Copy 8 bytes and spread the offset to be >= 8. */
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
+ if (sequence.matchLength > 8) {
+ assert(op < oMatchEnd);
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
+ }
+ return sequenceLength;
+}
+
+
+static void
+ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
+{
+ const void* ptr = dt;
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
+ (U32)DStatePtr->state, DTableH->tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
+{
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+ DStatePtr->state = nextState + lowBits;
+}
+
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bytes we read
+ * after reloading when we are decoding long offsets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
+ : 0)
+
+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+{
+ seq_t seq;
+ const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
+ const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
+ const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
+ seq.matchLength = mlDInfo->baseValue;
+ seq.litLength = llDInfo->baseValue;
+ { U32 const ofBase = ofDInfo->baseValue;
+ BYTE const llBits = llDInfo->nbAdditionalBits;
+ BYTE const mlBits = mlDInfo->nbAdditionalBits;
+ BYTE const ofBits = ofDInfo->nbAdditionalBits;
+ BYTE const totalBits = llBits+mlBits+ofBits;
+
+ U16 const llNext = llDInfo->nextState;
+ U16 const mlNext = mlDInfo->nextState;
+ U16 const ofNext = ofDInfo->nextState;
+ U32 const llnbBits = llDInfo->nbBits;
+ U32 const mlnbBits = mlDInfo->nbBits;
+ U32 const ofnbBits = ofDInfo->nbBits;
+ /*
+ * As gcc has better branch and block analyzers, sometimes it is only
+ * valuable to mark likelyness for clang, it gives around 3-4% of
+ * performance.
+ */
+
+ /* sequence */
+ { size_t offset;
+ #if defined(__clang__)
+ if (LIKELY(ofBits > 1)) {
+ #else
+ if (ofBits > 1) {
+ #endif
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+ assert(ofBits <= MaxOff);
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+ U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+ BIT_reloadDStream(&seqState->DStream);
+ if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+ assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
+ } else {
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+ }
+ seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset;
+ } else {
+ U32 const ll0 = (llDInfo->baseValue == 0);
+ if (LIKELY((ofBits == 0))) {
+ offset = seqState->prevOffset[ll0];
+ seqState->prevOffset[1] = seqState->prevOffset[!ll0];
+ seqState->prevOffset[0] = offset;
+ } else {
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset = temp;
+ } } }
+ seq.offset = offset;
+ }
+
+ #if defined(__clang__)
+ if (UNLIKELY(mlBits > 0))
+ #else
+ if (mlBits > 0)
+ #endif
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+ BIT_reloadDStream(&seqState->DStream);
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+ BIT_reloadDStream(&seqState->DStream);
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+ #if defined(__clang__)
+ if (UNLIKELY(llBits > 0))
+ #else
+ if (llBits > 0)
+ #endif
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+ if (MEM_32bits())
+ BIT_reloadDStream(&seqState->DStream);
+
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
+ }
+
+ return seq;
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+ size_t const windowSize = dctx->fParams.windowSize;
+ /* No dictionary used. */
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
+ /* Dictionary is our prefix. */
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+ /* Dictionary is not our ext-dict. */
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+ /* Dictionary is not within our window size. */
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+ /* Dictionary is active. */
+ return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+ ZSTD_DCtx const* dctx,
+ BYTE const* op, BYTE const* oend,
+ seq_t const seq,
+ BYTE const* prefixStart, BYTE const* virtualStart)
+{
+#if DEBUGLEVEL >= 1
+ size_t const windowSize = dctx->fParams.windowSize;
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
+ BYTE const* const oLitEnd = op + seq.litLength;
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+ assert(op <= oend);
+ assert((size_t)(oend - op) >= sequenceSize);
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+ /* Offset must be within the dictionary. */
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+ assert(seq.offset <= windowSize + dictSize);
+ } else {
+ /* Offset must be within our window. */
+ assert(seq.offset <= windowSize);
+ }
+#else
+ (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+
+
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = ostart + maxDstSize;
+ BYTE* op = ostart;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+ const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
+ (void)frame;
+
+ /* Regen sequences */
+ if (nbSeq) {
+ seqState_t seqState;
+ dctx->fseEntropy = 1;
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+ RETURN_ERROR_IF(
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+ corruption_detected, "");
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+ assert(dst != NULL);
+
+ ZSTD_STATIC_ASSERT(
+ BIT_DStream_unfinished < BIT_DStream_completed &&
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+ BIT_DStream_completed < BIT_DStream_overflow);
+
+ /* decompress without overrunning litPtr begins */
+ {
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ /* Align the decompression loop to 32 + 16 bytes.
+ *
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+ * speed swings based on the alignment of the decompression loop. This
+ * performance swing is caused by parts of the decompression loop falling
+ * out of the DSB. The entire decompression loop should fit in the DSB,
+ * when it can't we get much worse performance. You can measure if you've
+ * hit the good case or the bad case with this perf command for some
+ * compressed file test.zst:
+ *
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+ *
+ * If you see most cycles served out of the MITE you've hit the bad case.
+ * If you see most cycles served out of the DSB you've hit the good case.
+ * If it is pretty even then you may be in an okay case.
+ *
+ * This issue has been reproduced on the following CPUs:
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+ * Use Instruments->Counters to get DSB/MITE cycles.
+ * I never got performance swings, but I was able to
+ * go from the good case of mostly DSB to half of the
+ * cycles served from MITE.
+ * - Coffeelake: Intel i9-9900k
+ * - Coffeelake: Intel i7-9700k
+ *
+ * I haven't been able to reproduce the instability or DSB misses on any
+ * of the following CPUS:
+ * - Haswell
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+ * - Skylake
+ *
+ * Alignment is done for each of the three major decompression loops:
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
+ * - ZSTD_decompressSequences_body
+ * Alignment choices are made to minimize large swings on bad cases and influence on performance
+ * from changes external to this code, rather than to overoptimize on the current commit.
+ *
+ * If you are seeing performance stability this script can help test.
+ * It tests on 4 commits in zstd where I saw performance change.
+ *
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+ */
+#if defined(__x86_64__)
+ __asm__(".p2align 6");
+# if __GNUC__ >= 7
+ /* good for gcc-7, gcc-9, and gcc-11 */
+ __asm__("nop");
+ __asm__(".p2align 5");
+ __asm__("nop");
+ __asm__(".p2align 4");
+# if __GNUC__ == 8 || __GNUC__ == 10
+ /* good for gcc-8 and gcc-10 */
+ __asm__("nop");
+ __asm__(".p2align 3");
+# endif
+# endif
+#endif
+
+ /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
+ for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
+ size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+ return oneSeqSize;
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ op += oneSeqSize;
+ if (UNLIKELY(!--nbSeq))
+ break;
+ BIT_reloadDStream(&(seqState.DStream));
+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ }
+
+ /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
+ if (nbSeq > 0) {
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+ if (leftoverLit)
+ {
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+ sequence.litLength -= leftoverLit;
+ op += leftoverLit;
+ }
+ litPtr = dctx->litExtraBuffer;
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+ dctx->litBufferLocation = ZSTD_not_in_dst;
+ {
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+ return oneSeqSize;
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ op += oneSeqSize;
+ if (--nbSeq)
+ BIT_reloadDStream(&(seqState.DStream));
+ }
+ }
+ }
+
+ if (nbSeq > 0) /* there is remaining lit from extra buffer */
+ {
+
+#if defined(__x86_64__)
+ __asm__(".p2align 6");
+ __asm__("nop");
+# if __GNUC__ != 7
+ /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
+ __asm__(".p2align 4");
+ __asm__("nop");
+ __asm__(".p2align 3");
+# elif __GNUC__ >= 11
+ __asm__(".p2align 3");
+# else
+ __asm__(".p2align 5");
+ __asm__("nop");
+ __asm__(".p2align 3");
+# endif
+#endif
+
+ for (; ; ) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+ return oneSeqSize;
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ op += oneSeqSize;
+ if (UNLIKELY(!--nbSeq))
+ break;
+ BIT_reloadDStream(&(seqState.DStream));
+ }
+ }
+
+ /* check if reached exact end */
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+ /* save reps for next block */
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+ }
+
+ /* last literal segment */
+ if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
+ {
+ size_t const lastLLSize = litBufferEnd - litPtr;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ ZSTD_memmove(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ litPtr = dctx->litExtraBuffer;
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+ dctx->litBufferLocation = ZSTD_not_in_dst;
+ }
+ { size_t const lastLLSize = litBufferEnd - litPtr;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ ZSTD_memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ }
+
+ return op-ostart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
+ BYTE* op = ostart;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* const litEnd = litPtr + dctx->litSize;
+ const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
+ const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
+ const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
+ DEBUGLOG(5, "ZSTD_decompressSequences_body");
+ (void)frame;
+
+ /* Regen sequences */
+ if (nbSeq) {
+ seqState_t seqState;
+ dctx->fseEntropy = 1;
+ { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+ RETURN_ERROR_IF(
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
+ corruption_detected, "");
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+ assert(dst != NULL);
+
+ ZSTD_STATIC_ASSERT(
+ BIT_DStream_unfinished < BIT_DStream_completed &&
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+ BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__x86_64__)
+ __asm__(".p2align 6");
+ __asm__("nop");
+# if __GNUC__ >= 7
+ __asm__(".p2align 5");
+ __asm__("nop");
+ __asm__(".p2align 3");
+# else
+ __asm__(".p2align 4");
+ __asm__("nop");
+ __asm__(".p2align 3");
+# endif
+#endif
+
+ for ( ; ; ) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+ return oneSeqSize;
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ op += oneSeqSize;
+ if (UNLIKELY(!--nbSeq))
+ break;
+ BIT_reloadDStream(&(seqState.DStream));
+ }
+
+ /* check if reached exact end */
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+ /* save reps for next block */
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+ }
+
+ /* last literal segment */
+ { size_t const lastLLSize = litEnd - litPtr;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ ZSTD_memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ }
+
+ return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+ const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+ prefetchPos += sequence.litLength;
+ { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+ const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+ * No consequence though : memory address is only used for prefetching, not for dereferencing */
+ PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+ }
+ return prefetchPos + sequence.matchLength;
+}
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+ ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
+ BYTE* op = ostart;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+ const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+ (void)frame;
+
+ /* Regen sequences */
+ if (nbSeq) {
+#define STORED_SEQS 8
+#define STORED_SEQS_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS STORED_SEQS
+ seq_t sequences[STORED_SEQS];
+ int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+ seqState_t seqState;
+ int seqNb;
+ size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
+ dctx->fseEntropy = 1;
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+ assert(dst != NULL);
+ assert(iend >= ip);
+ RETURN_ERROR_IF(
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+ corruption_detected, "");
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+ /* prepare in advance */
+ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+ sequences[seqNb] = sequence;
+ }
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+ /* decompress without stomping litBuffer */
+ for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ size_t oneSeqSize;
+
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
+ {
+ /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+ if (leftoverLit)
+ {
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+ sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
+ op += leftoverLit;
+ }
+ litPtr = dctx->litExtraBuffer;
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+ dctx->litBufferLocation = ZSTD_not_in_dst;
+ oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
+ op += oneSeqSize;
+ }
+ else
+ {
+ /* lit buffer is either wholly contained in first or second split, or not split at all*/
+ oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+ ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
+ op += oneSeqSize;
+ }
+ }
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+
+ /* finish queue */
+ seqNb -= seqAdvance;
+ for ( ; seqNb<nbSeq ; seqNb++) {
+ seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
+ {
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+ if (leftoverLit)
+ {
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+ sequence->litLength -= leftoverLit;
+ op += leftoverLit;
+ }
+ litPtr = dctx->litExtraBuffer;
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+ dctx->litBufferLocation = ZSTD_not_in_dst;
+ {
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ }
+ }
+ else
+ {
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+ ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ }
+ }
+
+ /* save reps for next block */
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+ }
+
+ /* last literal segment */
+ if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
+ {
+ size_t const lastLLSize = litBufferEnd - litPtr;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ ZSTD_memmove(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ litPtr = dctx->litExtraBuffer;
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+ }
+ { size_t const lastLLSize = litBufferEnd - litPtr;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ ZSTD_memmove(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ }
+
+ return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if DYNAMIC_BMI2
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static BMI2_TARGET_ATTRIBUTE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+static BMI2_TARGET_ATTRIBUTE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+static BMI2_TARGET_ATTRIBUTE size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+#endif /* DYNAMIC_BMI2 */
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+ ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame);
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static size_t
+ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ }
+#endif
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
+#if DYNAMIC_BMI2
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ }
+#endif
+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong() :
+ * decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
+ * This function will try to mitigate main memory latency through the use of prefetching */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
+{
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ }
+#endif
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ * compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+ const void* ptr = offTable;
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+ const ZSTD_seqSymbol* table = offTable + 1;
+ U32 const max = 1 << tableLog;
+ U32 u, total = 0;
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+ assert(max <= (1 << OffFSELog)); /* max not too large */
+ for (u=0; u<max; u++) {
+ if (table[u].nbAdditionalBits > 22) total += 1;
+ }
+
+ assert(tableLog <= OffFSELog);
+ total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
+
+ return total;
+}
+#endif
+
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
+{ /* blockType == blockCompressed */
+ const BYTE* ip = (const BYTE*)src;
+ /* isLongOffset must be true if there are long offsets.
+ * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+ * We don't expect that to be the case in 64-bit mode.
+ * In block mode, window size is not known, so we have to be conservative.
+ * (note: but it could be evaluated from current-lowLimit)
+ */
+ ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+ RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+ /* Decode literals section */
+ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+ if (ZSTD_isError(litCSize)) return litCSize;
+ ip += litCSize;
+ srcSize -= litCSize;
+ }
+
+ /* Build Decoding Tables */
+ {
+ /* These macros control at build-time which decompressor implementation
+ * we use. If neither is defined, we do some inspection and dispatch at
+ * runtime.
+ */
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+ int usePrefetchDecoder = dctx->ddictIsCold;
+#endif
+ int nbSeq;
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+ if (ZSTD_isError(seqHSize)) return seqHSize;
+ ip += seqHSize;
+ srcSize -= seqHSize;
+
+ RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+ if ( !usePrefetchDecoder
+ && (!frame || (dctx->fParams.windowSize > (1<<24)))
+ && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
+ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+ usePrefetchDecoder = (shareLongOffsets >= minShare);
+ }
+#endif
+
+ dctx->ddictIsCold = 0;
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+ if (usePrefetchDecoder)
+#endif
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+ /* else */
+ if (dctx->litBufferLocation == ZSTD_split)
+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+ else
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+ }
+}
+
+
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+{
+ if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+ dctx->prefixStart = dst;
+ dctx->previousDstEnd = dst;
+ }
+}
+
+
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ size_t dSize;
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
+ dctx->previousDstEnd = (char*)dst + dSize;
+ return dSize;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
new file mode 100644
index 00000000000..3d2d57a5d25
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_block.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DEC_BLOCK_H
+#define ZSTD_DEC_BLOCK_H
+
+/*-*******************************************************
+ * Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h" /* size_t */
+#include <linux/zstd.h> /* DCtx, and some public functions */
+#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
+#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
+
+
+/* === Prototypes === */
+
+/* note: prototypes already published within `zstd.h` :
+ * ZSTD_decompressBlock()
+ */
+
+/* note: prototypes already published within `zstd_internal.h` :
+ * ZSTD_getcBlockSize()
+ * ZSTD_decodeSeqHeaders()
+ */
+
+
+ /* Streaming state is used to inform allocation of the literal buffer */
+typedef enum {
+ not_streaming = 0,
+ is_streaming = 1
+} streaming_operation;
+
+/* ZSTD_decompressBlock_internal() :
+ * decompress block, starting at `src`,
+ * into destination buffer `dst`.
+ * @return : decompressed block size,
+ * or an error code (which can be tested using ZSTD_isError())
+ */
+size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * this function must be called with valid parameters only
+ * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
+ * in which case it cannot fail.
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
+ * defined in zstd_decompress_internal.h.
+ * Internal use only.
+ */
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+ const short* normalizedCounter, unsigned maxSymbolValue,
+ const U32* baseValue, const U8* nbAdditionalBits,
+ unsigned tableLog, void* wksp, size_t wkspSize,
+ int bmi2);
+
+
+#endif /* ZSTD_DEC_BLOCK_H */
diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h
new file mode 100644
index 00000000000..98102edb6a8
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_internal.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* zstd_decompress_internal:
+ * objects and definitions shared within lib/decompress modules */
+
+ #ifndef ZSTD_DECOMPRESS_INTERNAL_H
+ #define ZSTD_DECOMPRESS_INTERNAL_H
+
+
+/*-*******************************************************
+ * Dependencies
+ *********************************************************/
+#include "../common/mem.h" /* BYTE, U16, U32 */
+#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
+
+
+
+/*-*******************************************************
+ * Constants
+ *********************************************************/
+static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 18, 20, 22, 24, 28, 32, 40,
+ 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+ 0x2000, 0x4000, 0x8000, 0x10000 };
+
+static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
+ 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
+ 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
+ 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+ 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31 };
+
+static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
+ 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 39, 41, 43, 47, 51, 59,
+ 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+ 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+
+/*-*******************************************************
+ * Decompression types
+ *********************************************************/
+ typedef struct {
+ U32 fastMode;
+ U32 tableLog;
+ } ZSTD_seqSymbol_header;
+
+ typedef struct {
+ U16 nextState;
+ BYTE nbAdditionalBits;
+ BYTE nbBits;
+ U32 baseValue;
+ } ZSTD_seqSymbol;
+
+ #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
+
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
+
+typedef struct {
+ ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
+ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
+ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
+ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
+ U32 rep[ZSTD_REP_NUM];
+ U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
+} ZSTD_entropyDTables_t;
+
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+ ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+ ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
+ ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
+
+typedef enum { zdss_init=0, zdss_loadHeader,
+ zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+typedef enum {
+ ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
+ ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */
+ ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
+/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
+typedef struct {
+ const ZSTD_DDict** ddictPtrTable;
+ size_t ddictPtrTableSize;
+ size_t ddictPtrCount;
+} ZSTD_DDictHashSet;
+
+#ifndef ZSTD_DECODER_INTERNAL_BUFFER
+# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
+#endif
+
+#define ZSTD_LBMIN 64
+#define ZSTD_LBMAX (128 << 10)
+
+/* extra buffer, compensates when dst is not large enough to store litBuffer */
+#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
+
+typedef enum {
+ ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
+ ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
+ ZSTD_split = 2 /* Split between litExtraBuffer and dst */
+} ZSTD_litLocation_e;
+
+struct ZSTD_DCtx_s
+{
+ const ZSTD_seqSymbol* LLTptr;
+ const ZSTD_seqSymbol* MLTptr;
+ const ZSTD_seqSymbol* OFTptr;
+ const HUF_DTable* HUFptr;
+ ZSTD_entropyDTables_t entropy;
+ U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
+ const void* previousDstEnd; /* detect continuity */
+ const void* prefixStart; /* start of current segment */
+ const void* virtualStart; /* virtual start of previous segment if it was just before current one */
+ const void* dictEnd; /* end of previous segment */
+ size_t expected;
+ ZSTD_frameHeader fParams;
+ U64 processedCSize;
+ U64 decodedSize;
+ blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
+ ZSTD_dStage stage;
+ U32 litEntropy;
+ U32 fseEntropy;
+ struct xxh64_state xxhState;
+ size_t headerSize;
+ ZSTD_format_e format;
+ ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
+ U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
+ const BYTE* litPtr;
+ ZSTD_customMem customMem;
+ size_t litSize;
+ size_t rleSize;
+ size_t staticSize;
+#if DYNAMIC_BMI2 != 0
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+#endif
+
+ /* dictionary */
+ ZSTD_DDict* ddictLocal;
+ const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
+ U32 dictID;
+ int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+ ZSTD_dictUses_e dictUses;
+ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
+ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
+
+ /* streaming */
+ ZSTD_dStreamStage streamStage;
+ char* inBuff;
+ size_t inBuffSize;
+ size_t inPos;
+ size_t maxWindowSize;
+ char* outBuff;
+ size_t outBuffSize;
+ size_t outStart;
+ size_t outEnd;
+ size_t lhSize;
+ U32 hostageByte;
+ int noForwardProgress;
+ ZSTD_bufferMode_e outBufferMode;
+ ZSTD_outBuffer expectedOutBuffer;
+
+ /* workspace */
+ BYTE* litBuffer;
+ const BYTE* litBufferEnd;
+ ZSTD_litLocation_e litBufferLocation;
+ BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
+ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+
+ size_t oversizedDuration;
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ void const* dictContentBeginForFuzzing;
+ void const* dictContentEndForFuzzing;
+#endif
+
+ /* Tracing */
+}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
+
+MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
+#if DYNAMIC_BMI2 != 0
+ return dctx->bmi2;
+#else
+ (void)dctx;
+ return 0;
+#endif
+}
+
+/*-*******************************************************
+ * Shared internal functions
+ *********************************************************/
+
+/*! ZSTD_loadDEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
+size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+ const void* const dict, size_t const dictSize);
+
+/*! ZSTD_checkContinuity() :
+ * check if next `dst` follows previous position, where decompression ended.
+ * If yes, do nothing (continue on current segment).
+ * If not, classify previous segment as "external dictionary", and start a new segment.
+ * This function cannot fail. */
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
+
+
+#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h
new file mode 100644
index 00000000000..a06ca187aab
--- /dev/null
+++ b/lib/zstd/decompress_sources.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This file includes every .c file needed for decompression.
+ * It is used by lib/decompress_unzstd.c to include the decompression
+ * source into the translation-unit, so it can be used for kernel
+ * decompression.
+ */
+
+/*
+ * Disable the ASM Huffman implementation because we need to
+ * include all the sources.
+ */
+#define ZSTD_DISABLE_ASM 1
+
+#include "common/debug.c"
+#include "common/entropy_common.c"
+#include "common/error_private.c"
+#include "common/fse_decompress.c"
+#include "common/zstd_common.c"
+#include "decompress/huf_decompress.c"
+#include "decompress/zstd_ddict.c"
+#include "decompress/zstd_decompress.c"
+#include "decompress/zstd_decompress_block.c"
+#include "zstd_decompress_module.c"
diff --git a/lib/zstd/zstd.c b/lib/zstd/zstd.c
new file mode 100644
index 00000000000..14bde369068
--- /dev/null
+++ b/lib/zstd/zstd.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2021 Google LLC
+ */
+
+#define LOG_CATEGORY LOGC_BOOT
+
+#include <abuf.h>
+#include <log.h>
+#include <malloc.h>
+#include <linux/errno.h>
+#include <linux/zstd.h>
+
+int zstd_decompress(struct abuf *in, struct abuf *out)
+{
+ zstd_dctx *ctx;
+ size_t wsize, len;
+ void *workspace;
+ int ret;
+
+ wsize = zstd_dctx_workspace_bound();
+ workspace = malloc(wsize);
+ if (!workspace) {
+ debug("%s: cannot allocate workspace of size %zu\n", __func__,
+ wsize);
+ return -ENOMEM;
+ }
+
+ ctx = zstd_init_dctx(workspace, wsize);
+ if (!ctx) {
+ log_err("%s: zstd_init_dctx() failed\n", __func__);
+ ret = -EPERM;
+ goto do_free;
+ }
+
+ /*
+ * Find out how large the frame actually is, there may be junk at
+ * the end of the frame that zstd_decompress_dctx() can't handle.
+ */
+ len = zstd_find_frame_compressed_size(abuf_data(in), abuf_size(in));
+ if (zstd_is_error(len)) {
+ log_err("%s: failed to detect compressed size: %d\n", __func__,
+ zstd_get_error_code(len));
+ ret = -EINVAL;
+ goto do_free;
+ }
+
+ len = zstd_decompress_dctx(ctx, abuf_data(out), abuf_size(out),
+ abuf_data(in), len);
+ if (zstd_is_error(len)) {
+ log_err("%s: failed to decompress: %d\n", __func__,
+ zstd_get_error_code(len));
+ ret = -EINVAL;
+ goto do_free;
+ }
+
+ ret = len;
+do_free:
+ free(workspace);
+ return ret;
+}
diff --git a/lib/zstd/zstd_common_module.c b/lib/zstd/zstd_common_module.c
new file mode 100644
index 00000000000..73b97c2778c
--- /dev/null
+++ b/lib/zstd/zstd_common_module.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/compat.h>
+
+#include "common/huf.h"
+#include "common/fse.h"
+#include "common/zstd_internal.h"
+
+// Export symbols shared by compress and decompress into a common module
+
+#undef ZSTD_isError /* defined within zstd_internal.h */
+EXPORT_SYMBOL_GPL(FSE_readNCount);
+EXPORT_SYMBOL_GPL(HUF_readStats);
+EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
+EXPORT_SYMBOL_GPL(ZSTD_isError);
+EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
+EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
+EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
+EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
+EXPORT_SYMBOL_GPL(ZSTD_customFree);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Common");
diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c
new file mode 100644
index 00000000000..06cf3e43bca
--- /dev/null
+++ b/lib/zstd/zstd_decompress_module.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#include "common/zstd_deps.h"
+
+/* Common symbols. zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+ return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+ return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+ return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+ return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size)
+{
+ return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+ return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+ size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ (void)max_window_size;
+ return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+ return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+ zstd_in_buffer *input)
+{
+ return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+ return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+ size_t src_size)
+{
+ return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");