--- a/contrib/python-zstandard/zstd/decompress/huf_decompress.c Sun Apr 08 01:08:43 2018 +0200
+++ b/contrib/python-zstandard/zstd/decompress/huf_decompress.c Mon Apr 09 10:13:29 2018 -0700
@@ -33,41 +33,35 @@
****************************************************************** */
/* **************************************************************
-* Compiler specifics
-****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER) || defined(__GNUC__)
-# define inline __inline
-#else
-# define inline /* disable inline */
-#endif
-
-#ifdef _MSC_VER /* Visual Studio */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#endif
-
-
-/* **************************************************************
* Dependencies
****************************************************************/
#include <string.h> /* memcpy, memset */
#include "bitstream.h" /* BIT_* */
+#include "compiler.h"
#include "fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
+#include "error_private.h"
/* **************************************************************
* Error Management
****************************************************************/
+#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+
+
+/* **************************************************************
+* Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
/*-***************************/
/* generic DTableDesc */
/*-***************************/
-
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
@@ -81,19 +75,27 @@
/*-***************************/
/* single-symbol decoding */
/*-***************************/
-
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
-size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
- BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
- U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
U32 tableLog = 0;
U32 nbSymbols = 0;
size_t iSize;
void* const dtPtr = DTable + 1;
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+ U32* rankVal;
+ BYTE* huffWeight;
+ size_t spaceUsed32 = 0;
+
+ rankVal = (U32 *)workSpace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+ huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
+ spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
@@ -102,16 +104,16 @@
/* Table header */
{ DTableDesc dtd = HUF_getDTableDesc(DTable);
- if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, huffman tree cannot fit in */
+ if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
dtd.tableType = 0;
dtd.tableLog = (BYTE)tableLog;
memcpy(DTable, &dtd, sizeof(dtd));
}
- /* Prepare ranks */
+ /* Calculate starting value for each rank */
{ U32 n, nextRankStart = 0;
for (n=1; n<tableLog+1; n++) {
- U32 current = nextRankStart;
+ U32 const current = nextRankStart;
nextRankStart += (rankVal[n] << (n-1));
rankVal[n] = current;
} }
@@ -121,19 +123,28 @@
for (n=0; n<nbSymbols; n++) {
U32 const w = huffWeight[n];
U32 const length = (1 << w) >> 1;
- U32 i;
+ U32 u;
HUF_DEltX2 D;
D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
- for (i = rankVal[w]; i < rankVal[w] + length; i++)
- dt[i] = D;
+ for (u = rankVal[w]; u < rankVal[w] + length; u++)
+ dt[u] = D;
rankVal[w] += length;
} }
return iSize;
}
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
+ workSpace, sizeof(workSpace));
+}
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
BYTE const c = dt[val].byte;
@@ -144,7 +155,7 @@
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
*ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
@@ -152,30 +163,33 @@
if (MEM_64bits()) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
{
BYTE* const pStart = p;
/* up to 4 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
}
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ /* [0-3] symbols remaining */
+ if (MEM_32bits())
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
- /* no more data to retrieve from bitstream, hence no need to reload */
+ /* no more data to retrieve from bitstream, no need to reload */
while (p < pEnd)
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
return pEnd-pStart;
}
-static size_t HUF_decompress1X2_usingDTable_internal(
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
@@ -188,47 +202,17 @@
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode; }
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
- /* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
return dstSize;
}
-size_t HUF_decompress1X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- DTableDesc dtd = HUF_getDTableDesc(DTable);
- if (dtd.tableType != 0) return ERROR(GENERIC);
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize; cSrcSize -= hSize;
-
- return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
- return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
-}
-
-
-static size_t HUF_decompress4X2_usingDTable_internal(
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
@@ -263,23 +247,19 @@
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
- U32 endSignal;
+ U32 endSignal = BIT_DStream_unfinished;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode; }
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
- /* 16-32 symbols per loop (4-8 symbols per stream) */
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
+ while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -296,10 +276,15 @@
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ BIT_reloadDStream(&bitD1);
+ BIT_reloadDStream(&bitD2);
+ BIT_reloadDStream(&bitD3);
+ BIT_reloadDStream(&bitD4);
}
/* check corruption */
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
@@ -312,8 +297,8 @@
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
/* decoded size */
return dstSize;
@@ -321,6 +306,279 @@
}
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+ } }
+ return 1;
+}
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+HINT_INLINE size_t
+HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+ const HUF_DEltX4* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+ }
+
+ /* closer to end : up to 2 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X4_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ BIT_DStream_t bitD;
+
+ /* Init */
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ /* decode */
+ { BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
+ const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
+ }
+
+ /* check */
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+}
+
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X4_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1;
+ const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+ const void *cSrc,
+ size_t cSrcSize,
+ const HUF_DTable *DTable);
+#if DYNAMIC_BMI2
+
+#define X(fn) \
+ \
+ static size_t fn##_default( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ if (bmi2) { \
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#else
+
+#define X(fn) \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ (void)bmi2; \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#endif
+
+X(HUF_decompress1X2_usingDTable_internal)
+X(HUF_decompress4X2_usingDTable_internal)
+X(HUF_decompress1X4_usingDTable_internal)
+X(HUF_decompress4X4_usingDTable_internal)
+
+#undef X
+
+
+size_t HUF_decompress1X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 0) return ERROR(GENERIC);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
size_t HUF_decompress4X2_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -328,22 +586,38 @@
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 0) return ERROR(GENERIC);
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
+ workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}
size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize; cSrcSize -= hSize;
-
- return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
}
-
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@@ -354,8 +628,6 @@
/* *************************/
/* double-symbols decoding */
/* *************************/
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* HUF_fillDTableX4Level2() :
@@ -400,7 +672,8 @@
} }
}
-typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
const sortedSymbol_t* sortedList, const U32 sortedListSize,
@@ -444,22 +717,42 @@
}
}
-size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
+ size_t srcSize, void* workSpace,
+ size_t wkspSize)
{
- BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
- sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
- U32 rankStats[HUF_TABLELOG_MAX + 1] = { 0 };
- U32 rankStart0[HUF_TABLELOG_MAX + 2] = { 0 };
- U32* const rankStart = rankStart0+1;
- rankVal_t rankVal;
U32 tableLog, maxW, sizeOfSort, nbSymbols;
DTableDesc dtd = HUF_getDTableDesc(DTable);
U32 const maxTableLog = dtd.maxTableLog;
size_t iSize;
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
+ U32 *rankStart;
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */
+ rankValCol_t* rankVal;
+ U32* rankStats;
+ U32* rankStart0;
+ sortedSymbol_t* sortedSymbol;
+ BYTE* weightList;
+ size_t spaceUsed32 = 0;
+
+ rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
+ spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+ rankStats = (U32 *)workSpace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_MAX + 1;
+ rankStart0 = (U32 *)workSpace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_MAX + 2;
+ sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
+ spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+ weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
+ spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+ rankStart = rankStart0 + 1;
+ memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
@@ -524,93 +817,11 @@
return iSize;
}
-
-static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 2);
- BIT_skipBits(DStream, dt[val].nbBits);
- return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 1);
- if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
- else {
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
- BIT_skipBits(DStream, dt[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- } }
- return 1;
-}
-
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
{
- BYTE* const pStart = p;
-
- /* up to 8 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
- }
-
- /* closer to end : up to 2 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
- while (p <= pEnd-2)
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- if (p < pEnd)
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
- return p-pStart;
-}
-
-
-static size_t HUF_decompress1X4_usingDTable_internal(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- BIT_DStream_t bitD;
-
- /* Init */
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode;
- }
-
- /* decode */
- { BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
- HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
- }
-
- /* check */
- if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_readDTableX4_wksp(DTable, src, srcSize,
+ workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X4_usingDTable(
@@ -620,19 +831,31 @@
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
-size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
{
const BYTE* ip = (const BYTE*) cSrc;
- size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize);
+ size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
+ workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
@@ -641,99 +864,6 @@
return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
-static size_t HUF_decompress4X4_usingDTable_internal(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- { const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable+1;
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- size_t const length1 = MEM_readLE16(istart);
- size_t const length2 = MEM_readLE16(istart+2);
- size_t const length3 = MEM_readLE16(istart+4);
- size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- size_t const segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
- U32 const dtLog = dtd.tableLog;
-
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode; }
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 already verified within main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endCheck) return ERROR(corruption_detected); }
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
size_t HUF_decompress4X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -741,20 +871,38 @@
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
-
-size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
{
const BYTE* ip = (const BYTE*) cSrc;
- size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize);
+ size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
+ workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
@@ -773,8 +921,8 @@
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -782,8 +930,8 @@
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
@@ -810,21 +958,22 @@
};
/** HUF_selectDecoder() :
-* Tells which decoder is likely to decode faster,
-* based on a set of pre-determined metrics.
-* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-* Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
+ assert(dstSize > 0);
+ assert(dstSize <= 128 KB);
/* decoder timing evaluation */
- U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
- U32 const D256 = (U32)(dstSize >> 8);
- U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
- U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
- DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
-
- return DTime1 < DTime0;
-}
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
+ U32 const D256 = (U32)(dstSize >> 8);
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
+ return DTime1 < DTime0;
+} }
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
@@ -858,19 +1007,32 @@
}
}
-size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+}
+
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+ size_t dstSize, const void* cSrc,
+ size_t cSrcSize, void* workSpace,
+ size_t wkspSize)
{
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
- if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == 0) return ERROR(corruption_detected);
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
- HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+ return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
+ HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
}
}
-size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
{
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -879,7 +1041,56 @@
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
- HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+ return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize):
+ HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize);
}
}
+
+size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+}
+
+
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+}
+
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize == 0) return ERROR(corruption_detected);
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+ HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ }
+}