comparison contrib/python-zstandard/zstd/decompress/huf_decompress.c @ 40121:73fef626dae3

zstandard: vendor python-zstandard 0.10.1

This was just released. The upstream source distribution from PyPI was extracted and unwanted files were removed.

The clang-format ignore list was updated to reflect the new source of files.

setup.py was updated to pass a new argument to python-zstandard's function for returning an Extension instance. Upstream had to change to use relative paths because Python 3.7's packaging doesn't seem to like absolute paths when defining sources, includes, etc. The default relative path calculation is relative to setup_zstd.py, which is in a different directory from Mercurial's setup.py.

The project contains a vendored copy of zstandard 1.3.6 (the old version was 1.3.4). The API should be backwards compatible and nothing in core should need to be adjusted. However, there is a new "chunker" API that we may find useful in places where we want to emit compressed chunks of a fixed size.

0.10.0 also contains a pair of bug fixes for compressobj() and decompressobj() when block flushing is used. I actually found these bugs when introducing those APIs in Mercurial! Existing Mercurial code is not affected because we don't perform block flushing.

# no-check-commit because 3rd party code has different style guidelines

Differential Revision: https://phab.mercurial-scm.org/D4911
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 08 Oct 2018 16:27:40 -0700
parents b1fb341d8a61
children 675775c33ab6
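The commit message above calls out a new "chunker" API in python-zstandard 0.10 for emitting compressed chunks of a fixed size. Below is a minimal, hypothetical sketch of how such an API might be driven from Python; the method and parameter names (chunker(), chunk_size, compress(), finish()) are assumptions based on the upstream 0.10 documentation, not anything defined by this diff.

    # Sketch only: assumes python-zstandard >= 0.10 exposes
    # ZstdCompressor.chunker(chunk_size=...) whose compress()/finish()
    # methods yield fixed-size compressed chunks.
    import zstandard as zstd

    def compress_in_fixed_chunks(data, chunk_size=16384, level=3):
        """Yield compressed chunks of chunk_size bytes (the final chunk may be shorter)."""
        cctx = zstd.ZstdCompressor(level=level)
        chunker = cctx.chunker(chunk_size=chunk_size)  # assumed 0.10 API
        # Feed input; the chunker buffers output and only yields full chunks.
        for out in chunker.compress(data):
            yield out
        # finish() ends the frame and yields whatever remains buffered.
        for out in chunker.finish():
            yield out

    if __name__ == "__main__":
        payload = b"hello zstandard " * 10000
        chunks = list(compress_in_fixed_chunks(payload))
        print(len(chunks), "chunks,", sum(len(c) for c in chunks), "compressed bytes")

The appeal, as the commit message suggests, is that the chunker owns the output buffering, so callers receive uniformly sized chunks without tracking partial output themselves.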
40120:89742f1fa6cb 40121:73fef626dae3
1 /* ****************************************************************** 1 /* ******************************************************************
2 Huffman decoder, part of New Generation Entropy library 2 huff0 huffman decoder,
3 Copyright (C) 2013-2016, Yann Collet. 3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
4 5
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 7
7 Redistribution and use in source and binary forms, with or without 8 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are 9 modification, are permitted provided that the following conditions are
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 30
30 You can contact the author at : 31 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */ 33 ****************************************************************** */
34 34
35 /* ************************************************************** 35 /* **************************************************************
36 * Dependencies 36 * Dependencies
37 ****************************************************************/ 37 ****************************************************************/
38 #include <string.h> /* memcpy, memset */ 38 #include <string.h> /* memcpy, memset */
39 #include "compiler.h"
39 #include "bitstream.h" /* BIT_* */ 40 #include "bitstream.h" /* BIT_* */
40 #include "compiler.h" 41 #include "fse.h" /* to compress headers */
41 #include "fse.h" /* header compression */
42 #define HUF_STATIC_LINKING_ONLY 42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h" 43 #include "huf.h"
44 #include "error_private.h" 44 #include "error_private.h"
45 45
46 46
47 /* ************************************************************** 47 /* **************************************************************
48 * Error Management 48 * Error Management
49 ****************************************************************/ 49 ****************************************************************/
50 #define HUF_isError ERR_isError 50 #define HUF_isError ERR_isError
51 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
52 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } 51 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
53 52
54 53
55 /* ************************************************************** 54 /* **************************************************************
56 * Byte alignment for workSpace management 55 * Byte alignment for workSpace management
73 72
74 73
75 /*-***************************/ 74 /*-***************************/
76 /* single-symbol decoding */ 75 /* single-symbol decoding */
77 /*-***************************/ 76 /*-***************************/
78 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ 77 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
79 78
80 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) 79 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
81 { 80 {
82 U32 tableLog = 0; 81 U32 tableLog = 0;
83 U32 nbSymbols = 0; 82 U32 nbSymbols = 0;
84 size_t iSize; 83 size_t iSize;
85 void* const dtPtr = DTable + 1; 84 void* const dtPtr = DTable + 1;
86 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; 85 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
87 86
88 U32* rankVal; 87 U32* rankVal;
89 BYTE* huffWeight; 88 BYTE* huffWeight;
90 size_t spaceUsed32 = 0; 89 size_t spaceUsed32 = 0;
91 90
94 huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); 93 huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
95 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; 94 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
96 95
97 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); 96 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
98 97
99 HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); 98 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
100 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ 99 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
101 100
102 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); 101 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
103 if (HUF_isError(iSize)) return iSize; 102 if (HUF_isError(iSize)) return iSize;
104 103
122 { U32 n; 121 { U32 n;
123 for (n=0; n<nbSymbols; n++) { 122 for (n=0; n<nbSymbols; n++) {
124 U32 const w = huffWeight[n]; 123 U32 const w = huffWeight[n];
125 U32 const length = (1 << w) >> 1; 124 U32 const length = (1 << w) >> 1;
126 U32 u; 125 U32 u;
127 HUF_DEltX2 D; 126 HUF_DEltX1 D;
128 D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); 127 D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
129 for (u = rankVal[w]; u < rankVal[w] + length; u++) 128 for (u = rankVal[w]; u < rankVal[w] + length; u++)
130 dt[u] = D; 129 dt[u] = D;
131 rankVal[w] += length; 130 rankVal[w] += length;
132 } } 131 } }
133 132
134 return iSize; 133 return iSize;
135 } 134 }
136 135
137 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) 136 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
138 { 137 {
139 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 138 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
140 return HUF_readDTableX2_wksp(DTable, src, srcSize, 139 return HUF_readDTableX1_wksp(DTable, src, srcSize,
141 workSpace, sizeof(workSpace)); 140 workSpace, sizeof(workSpace));
142 } 141 }
143 142
144 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
145
146 FORCE_INLINE_TEMPLATE BYTE 143 FORCE_INLINE_TEMPLATE BYTE
147 HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) 144 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
148 { 145 {
149 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ 146 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
150 BYTE const c = dt[val].byte; 147 BYTE const c = dt[val].byte;
151 BIT_skipBits(Dstream, dt[val].nbBits); 148 BIT_skipBits(Dstream, dt[val].nbBits);
152 return c; 149 return c;
153 } 150 }
154 151
155 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ 152 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
156 *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) 153 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
157 154
158 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ 155 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
159 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 156 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
160 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) 157 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
161 158
162 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ 159 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
163 if (MEM_64bits()) \ 160 if (MEM_64bits()) \
164 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) 161 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
165 162
166 HINT_INLINE size_t 163 HINT_INLINE size_t
167 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) 164 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
168 { 165 {
169 BYTE* const pStart = p; 166 BYTE* const pStart = p;
170 167
171 /* up to 4 symbols at a time */ 168 /* up to 4 symbols at a time */
172 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { 169 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
173 HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 170 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
174 HUF_DECODE_SYMBOLX2_1(p, bitDPtr); 171 HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
175 HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 172 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
176 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 173 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
177 } 174 }
178 175
179 /* [0-3] symbols remaining */ 176 /* [0-3] symbols remaining */
180 if (MEM_32bits()) 177 if (MEM_32bits())
181 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) 178 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
182 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 179 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
183 180
184 /* no more data to retrieve from bitstream, no need to reload */ 181 /* no more data to retrieve from bitstream, no need to reload */
185 while (p < pEnd) 182 while (p < pEnd)
186 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 183 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
187 184
188 return pEnd-pStart; 185 return pEnd-pStart;
189 } 186 }
190 187
191 FORCE_INLINE_TEMPLATE size_t 188 FORCE_INLINE_TEMPLATE size_t
192 HUF_decompress1X2_usingDTable_internal_body( 189 HUF_decompress1X1_usingDTable_internal_body(
193 void* dst, size_t dstSize, 190 void* dst, size_t dstSize,
194 const void* cSrc, size_t cSrcSize, 191 const void* cSrc, size_t cSrcSize,
195 const HUF_DTable* DTable) 192 const HUF_DTable* DTable)
196 { 193 {
197 BYTE* op = (BYTE*)dst; 194 BYTE* op = (BYTE*)dst;
198 BYTE* const oend = op + dstSize; 195 BYTE* const oend = op + dstSize;
199 const void* dtPtr = DTable + 1; 196 const void* dtPtr = DTable + 1;
200 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 197 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
201 BIT_DStream_t bitD; 198 BIT_DStream_t bitD;
202 DTableDesc const dtd = HUF_getDTableDesc(DTable); 199 DTableDesc const dtd = HUF_getDTableDesc(DTable);
203 U32 const dtLog = dtd.tableLog; 200 U32 const dtLog = dtd.tableLog;
204 201
205 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); 202 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
206 203
207 HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); 204 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
208 205
209 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 206 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
210 207
211 return dstSize; 208 return dstSize;
212 } 209 }
213 210
214 FORCE_INLINE_TEMPLATE size_t 211 FORCE_INLINE_TEMPLATE size_t
215 HUF_decompress4X2_usingDTable_internal_body( 212 HUF_decompress4X1_usingDTable_internal_body(
216 void* dst, size_t dstSize, 213 void* dst, size_t dstSize,
217 const void* cSrc, size_t cSrcSize, 214 const void* cSrc, size_t cSrcSize,
218 const HUF_DTable* DTable) 215 const HUF_DTable* DTable)
219 { 216 {
220 /* Check */ 217 /* Check */
222 219
223 { const BYTE* const istart = (const BYTE*) cSrc; 220 { const BYTE* const istart = (const BYTE*) cSrc;
224 BYTE* const ostart = (BYTE*) dst; 221 BYTE* const ostart = (BYTE*) dst;
225 BYTE* const oend = ostart + dstSize; 222 BYTE* const oend = ostart + dstSize;
226 const void* const dtPtr = DTable + 1; 223 const void* const dtPtr = DTable + 1;
227 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 224 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
228 225
229 /* Init */ 226 /* Init */
230 BIT_DStream_t bitD1; 227 BIT_DStream_t bitD1;
231 BIT_DStream_t bitD2; 228 BIT_DStream_t bitD2;
232 BIT_DStream_t bitD3; 229 BIT_DStream_t bitD3;
258 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); 255 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
259 256
260 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ 257 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
261 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); 258 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
262 while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { 259 while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
263 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 260 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
264 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 261 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
265 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 262 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
266 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 263 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
267 HUF_DECODE_SYMBOLX2_1(op1, &bitD1); 264 HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
268 HUF_DECODE_SYMBOLX2_1(op2, &bitD2); 265 HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
269 HUF_DECODE_SYMBOLX2_1(op3, &bitD3); 266 HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
270 HUF_DECODE_SYMBOLX2_1(op4, &bitD4); 267 HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
271 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 268 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
272 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 269 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
273 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 270 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
274 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 271 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
275 HUF_DECODE_SYMBOLX2_0(op1, &bitD1); 272 HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
276 HUF_DECODE_SYMBOLX2_0(op2, &bitD2); 273 HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
277 HUF_DECODE_SYMBOLX2_0(op3, &bitD3); 274 HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
278 HUF_DECODE_SYMBOLX2_0(op4, &bitD4); 275 HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
279 BIT_reloadDStream(&bitD1); 276 BIT_reloadDStream(&bitD1);
280 BIT_reloadDStream(&bitD2); 277 BIT_reloadDStream(&bitD2);
281 BIT_reloadDStream(&bitD3); 278 BIT_reloadDStream(&bitD3);
282 BIT_reloadDStream(&bitD4); 279 BIT_reloadDStream(&bitD4);
283 } 280 }
289 if (op2 > opStart3) return ERROR(corruption_detected); 286 if (op2 > opStart3) return ERROR(corruption_detected);
290 if (op3 > opStart4) return ERROR(corruption_detected); 287 if (op3 > opStart4) return ERROR(corruption_detected);
291 /* note : op4 supposed already verified within main loop */ 288 /* note : op4 supposed already verified within main loop */
292 289
293 /* finish bitStreams one by one */ 290 /* finish bitStreams one by one */
294 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); 291 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
295 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); 292 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
296 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); 293 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
297 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); 294 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
298 295
299 /* check */ 296 /* check */
300 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 297 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
301 if (!endCheck) return ERROR(corruption_detected); } 298 if (!endCheck) return ERROR(corruption_detected); }
302 299
304 return dstSize; 301 return dstSize;
305 } 302 }
306 } 303 }
307 304
308 305
306 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
307 const void *cSrc,
308 size_t cSrcSize,
309 const HUF_DTable *DTable);
310 #if DYNAMIC_BMI2
311
312 #define HUF_DGEN(fn) \
313 \
314 static size_t fn##_default( \
315 void* dst, size_t dstSize, \
316 const void* cSrc, size_t cSrcSize, \
317 const HUF_DTable* DTable) \
318 { \
319 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
320 } \
321 \
322 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
323 void* dst, size_t dstSize, \
324 const void* cSrc, size_t cSrcSize, \
325 const HUF_DTable* DTable) \
326 { \
327 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
328 } \
329 \
330 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
331 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
332 { \
333 if (bmi2) { \
334 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
335 } \
336 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
337 }
338
339 #else
340
341 #define HUF_DGEN(fn) \
342 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
343 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
344 { \
345 (void)bmi2; \
346 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
347 }
348
349 #endif
350
351 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
352 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
353
354
355
356 size_t HUF_decompress1X1_usingDTable(
357 void* dst, size_t dstSize,
358 const void* cSrc, size_t cSrcSize,
359 const HUF_DTable* DTable)
360 {
361 DTableDesc dtd = HUF_getDTableDesc(DTable);
362 if (dtd.tableType != 0) return ERROR(GENERIC);
363 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
364 }
365
366 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
367 const void* cSrc, size_t cSrcSize,
368 void* workSpace, size_t wkspSize)
369 {
370 const BYTE* ip = (const BYTE*) cSrc;
371
372 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
373 if (HUF_isError(hSize)) return hSize;
374 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
375 ip += hSize; cSrcSize -= hSize;
376
377 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
378 }
379
380
381 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
382 const void* cSrc, size_t cSrcSize)
383 {
384 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
385 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
386 workSpace, sizeof(workSpace));
387 }
388
389 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
390 {
391 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
392 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
393 }
394
395 size_t HUF_decompress4X1_usingDTable(
396 void* dst, size_t dstSize,
397 const void* cSrc, size_t cSrcSize,
398 const HUF_DTable* DTable)
399 {
400 DTableDesc dtd = HUF_getDTableDesc(DTable);
401 if (dtd.tableType != 0) return ERROR(GENERIC);
402 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
403 }
404
405 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
406 const void* cSrc, size_t cSrcSize,
407 void* workSpace, size_t wkspSize, int bmi2)
408 {
409 const BYTE* ip = (const BYTE*) cSrc;
410
411 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
412 workSpace, wkspSize);
413 if (HUF_isError(hSize)) return hSize;
414 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
415 ip += hSize; cSrcSize -= hSize;
416
417 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
418 }
419
420 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
421 const void* cSrc, size_t cSrcSize,
422 void* workSpace, size_t wkspSize)
423 {
424 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
425 }
426
427
428 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
429 {
430 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
431 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
432 workSpace, sizeof(workSpace));
433 }
434 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
435 {
436 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
437 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
438 }
439
440
441 /* *************************/
442 /* double-symbols decoding */
443 /* *************************/
444
445 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
446 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
447 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
448 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
449
450
451 /* HUF_fillDTableX2Level2() :
452 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
453 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
454 const U32* rankValOrigin, const int minWeight,
455 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
456 U32 nbBitsBaseline, U16 baseSeq)
457 {
458 HUF_DEltX2 DElt;
459 U32 rankVal[HUF_TABLELOG_MAX + 1];
460
461 /* get pre-calculated rankVal */
462 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
463
464 /* fill skipped values */
465 if (minWeight>1) {
466 U32 i, skipSize = rankVal[minWeight];
467 MEM_writeLE16(&(DElt.sequence), baseSeq);
468 DElt.nbBits = (BYTE)(consumed);
469 DElt.length = 1;
470 for (i = 0; i < skipSize; i++)
471 DTable[i] = DElt;
472 }
473
474 /* fill DTable */
475 { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
476 const U32 symbol = sortedSymbols[s].symbol;
477 const U32 weight = sortedSymbols[s].weight;
478 const U32 nbBits = nbBitsBaseline - weight;
479 const U32 length = 1 << (sizeLog-nbBits);
480 const U32 start = rankVal[weight];
481 U32 i = start;
482 const U32 end = start + length;
483
484 MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
485 DElt.nbBits = (BYTE)(nbBits + consumed);
486 DElt.length = 2;
487 do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
488
489 rankVal[weight] += length;
490 } }
491 }
492
493
494 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
495 const sortedSymbol_t* sortedList, const U32 sortedListSize,
496 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
497 const U32 nbBitsBaseline)
498 {
499 U32 rankVal[HUF_TABLELOG_MAX + 1];
500 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
501 const U32 minBits = nbBitsBaseline - maxWeight;
502 U32 s;
503
504 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
505
506 /* fill DTable */
507 for (s=0; s<sortedListSize; s++) {
508 const U16 symbol = sortedList[s].symbol;
509 const U32 weight = sortedList[s].weight;
510 const U32 nbBits = nbBitsBaseline - weight;
511 const U32 start = rankVal[weight];
512 const U32 length = 1 << (targetLog-nbBits);
513
514 if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
515 U32 sortedRank;
516 int minWeight = nbBits + scaleLog;
517 if (minWeight < 1) minWeight = 1;
518 sortedRank = rankStart[minWeight];
519 HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
520 rankValOrigin[nbBits], minWeight,
521 sortedList+sortedRank, sortedListSize-sortedRank,
522 nbBitsBaseline, symbol);
523 } else {
524 HUF_DEltX2 DElt;
525 MEM_writeLE16(&(DElt.sequence), symbol);
526 DElt.nbBits = (BYTE)(nbBits);
527 DElt.length = 1;
528 { U32 const end = start + length;
529 U32 u;
530 for (u = start; u < end; u++) DTable[u] = DElt;
531 } }
532 rankVal[weight] += length;
533 }
534 }
535
536 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
537 const void* src, size_t srcSize,
538 void* workSpace, size_t wkspSize)
539 {
540 U32 tableLog, maxW, sizeOfSort, nbSymbols;
541 DTableDesc dtd = HUF_getDTableDesc(DTable);
542 U32 const maxTableLog = dtd.maxTableLog;
543 size_t iSize;
544 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
545 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
546 U32 *rankStart;
547
548 rankValCol_t* rankVal;
549 U32* rankStats;
550 U32* rankStart0;
551 sortedSymbol_t* sortedSymbol;
552 BYTE* weightList;
553 size_t spaceUsed32 = 0;
554
555 rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
556 spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
557 rankStats = (U32 *)workSpace + spaceUsed32;
558 spaceUsed32 += HUF_TABLELOG_MAX + 1;
559 rankStart0 = (U32 *)workSpace + spaceUsed32;
560 spaceUsed32 += HUF_TABLELOG_MAX + 2;
561 sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
562 spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
563 weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
564 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
565
566 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
567
568 rankStart = rankStart0 + 1;
569 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
570
571 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
572 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
573 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
574
575 iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
576 if (HUF_isError(iSize)) return iSize;
577
578 /* check result */
579 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
580
581 /* find maxWeight */
582 for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
583
584 /* Get start index of each weight */
585 { U32 w, nextRankStart = 0;
586 for (w=1; w<maxW+1; w++) {
587 U32 current = nextRankStart;
588 nextRankStart += rankStats[w];
589 rankStart[w] = current;
590 }
591 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
592 sizeOfSort = nextRankStart;
593 }
594
595 /* sort symbols by weight */
596 { U32 s;
597 for (s=0; s<nbSymbols; s++) {
598 U32 const w = weightList[s];
599 U32 const r = rankStart[w]++;
600 sortedSymbol[r].symbol = (BYTE)s;
601 sortedSymbol[r].weight = (BYTE)w;
602 }
603 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
604 }
605
606 /* Build rankVal */
607 { U32* const rankVal0 = rankVal[0];
608 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
609 U32 nextRankVal = 0;
610 U32 w;
611 for (w=1; w<maxW+1; w++) {
612 U32 current = nextRankVal;
613 nextRankVal += rankStats[w] << (w+rescale);
614 rankVal0[w] = current;
615 } }
616 { U32 const minBits = tableLog+1 - maxW;
617 U32 consumed;
618 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
619 U32* const rankValPtr = rankVal[consumed];
620 U32 w;
621 for (w = 1; w < maxW+1; w++) {
622 rankValPtr[w] = rankVal0[w] >> consumed;
623 } } } }
624
625 HUF_fillDTableX2(dt, maxTableLog,
626 sortedSymbol, sizeOfSort,
627 rankStart0, rankVal, maxW,
628 tableLog+1);
629
630 dtd.tableLog = (BYTE)maxTableLog;
631 dtd.tableType = 1;
632 memcpy(DTable, &dtd, sizeof(dtd));
633 return iSize;
634 }
635
636 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
637 {
638 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
639 return HUF_readDTableX2_wksp(DTable, src, srcSize,
640 workSpace, sizeof(workSpace));
641 }
642
643
309 FORCE_INLINE_TEMPLATE U32 644 FORCE_INLINE_TEMPLATE U32
310 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) 645 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
311 { 646 {
312 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 647 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
313 memcpy(op, dt+val, 2); 648 memcpy(op, dt+val, 2);
314 BIT_skipBits(DStream, dt[val].nbBits); 649 BIT_skipBits(DStream, dt[val].nbBits);
315 return dt[val].length; 650 return dt[val].length;
316 } 651 }
317 652
318 FORCE_INLINE_TEMPLATE U32 653 FORCE_INLINE_TEMPLATE U32
319 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) 654 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
320 { 655 {
321 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 656 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
322 memcpy(op, dt+val, 1); 657 memcpy(op, dt+val, 1);
323 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); 658 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
324 else { 659 else {
329 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); 664 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
330 } } 665 } }
331 return 1; 666 return 1;
332 } 667 }
333 668
334 #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ 669 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
335 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) 670 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
336 671
337 #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ 672 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
338 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 673 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
339 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) 674 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
340 675
341 #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ 676 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
342 if (MEM_64bits()) \ 677 if (MEM_64bits()) \
343 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) 678 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
344 679
345 HINT_INLINE size_t 680 HINT_INLINE size_t
346 HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, 681 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
347 const HUF_DEltX4* const dt, const U32 dtLog) 682 const HUF_DEltX2* const dt, const U32 dtLog)
348 { 683 {
349 BYTE* const pStart = p; 684 BYTE* const pStart = p;
350 685
351 /* up to 8 symbols at a time */ 686 /* up to 8 symbols at a time */
352 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { 687 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
353 HUF_DECODE_SYMBOLX4_2(p, bitDPtr); 688 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
354 HUF_DECODE_SYMBOLX4_1(p, bitDPtr); 689 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
355 HUF_DECODE_SYMBOLX4_2(p, bitDPtr); 690 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
356 HUF_DECODE_SYMBOLX4_0(p, bitDPtr); 691 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
357 } 692 }
358 693
359 /* closer to end : up to 2 symbols at a time */ 694 /* closer to end : up to 2 symbols at a time */
360 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) 695 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
361 HUF_DECODE_SYMBOLX4_0(p, bitDPtr); 696 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
362 697
363 while (p <= pEnd-2) 698 while (p <= pEnd-2)
364 HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ 699 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
365 700
366 if (p < pEnd) 701 if (p < pEnd)
367 p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); 702 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
368 703
369 return p-pStart; 704 return p-pStart;
370 } 705 }
371 706
372 FORCE_INLINE_TEMPLATE size_t 707 FORCE_INLINE_TEMPLATE size_t
373 HUF_decompress1X4_usingDTable_internal_body( 708 HUF_decompress1X2_usingDTable_internal_body(
374 void* dst, size_t dstSize, 709 void* dst, size_t dstSize,
375 const void* cSrc, size_t cSrcSize, 710 const void* cSrc, size_t cSrcSize,
376 const HUF_DTable* DTable) 711 const HUF_DTable* DTable)
377 { 712 {
378 BIT_DStream_t bitD; 713 BIT_DStream_t bitD;
382 717
383 /* decode */ 718 /* decode */
384 { BYTE* const ostart = (BYTE*) dst; 719 { BYTE* const ostart = (BYTE*) dst;
385 BYTE* const oend = ostart + dstSize; 720 BYTE* const oend = ostart + dstSize;
386 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ 721 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
387 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; 722 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
388 DTableDesc const dtd = HUF_getDTableDesc(DTable); 723 DTableDesc const dtd = HUF_getDTableDesc(DTable);
389 HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); 724 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
390 } 725 }
391 726
392 /* check */ 727 /* check */
393 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 728 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
394 729
396 return dstSize; 731 return dstSize;
397 } 732 }
398 733
399 734
400 FORCE_INLINE_TEMPLATE size_t 735 FORCE_INLINE_TEMPLATE size_t
401 HUF_decompress4X4_usingDTable_internal_body( 736 HUF_decompress4X2_usingDTable_internal_body(
402 void* dst, size_t dstSize, 737 void* dst, size_t dstSize,
403 const void* cSrc, size_t cSrcSize, 738 const void* cSrc, size_t cSrcSize,
404 const HUF_DTable* DTable) 739 const HUF_DTable* DTable)
405 { 740 {
406 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 741 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
407 742
408 { const BYTE* const istart = (const BYTE*) cSrc; 743 { const BYTE* const istart = (const BYTE*) cSrc;
409 BYTE* const ostart = (BYTE*) dst; 744 BYTE* const ostart = (BYTE*) dst;
410 BYTE* const oend = ostart + dstSize; 745 BYTE* const oend = ostart + dstSize;
411 const void* const dtPtr = DTable+1; 746 const void* const dtPtr = DTable+1;
412 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; 747 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
413 748
414 /* Init */ 749 /* Init */
415 BIT_DStream_t bitD1; 750 BIT_DStream_t bitD1;
416 BIT_DStream_t bitD2; 751 BIT_DStream_t bitD2;
417 BIT_DStream_t bitD3; 752 BIT_DStream_t bitD3;
443 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); 778 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
444 779
445 /* 16-32 symbols per loop (4-8 symbols per stream) */ 780 /* 16-32 symbols per loop (4-8 symbols per stream) */
446 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); 781 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
447 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { 782 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
448 HUF_DECODE_SYMBOLX4_2(op1, &bitD1); 783 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
449 HUF_DECODE_SYMBOLX4_2(op2, &bitD2); 784 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
450 HUF_DECODE_SYMBOLX4_2(op3, &bitD3); 785 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
451 HUF_DECODE_SYMBOLX4_2(op4, &bitD4); 786 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
452 HUF_DECODE_SYMBOLX4_1(op1, &bitD1); 787 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
453 HUF_DECODE_SYMBOLX4_1(op2, &bitD2); 788 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
454 HUF_DECODE_SYMBOLX4_1(op3, &bitD3); 789 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
455 HUF_DECODE_SYMBOLX4_1(op4, &bitD4); 790 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
456 HUF_DECODE_SYMBOLX4_2(op1, &bitD1); 791 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
457 HUF_DECODE_SYMBOLX4_2(op2, &bitD2); 792 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
458 HUF_DECODE_SYMBOLX4_2(op3, &bitD3); 793 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
459 HUF_DECODE_SYMBOLX4_2(op4, &bitD4); 794 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
460 HUF_DECODE_SYMBOLX4_0(op1, &bitD1); 795 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
461 HUF_DECODE_SYMBOLX4_0(op2, &bitD2); 796 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
462 HUF_DECODE_SYMBOLX4_0(op3, &bitD3); 797 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
463 HUF_DECODE_SYMBOLX4_0(op4, &bitD4); 798 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
464 799
465 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); 800 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
466 } 801 }
467 802
468 /* check corruption */ 803 /* check corruption */
470 if (op2 > opStart3) return ERROR(corruption_detected); 805 if (op2 > opStart3) return ERROR(corruption_detected);
471 if (op3 > opStart4) return ERROR(corruption_detected); 806 if (op3 > opStart4) return ERROR(corruption_detected);
472 /* note : op4 already verified within main loop */ 807 /* note : op4 already verified within main loop */
473 808
474 /* finish bitStreams one by one */ 809 /* finish bitStreams one by one */
475 HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); 810 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
476 HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); 811 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
477 HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); 812 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
478 HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); 813 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
479 814
480 /* check */ 815 /* check */
481 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 816 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
482 if (!endCheck) return ERROR(corruption_detected); } 817 if (!endCheck) return ERROR(corruption_detected); }
483 818
484 /* decoded size */ 819 /* decoded size */
485 return dstSize; 820 return dstSize;
486 } 821 }
487 } 822 }
488 823
489 824 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
490 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, 825 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
491 const void *cSrc,
492 size_t cSrcSize,
493 const HUF_DTable *DTable);
494 #if DYNAMIC_BMI2
495
496 #define X(fn) \
497 \
498 static size_t fn##_default( \
499 void* dst, size_t dstSize, \
500 const void* cSrc, size_t cSrcSize, \
501 const HUF_DTable* DTable) \
502 { \
503 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
504 } \
505 \
506 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
507 void* dst, size_t dstSize, \
508 const void* cSrc, size_t cSrcSize, \
509 const HUF_DTable* DTable) \
510 { \
511 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
512 } \
513 \
514 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
515 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
516 { \
517 if (bmi2) { \
518 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
519 } \
520 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
521 }
522
523 #else
524
525 #define X(fn) \
526 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
527 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
528 { \
529 (void)bmi2; \
530 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
531 }
532
533 #endif
534
535 X(HUF_decompress1X2_usingDTable_internal)
536 X(HUF_decompress4X2_usingDTable_internal)
537 X(HUF_decompress1X4_usingDTable_internal)
538 X(HUF_decompress4X4_usingDTable_internal)
539
540 #undef X
541
542 826
543 size_t HUF_decompress1X2_usingDTable( 827 size_t HUF_decompress1X2_usingDTable(
544 void* dst, size_t dstSize, 828 void* dst, size_t dstSize,
545 const void* cSrc, size_t cSrcSize, 829 const void* cSrc, size_t cSrcSize,
546 const HUF_DTable* DTable) 830 const HUF_DTable* DTable)
547 { 831 {
548 DTableDesc dtd = HUF_getDTableDesc(DTable); 832 DTableDesc dtd = HUF_getDTableDesc(DTable);
549 if (dtd.tableType != 0) return ERROR(GENERIC); 833 if (dtd.tableType != 1) return ERROR(GENERIC);
550 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 834 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
551 } 835 }
552 836
553 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 837 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
554 const void* cSrc, size_t cSrcSize, 838 const void* cSrc, size_t cSrcSize,
555 void* workSpace, size_t wkspSize) 839 void* workSpace, size_t wkspSize)
556 { 840 {
557 const BYTE* ip = (const BYTE*) cSrc; 841 const BYTE* ip = (const BYTE*) cSrc;
558 842
559 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); 843 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
844 workSpace, wkspSize);
560 if (HUF_isError(hSize)) return hSize; 845 if (HUF_isError(hSize)) return hSize;
561 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 846 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
562 ip += hSize; cSrcSize -= hSize; 847 ip += hSize; cSrcSize -= hSize;
563 848
564 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 849 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
574 } 859 }
575 860
576 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 861 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
577 { 862 {
578 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); 863 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
579 return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); 864 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
580 } 865 }
581 866
582 size_t HUF_decompress4X2_usingDTable( 867 size_t HUF_decompress4X2_usingDTable(
583 void* dst, size_t dstSize, 868 void* dst, size_t dstSize,
584 const void* cSrc, size_t cSrcSize, 869 const void* cSrc, size_t cSrcSize,
585 const HUF_DTable* DTable) 870 const HUF_DTable* DTable)
586 { 871 {
587 DTableDesc dtd = HUF_getDTableDesc(DTable); 872 DTableDesc dtd = HUF_getDTableDesc(DTable);
588 if (dtd.tableType != 0) return ERROR(GENERIC); 873 if (dtd.tableType != 1) return ERROR(GENERIC);
589 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 874 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
590 } 875 }
591 876
592 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 877 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
593 const void* cSrc, size_t cSrcSize, 878 const void* cSrc, size_t cSrcSize,
594 void* workSpace, size_t wkspSize, int bmi2) 879 void* workSpace, size_t wkspSize, int bmi2)
595 { 880 {
596 const BYTE* ip = (const BYTE*) cSrc; 881 const BYTE* ip = (const BYTE*) cSrc;
597 882
598 size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize, 883 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
599 workSpace, wkspSize);
600 if (HUF_isError(hSize)) return hSize;
601 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
602 ip += hSize; cSrcSize -= hSize;
603
604 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
605 }
606
607 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
608 const void* cSrc, size_t cSrcSize,
609 void* workSpace, size_t wkspSize)
610 {
611 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
612 }
613
614
615 size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
616 {
617 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
618 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
619 workSpace, sizeof(workSpace));
620 }
621 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
622 {
623 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
624 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
625 }
626
627
628 /* *************************/
629 /* double-symbols decoding */
630 /* *************************/
631 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
632
633 /* HUF_fillDTableX4Level2() :
634 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
635 static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
636 const U32* rankValOrigin, const int minWeight,
637 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
638 U32 nbBitsBaseline, U16 baseSeq)
639 {
640 HUF_DEltX4 DElt;
641 U32 rankVal[HUF_TABLELOG_MAX + 1];
642
643 /* get pre-calculated rankVal */
644 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
645
646 /* fill skipped values */
647 if (minWeight>1) {
648 U32 i, skipSize = rankVal[minWeight];
649 MEM_writeLE16(&(DElt.sequence), baseSeq);
650 DElt.nbBits = (BYTE)(consumed);
651 DElt.length = 1;
652 for (i = 0; i < skipSize; i++)
653 DTable[i] = DElt;
654 }
655
656 /* fill DTable */
657 { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
658 const U32 symbol = sortedSymbols[s].symbol;
659 const U32 weight = sortedSymbols[s].weight;
660 const U32 nbBits = nbBitsBaseline - weight;
661 const U32 length = 1 << (sizeLog-nbBits);
662 const U32 start = rankVal[weight];
663 U32 i = start;
664 const U32 end = start + length;
665
666 MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
667 DElt.nbBits = (BYTE)(nbBits + consumed);
668 DElt.length = 2;
669 do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
670
671 rankVal[weight] += length;
672 } }
673 }
674
675 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
676 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
677
678 static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
679 const sortedSymbol_t* sortedList, const U32 sortedListSize,
680 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
681 const U32 nbBitsBaseline)
682 {
683 U32 rankVal[HUF_TABLELOG_MAX + 1];
684 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
685 const U32 minBits = nbBitsBaseline - maxWeight;
686 U32 s;
687
688 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
689
690 /* fill DTable */
691 for (s=0; s<sortedListSize; s++) {
692 const U16 symbol = sortedList[s].symbol;
693 const U32 weight = sortedList[s].weight;
694 const U32 nbBits = nbBitsBaseline - weight;
695 const U32 start = rankVal[weight];
696 const U32 length = 1 << (targetLog-nbBits);
697
698 if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
699 U32 sortedRank;
700 int minWeight = nbBits + scaleLog;
701 if (minWeight < 1) minWeight = 1;
702 sortedRank = rankStart[minWeight];
703 HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
704 rankValOrigin[nbBits], minWeight,
705 sortedList+sortedRank, sortedListSize-sortedRank,
706 nbBitsBaseline, symbol);
707 } else {
708 HUF_DEltX4 DElt;
709 MEM_writeLE16(&(DElt.sequence), symbol);
710 DElt.nbBits = (BYTE)(nbBits);
711 DElt.length = 1;
712 { U32 const end = start + length;
713 U32 u;
714 for (u = start; u < end; u++) DTable[u] = DElt;
715 } }
716 rankVal[weight] += length;
717 }
718 }
719
720 size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
721 size_t srcSize, void* workSpace,
722 size_t wkspSize)
723 {
724 U32 tableLog, maxW, sizeOfSort, nbSymbols;
725 DTableDesc dtd = HUF_getDTableDesc(DTable);
726 U32 const maxTableLog = dtd.maxTableLog;
727 size_t iSize;
728 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
729 HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
730 U32 *rankStart;
731
732 rankValCol_t* rankVal;
733 U32* rankStats;
734 U32* rankStart0;
735 sortedSymbol_t* sortedSymbol;
736 BYTE* weightList;
737 size_t spaceUsed32 = 0;
738
739 rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
740 spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
741 rankStats = (U32 *)workSpace + spaceUsed32;
742 spaceUsed32 += HUF_TABLELOG_MAX + 1;
743 rankStart0 = (U32 *)workSpace + spaceUsed32;
744 spaceUsed32 += HUF_TABLELOG_MAX + 2;
745 sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
746 spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
747 weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
748 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
749
750 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
751
752 rankStart = rankStart0 + 1;
753 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
754
755 HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
756 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
757 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
758
759 iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
760 if (HUF_isError(iSize)) return iSize;
761
762 /* check result */
763 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
764
765 /* find maxWeight */
766 for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
767
768 /* Get start index of each weight */
769 { U32 w, nextRankStart = 0;
770 for (w=1; w<maxW+1; w++) {
771 U32 current = nextRankStart;
772 nextRankStart += rankStats[w];
773 rankStart[w] = current;
774 }
775 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
776 sizeOfSort = nextRankStart;
777 }
778
779 /* sort symbols by weight */
780 { U32 s;
781 for (s=0; s<nbSymbols; s++) {
782 U32 const w = weightList[s];
783 U32 const r = rankStart[w]++;
784 sortedSymbol[r].symbol = (BYTE)s;
785 sortedSymbol[r].weight = (BYTE)w;
786 }
787 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
788 }
789
790 /* Build rankVal */
791 { U32* const rankVal0 = rankVal[0];
792 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
793 U32 nextRankVal = 0;
794 U32 w;
795 for (w=1; w<maxW+1; w++) {
796 U32 current = nextRankVal;
797 nextRankVal += rankStats[w] << (w+rescale);
798 rankVal0[w] = current;
799 } }
800 { U32 const minBits = tableLog+1 - maxW;
801 U32 consumed;
802 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
803 U32* const rankValPtr = rankVal[consumed];
804 U32 w;
805 for (w = 1; w < maxW+1; w++) {
806 rankValPtr[w] = rankVal0[w] >> consumed;
807 } } } }
808
809 HUF_fillDTableX4(dt, maxTableLog,
810 sortedSymbol, sizeOfSort,
811 rankStart0, rankVal, maxW,
812 tableLog+1);
813
814 dtd.tableLog = (BYTE)maxTableLog;
815 dtd.tableType = 1;
816 memcpy(DTable, &dtd, sizeof(dtd));
817 return iSize;
818 }
819
820 size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
821 {
822 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
823 return HUF_readDTableX4_wksp(DTable, src, srcSize,
824 workSpace, sizeof(workSpace));
825 }
826
827 size_t HUF_decompress1X4_usingDTable(
828 void* dst, size_t dstSize,
829 const void* cSrc, size_t cSrcSize,
830 const HUF_DTable* DTable)
831 {
832 DTableDesc dtd = HUF_getDTableDesc(DTable);
833 if (dtd.tableType != 1) return ERROR(GENERIC);
834 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
835 }
836
837 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
838 const void* cSrc, size_t cSrcSize,
839 void* workSpace, size_t wkspSize)
840 {
841 const BYTE* ip = (const BYTE*) cSrc;
842
843 size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
844 workSpace, wkspSize);
845 if (HUF_isError(hSize)) return hSize;
846 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
847 ip += hSize; cSrcSize -= hSize;
848
849 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
850 }
851
852
853 size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
854 const void* cSrc, size_t cSrcSize)
855 {
856 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
857 return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
858 workSpace, sizeof(workSpace));
859 }
860
861 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
862 {
863 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
864 return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
865 }
866
867 size_t HUF_decompress4X4_usingDTable(
868 void* dst, size_t dstSize,
869 const void* cSrc, size_t cSrcSize,
870 const HUF_DTable* DTable)
871 {
872 DTableDesc dtd = HUF_getDTableDesc(DTable);
873 if (dtd.tableType != 1) return ERROR(GENERIC);
874 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
875 }
876
877 static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
878 const void* cSrc, size_t cSrcSize,
879 void* workSpace, size_t wkspSize, int bmi2)
880 {
881 const BYTE* ip = (const BYTE*) cSrc;
882
883 size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
884 workSpace, wkspSize); 884 workSpace, wkspSize);
885 if (HUF_isError(hSize)) return hSize; 885 if (HUF_isError(hSize)) return hSize;
886 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 886 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
887 ip += hSize; cSrcSize -= hSize; 887 ip += hSize; cSrcSize -= hSize;
888 888
889 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 889 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
890 } 890 }
891 891
892 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 892 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
893 const void* cSrc, size_t cSrcSize, 893 const void* cSrc, size_t cSrcSize,
894 void* workSpace, size_t wkspSize) 894 void* workSpace, size_t wkspSize)
895 { 895 {
896 return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); 896 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
897 } 897 }
898 898
899 899
900 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, 900 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
901 const void* cSrc, size_t cSrcSize) 901 const void* cSrc, size_t cSrcSize)
902 { 902 {
903 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 903 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
904 return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 904 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
905 workSpace, sizeof(workSpace)); 905 workSpace, sizeof(workSpace));
906 } 906 }
907 907
908 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 908 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
909 { 909 {
910 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); 910 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
911 return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); 911 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
912 } 912 }
913 913
914 914
915 /* ********************************/ 915 /* ***********************************/
916 /* Generic decompression selector */ 916 /* Universal decompression selectors */
917 /* ********************************/ 917 /* ***********************************/
918 918
919 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, 919 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
920 const void* cSrc, size_t cSrcSize, 920 const void* cSrc, size_t cSrcSize,
921 const HUF_DTable* DTable) 921 const HUF_DTable* DTable)
922 { 922 {
923 DTableDesc const dtd = HUF_getDTableDesc(DTable); 923 DTableDesc const dtd = HUF_getDTableDesc(DTable);
924 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 924 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
925 HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 925 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
926 } 926 }
927 927
928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, 928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
929 const void* cSrc, size_t cSrcSize, 929 const void* cSrc, size_t cSrcSize,
930 const HUF_DTable* DTable) 930 const HUF_DTable* DTable)
931 { 931 {
932 DTableDesc const dtd = HUF_getDTableDesc(DTable); 932 DTableDesc const dtd = HUF_getDTableDesc(DTable);
933 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 933 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
934 HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 934 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
935 } 935 }
936 936
937 937
938 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; 938 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
939 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = 939 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
958 }; 958 };
959 959
960 /** HUF_selectDecoder() : 960 /** HUF_selectDecoder() :
961 * Tells which decoder is likely to decode faster, 961 * Tells which decoder is likely to decode faster,
962 * based on a set of pre-computed metrics. 962 * based on a set of pre-computed metrics.
963 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . 963 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
964 * Assumption : 0 < dstSize <= 128 KB */ 964 * Assumption : 0 < dstSize <= 128 KB */
965 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) 965 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
966 { 966 {
967 assert(dstSize > 0); 967 assert(dstSize > 0);
968 assert(dstSize <= 128 KB); 968 assert(dstSize <= 128*1024);
969 /* decoder timing evaluation */ 969 /* decoder timing evaluation */
970 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ 970 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
971 U32 const D256 = (U32)(dstSize >> 8); 971 U32 const D256 = (U32)(dstSize >> 8);
972 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); 972 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
973 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); 973 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
978 978
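The HUF_selectDecoder() comment above describes a simple cost model: quantize the compression ratio into one of 16 buckets, then estimate each decoder's time as a fixed table-build cost plus a per-256-byte decode cost, and pick the smaller estimate. The following self-contained sketch (not from the vendored source) only illustrates the shape of that computation; the timing constants are made up, the real values live in the elided algoTime[][] table (which is also indexed by the ratio bucket Q), and the final comparison direction is an inference from the comment's description.

/* illustrative sketch of the selection heuristic; constants are hypothetical */
#include <stddef.h>
#include <stdio.h>

typedef struct { unsigned tableTime; unsigned decode256Time; } sketch_time_t;

static unsigned sketch_selectDecoder(size_t dstSize, size_t cSrcSize)
{
    /* hypothetical costs for one ratio bucket: [0] = single-symbol, [1] = double-symbol */
    static const sketch_time_t cost[2] = { {0, 150}, {400, 120} };
    unsigned const Q = (cSrcSize >= dstSize) ? 15 : (unsigned)(cSrcSize * 16 / dstSize); /* ratio bucket, 0..15 */
    unsigned const D256 = (unsigned)(dstSize >> 8);   /* output size in 256-byte units */
    unsigned const DTime0 = cost[0].tableTime + cost[0].decode256Time * D256;
    unsigned const DTime1 = cost[1].tableTime + cost[1].decode256Time * D256;
    (void)Q;  /* the real algoTime table is also indexed by Q; this sketch uses a single bucket */
    return DTime1 < DTime0;   /* 1 -> double-symbol decoder estimated faster */
}

int main(void)
{
    printf("decoder (0=single-symbol, 1=double-symbol): %u\n",
           sketch_selectDecoder(100*1024, 40*1024));
    return 0;
}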
979 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); 979 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
980 980
981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
982 { 982 {
983 static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 }; 983 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
984 984
985 /* validation checks */ 985 /* validation checks */
986 if (dstSize == 0) return ERROR(dstSize_tooSmall); 986 if (dstSize == 0) return ERROR(dstSize_tooSmall);
987 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 987 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
988 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 988 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1000 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1000 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1001 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1001 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1002 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1002 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1003 1003
1004 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1004 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1005 return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : 1005 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1006 HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; 1006 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1007 } 1007 }
1008 } 1008 }
1009 1009
1010 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 1010 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1011 { 1011 {
1023 /* validation checks */ 1023 /* validation checks */
1024 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1024 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1025 if (cSrcSize == 0) return ERROR(corruption_detected); 1025 if (cSrcSize == 0) return ERROR(corruption_detected);
1026 1026
1027 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1027 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1028 return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): 1028 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1029 HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1029 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1030 } 1030 }
1031 } 1031 }
1032 1032
1033 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1033 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1034 const void* cSrc, size_t cSrcSize, 1034 const void* cSrc, size_t cSrcSize,
1039 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1039 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1040 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1040 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1041 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1041 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1042 1042
1043 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1043 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1044 return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, 1044 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1045 cSrcSize, workSpace, wkspSize): 1045 cSrcSize, workSpace, wkspSize):
1046 HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1046 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1047 cSrcSize, workSpace, wkspSize); 1047 cSrcSize, workSpace, wkspSize);
1048 } 1048 }
1049 } 1049 }
1050 1050
1051 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, 1051 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1058 1058
1059 1059
1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1061 { 1061 {
1062 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1062 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1063 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1063 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1064 HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1064 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1065 } 1065 }
1066 1066
1067 size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1067 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1068 { 1068 {
1069 const BYTE* ip = (const BYTE*) cSrc; 1069 const BYTE* ip = (const BYTE*) cSrc;
1070 1070
1071 size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); 1071 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1072 if (HUF_isError(hSize)) return hSize; 1072 if (HUF_isError(hSize)) return hSize;
1073 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1073 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1074 ip += hSize; cSrcSize -= hSize; 1074 ip += hSize; cSrcSize -= hSize;
1075 1075
1076 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 1076 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1077 } 1077 }
1078 1078
1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1080 { 1080 {
1081 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1081 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1082 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1082 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1083 HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1083 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1084 } 1084 }
1085 1085
1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1087 { 1087 {
1088 /* validation checks */ 1088 /* validation checks */
1089 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1089 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1090 if (cSrcSize == 0) return ERROR(corruption_detected); 1090 if (cSrcSize == 0) return ERROR(corruption_detected);
1091 1091
1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1093 return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : 1093 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1094 HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1094 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1095 } 1095 }
1096 } 1096 }
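To show how the workspace/BMI2 entry points defined above fit together, here is a minimal caller-side sketch, assuming the internal headers this file already includes (for HUF_DTable, U32, the workspace/table macros and the entry point itself) are in scope. The function name and buffer parameters are inventions of the sketch, the choice of the double-symbol static table macro as "large enough for either decoder" is an assumption, and the bmi2 flag would come from the caller's own CPU-feature detection.

/* hypothetical caller of the workspace-based, BMI2-aware 4-streams decoder */
size_t sketch_huf_block_decompress(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize, int bmi2)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);  /* double-symbol layout, assumed large enough for either table type */
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];      /* scratch space for table construction */
    size_t const result = HUF_decompress4X_hufOnly_wksp_bmi2(DTable, dst, dstCapacity,
                                                             src, srcSize,
                                                             workSpace, sizeof(workSpace), bmi2);
    /* on success, result is the number of bytes written into dst;
     * otherwise it is an error code recognized by HUF_isError() */
    return result;
}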