comparison contrib/python-zstandard/zstd/common/fse_decompress.c @ 30434:2e484bdea8c4

zstd: vendor zstd 1.1.1 zstd is a new compression format and it is awesome, yielding higher compression ratios and significantly faster compression and decompression operations compared to zlib (our current compression engine of choice) across the board. We want zstd to be a 1st class citizen in Mercurial and to eventually be the preferred compression format for various operations. This patch starts the formal process of supporting zstd by vendoring a copy of zstd. Why do we need to vendor zstd? Good question. First, zstd is relatively new and not widely available yet. If we didn't vendor zstd or distribute it with Mercurial, most users likely wouldn't have zstd installed or even available to install. What good is a feature if you can't use it? Vendoring and distributing the zstd sources gives us the highest liklihood that zstd will be available to Mercurial installs. Second, the Python bindings to zstd (which will be vendored in a separate changeset) make use of zstd APIs that are only available via static linking. One reason they are only available via static linking is that they are unstable and could change at any time. While it might be possible for the Python bindings to attempt to talk to different versions of the zstd C library, the safest thing to do is link against a specific, known-working version of zstd. This is why the Python zstd bindings themselves vendor zstd and why we must as well. This also explains why the added files are in a "python-zstandard" directory. The added files are from the 1.1.1 release of zstd (Git commit 4c0b44f8ced84c4c8edfa07b564d31e4fa3e8885 from https://github.com/facebook/zstd) and are added without modifications. Not all files from the zstd "distribution" have been added. Notably missing are files to support interacting with "legacy," pre-1.0 versions of zstd. The decision of which files to include is made by the upstream python-zstandard project (which I'm the author of). The files in this commit are a snapshot of the files from the 0.5.0 release of that project, Git commit e637c1b214d5f869cf8116c550dcae23ec13b677 from https://github.com/indygreg/python-zstandard.
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 10 Nov 2016 21:45:29 -0800
parents
children b54a2984cdd4
comparison
equal deleted inserted replaced
30433:96f2f50d923f 30434:2e484bdea8c4
1 /* ******************************************************************
2 FSE : Finite State Entropy decoder
3 Copyright (C) 2013-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34
35
36 /* **************************************************************
37 * Compiler specifics
38 ****************************************************************/
39 #ifdef _MSC_VER /* Visual Studio */
40 # define FORCE_INLINE static __forceinline
41 # include <intrin.h> /* For Visual 2005 */
42 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
43 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
44 #else
45 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
46 # ifdef __GNUC__
47 # define FORCE_INLINE static inline __attribute__((always_inline))
48 # else
49 # define FORCE_INLINE static inline
50 # endif
51 # else
52 # define FORCE_INLINE static
53 # endif /* __STDC_VERSION__ */
54 #endif
55
56
57 /* **************************************************************
58 * Includes
59 ****************************************************************/
60 #include <stdlib.h> /* malloc, free, qsort */
61 #include <string.h> /* memcpy, memset */
62 #include <stdio.h> /* printf (debug) */
63 #include "bitstream.h"
64 #define FSE_STATIC_LINKING_ONLY
65 #include "fse.h"
66
67
68 /* **************************************************************
69 * Error Management
70 ****************************************************************/
71 #define FSE_isError ERR_isError
72 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
73
74 /* check and forward error code */
75 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
76
77
78 /* **************************************************************
79 * Complex types
80 ****************************************************************/
81 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
82
83
84 /* **************************************************************
85 * Templates
86 ****************************************************************/
87 /*
88 designed to be included
89 for type-specific functions (template emulation in C)
90 Objective is to write these functions only once, for improved maintenance
91 */
92
93 /* safety checks */
94 #ifndef FSE_FUNCTION_EXTENSION
95 # error "FSE_FUNCTION_EXTENSION must be defined"
96 #endif
97 #ifndef FSE_FUNCTION_TYPE
98 # error "FSE_FUNCTION_TYPE must be defined"
99 #endif
100
101 /* Function names */
102 #define FSE_CAT(X,Y) X##Y
103 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
104 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
105
106
107 /* Function templates */
108 FSE_DTable* FSE_createDTable (unsigned tableLog)
109 {
110 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
111 return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
112 }
113
114 void FSE_freeDTable (FSE_DTable* dt)
115 {
116 free(dt);
117 }
118
119 size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
120 {
121 void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
122 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
123 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
124
125 U32 const maxSV1 = maxSymbolValue + 1;
126 U32 const tableSize = 1 << tableLog;
127 U32 highThreshold = tableSize-1;
128
129 /* Sanity Checks */
130 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
131 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
132
133 /* Init, lay down lowprob symbols */
134 { FSE_DTableHeader DTableH;
135 DTableH.tableLog = (U16)tableLog;
136 DTableH.fastMode = 1;
137 { S16 const largeLimit= (S16)(1 << (tableLog-1));
138 U32 s;
139 for (s=0; s<maxSV1; s++) {
140 if (normalizedCounter[s]==-1) {
141 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
142 symbolNext[s] = 1;
143 } else {
144 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
145 symbolNext[s] = normalizedCounter[s];
146 } } }
147 memcpy(dt, &DTableH, sizeof(DTableH));
148 }
149
150 /* Spread symbols */
151 { U32 const tableMask = tableSize-1;
152 U32 const step = FSE_TABLESTEP(tableSize);
153 U32 s, position = 0;
154 for (s=0; s<maxSV1; s++) {
155 int i;
156 for (i=0; i<normalizedCounter[s]; i++) {
157 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
158 position = (position + step) & tableMask;
159 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
160 } }
161 if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
162 }
163
164 /* Build Decoding table */
165 { U32 u;
166 for (u=0; u<tableSize; u++) {
167 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
168 U16 nextState = symbolNext[symbol]++;
169 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
170 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
171 } }
172
173 return 0;
174 }
175
176
177 #ifndef FSE_COMMONDEFS_ONLY
178
179 /*-*******************************************************
180 * Decompression (Byte symbols)
181 *********************************************************/
182 size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
183 {
184 void* ptr = dt;
185 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
186 void* dPtr = dt + 1;
187 FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
188
189 DTableH->tableLog = 0;
190 DTableH->fastMode = 0;
191
192 cell->newState = 0;
193 cell->symbol = symbolValue;
194 cell->nbBits = 0;
195
196 return 0;
197 }
198
199
200 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
201 {
202 void* ptr = dt;
203 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
204 void* dPtr = dt + 1;
205 FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
206 const unsigned tableSize = 1 << nbBits;
207 const unsigned tableMask = tableSize - 1;
208 const unsigned maxSV1 = tableMask+1;
209 unsigned s;
210
211 /* Sanity checks */
212 if (nbBits < 1) return ERROR(GENERIC); /* min size */
213
214 /* Build Decoding Table */
215 DTableH->tableLog = (U16)nbBits;
216 DTableH->fastMode = 1;
217 for (s=0; s<maxSV1; s++) {
218 dinfo[s].newState = 0;
219 dinfo[s].symbol = (BYTE)s;
220 dinfo[s].nbBits = (BYTE)nbBits;
221 }
222
223 return 0;
224 }
225
226 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
227 void* dst, size_t maxDstSize,
228 const void* cSrc, size_t cSrcSize,
229 const FSE_DTable* dt, const unsigned fast)
230 {
231 BYTE* const ostart = (BYTE*) dst;
232 BYTE* op = ostart;
233 BYTE* const omax = op + maxDstSize;
234 BYTE* const olimit = omax-3;
235
236 BIT_DStream_t bitD;
237 FSE_DState_t state1;
238 FSE_DState_t state2;
239
240 /* Init */
241 CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
242
243 FSE_initDState(&state1, &bitD, dt);
244 FSE_initDState(&state2, &bitD, dt);
245
246 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
247
248 /* 4 symbols per loop */
249 for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
250 op[0] = FSE_GETSYMBOL(&state1);
251
252 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
253 BIT_reloadDStream(&bitD);
254
255 op[1] = FSE_GETSYMBOL(&state2);
256
257 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
258 { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
259
260 op[2] = FSE_GETSYMBOL(&state1);
261
262 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
263 BIT_reloadDStream(&bitD);
264
265 op[3] = FSE_GETSYMBOL(&state2);
266 }
267
268 /* tail */
269 /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
270 while (1) {
271 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
272 *op++ = FSE_GETSYMBOL(&state1);
273 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
274 *op++ = FSE_GETSYMBOL(&state2);
275 break;
276 }
277
278 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
279 *op++ = FSE_GETSYMBOL(&state2);
280 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
281 *op++ = FSE_GETSYMBOL(&state1);
282 break;
283 } }
284
285 return op-ostart;
286 }
287
288
289 size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
290 const void* cSrc, size_t cSrcSize,
291 const FSE_DTable* dt)
292 {
293 const void* ptr = dt;
294 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
295 const U32 fastMode = DTableH->fastMode;
296
297 /* select fast mode (static) */
298 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
299 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
300 }
301
302
303 size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
304 {
305 const BYTE* const istart = (const BYTE*)cSrc;
306 const BYTE* ip = istart;
307 short counting[FSE_MAX_SYMBOL_VALUE+1];
308 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
309 unsigned tableLog;
310 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
311
312 if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
313
314 /* normal FSE decoding mode */
315 { size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
316 if (FSE_isError(NCountLength)) return NCountLength;
317 if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
318 ip += NCountLength;
319 cSrcSize -= NCountLength;
320 }
321
322 CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) );
323
324 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); /* always return, even if it is an error code */
325 }
326
327
328
329 #endif /* FSE_COMMONDEFS_ONLY */