Mercurial > hg
comparison contrib/python-zstandard/zstd/common/fse_decompress.c @ 30434:2e484bdea8c4
zstd: vendor zstd 1.1.1
zstd is a new compression format and it is awesome, yielding
higher compression ratios and significantly faster compression
and decompression operations compared to zlib (our current
compression engine of choice) across the board.
We want zstd to be a 1st class citizen in Mercurial and to eventually
be the preferred compression format for various operations.
This patch starts the formal process of supporting zstd by vendoring
a copy of zstd. Why do we need to vendor zstd? Good question.
First, zstd is relatively new and not widely available yet. If we
didn't vendor zstd or distribute it with Mercurial, most users likely
wouldn't have zstd installed or even available to install. What good
is a feature if you can't use it? Vendoring and distributing the zstd
sources gives us the highest liklihood that zstd will be available to
Mercurial installs.
Second, the Python bindings to zstd (which will be vendored in a
separate changeset) make use of zstd APIs that are only available
via static linking. One reason they are only available via static
linking is that they are unstable and could change at any time.
While it might be possible for the Python bindings to attempt to
talk to different versions of the zstd C library, the safest thing to
do is link against a specific, known-working version of zstd. This
is why the Python zstd bindings themselves vendor zstd and why we
must as well. This also explains why the added files are in a
"python-zstandard" directory.
The added files are from the 1.1.1 release of zstd (Git commit
4c0b44f8ced84c4c8edfa07b564d31e4fa3e8885 from
https://github.com/facebook/zstd) and are added without modifications.
Not all files from the zstd "distribution" have been added. Notably
missing are files to support interacting with "legacy," pre-1.0
versions of zstd. The decision of which files to include is made by
the upstream python-zstandard project (which I'm the author of). The
files in this commit are a snapshot of the files from the 0.5.0
release of that project, Git commit
e637c1b214d5f869cf8116c550dcae23ec13b677 from
https://github.com/indygreg/python-zstandard.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Thu, 10 Nov 2016 21:45:29 -0800 |
parents | |
children | b54a2984cdd4 |
comparison
equal
deleted
inserted
replaced
30433:96f2f50d923f | 30434:2e484bdea8c4 |
---|---|
1 /* ****************************************************************** | |
2 FSE : Finite State Entropy decoder | |
3 Copyright (C) 2013-2015, Yann Collet. | |
4 | |
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
6 | |
7 Redistribution and use in source and binary forms, with or without | |
8 modification, are permitted provided that the following conditions are | |
9 met: | |
10 | |
11 * Redistributions of source code must retain the above copyright | |
12 notice, this list of conditions and the following disclaimer. | |
13 * Redistributions in binary form must reproduce the above | |
14 copyright notice, this list of conditions and the following disclaimer | |
15 in the documentation and/or other materials provided with the | |
16 distribution. | |
17 | |
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | |
30 You can contact the author at : | |
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c | |
33 ****************************************************************** */ | |
34 | |
35 | |
36 /* ************************************************************** | |
37 * Compiler specifics | |
38 ****************************************************************/ | |
39 #ifdef _MSC_VER /* Visual Studio */ | |
40 # define FORCE_INLINE static __forceinline | |
41 # include <intrin.h> /* For Visual 2005 */ | |
42 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ | |
43 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ | |
44 #else | |
45 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ | |
46 # ifdef __GNUC__ | |
47 # define FORCE_INLINE static inline __attribute__((always_inline)) | |
48 # else | |
49 # define FORCE_INLINE static inline | |
50 # endif | |
51 # else | |
52 # define FORCE_INLINE static | |
53 # endif /* __STDC_VERSION__ */ | |
54 #endif | |
55 | |
56 | |
57 /* ************************************************************** | |
58 * Includes | |
59 ****************************************************************/ | |
60 #include <stdlib.h> /* malloc, free, qsort */ | |
61 #include <string.h> /* memcpy, memset */ | |
62 #include <stdio.h> /* printf (debug) */ | |
63 #include "bitstream.h" | |
64 #define FSE_STATIC_LINKING_ONLY | |
65 #include "fse.h" | |
66 | |
67 | |
68 /* ************************************************************** | |
69 * Error Management | |
70 ****************************************************************/ | |
71 #define FSE_isError ERR_isError | |
72 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ | |
73 | |
74 /* check and forward error code */ | |
75 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } | |
76 | |
77 | |
78 /* ************************************************************** | |
79 * Complex types | |
80 ****************************************************************/ | |
81 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; | |
82 | |
83 | |
84 /* ************************************************************** | |
85 * Templates | |
86 ****************************************************************/ | |
87 /* | |
88 designed to be included | |
89 for type-specific functions (template emulation in C) | |
90 Objective is to write these functions only once, for improved maintenance | |
91 */ | |
92 | |
93 /* safety checks */ | |
94 #ifndef FSE_FUNCTION_EXTENSION | |
95 # error "FSE_FUNCTION_EXTENSION must be defined" | |
96 #endif | |
97 #ifndef FSE_FUNCTION_TYPE | |
98 # error "FSE_FUNCTION_TYPE must be defined" | |
99 #endif | |
100 | |
101 /* Function names */ | |
102 #define FSE_CAT(X,Y) X##Y | |
103 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) | |
104 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) | |
105 | |
106 | |
107 /* Function templates */ | |
108 FSE_DTable* FSE_createDTable (unsigned tableLog) | |
109 { | |
110 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; | |
111 return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); | |
112 } | |
113 | |
114 void FSE_freeDTable (FSE_DTable* dt) | |
115 { | |
116 free(dt); | |
117 } | |
118 | |
119 size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) | |
120 { | |
121 void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ | |
122 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); | |
123 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; | |
124 | |
125 U32 const maxSV1 = maxSymbolValue + 1; | |
126 U32 const tableSize = 1 << tableLog; | |
127 U32 highThreshold = tableSize-1; | |
128 | |
129 /* Sanity Checks */ | |
130 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); | |
131 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); | |
132 | |
133 /* Init, lay down lowprob symbols */ | |
134 { FSE_DTableHeader DTableH; | |
135 DTableH.tableLog = (U16)tableLog; | |
136 DTableH.fastMode = 1; | |
137 { S16 const largeLimit= (S16)(1 << (tableLog-1)); | |
138 U32 s; | |
139 for (s=0; s<maxSV1; s++) { | |
140 if (normalizedCounter[s]==-1) { | |
141 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; | |
142 symbolNext[s] = 1; | |
143 } else { | |
144 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; | |
145 symbolNext[s] = normalizedCounter[s]; | |
146 } } } | |
147 memcpy(dt, &DTableH, sizeof(DTableH)); | |
148 } | |
149 | |
150 /* Spread symbols */ | |
151 { U32 const tableMask = tableSize-1; | |
152 U32 const step = FSE_TABLESTEP(tableSize); | |
153 U32 s, position = 0; | |
154 for (s=0; s<maxSV1; s++) { | |
155 int i; | |
156 for (i=0; i<normalizedCounter[s]; i++) { | |
157 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; | |
158 position = (position + step) & tableMask; | |
159 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ | |
160 } } | |
161 if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ | |
162 } | |
163 | |
164 /* Build Decoding table */ | |
165 { U32 u; | |
166 for (u=0; u<tableSize; u++) { | |
167 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); | |
168 U16 nextState = symbolNext[symbol]++; | |
169 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) ); | |
170 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); | |
171 } } | |
172 | |
173 return 0; | |
174 } | |
175 | |
176 | |
177 #ifndef FSE_COMMONDEFS_ONLY | |
178 | |
179 /*-******************************************************* | |
180 * Decompression (Byte symbols) | |
181 *********************************************************/ | |
182 size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) | |
183 { | |
184 void* ptr = dt; | |
185 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; | |
186 void* dPtr = dt + 1; | |
187 FSE_decode_t* const cell = (FSE_decode_t*)dPtr; | |
188 | |
189 DTableH->tableLog = 0; | |
190 DTableH->fastMode = 0; | |
191 | |
192 cell->newState = 0; | |
193 cell->symbol = symbolValue; | |
194 cell->nbBits = 0; | |
195 | |
196 return 0; | |
197 } | |
198 | |
199 | |
200 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) | |
201 { | |
202 void* ptr = dt; | |
203 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; | |
204 void* dPtr = dt + 1; | |
205 FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; | |
206 const unsigned tableSize = 1 << nbBits; | |
207 const unsigned tableMask = tableSize - 1; | |
208 const unsigned maxSV1 = tableMask+1; | |
209 unsigned s; | |
210 | |
211 /* Sanity checks */ | |
212 if (nbBits < 1) return ERROR(GENERIC); /* min size */ | |
213 | |
214 /* Build Decoding Table */ | |
215 DTableH->tableLog = (U16)nbBits; | |
216 DTableH->fastMode = 1; | |
217 for (s=0; s<maxSV1; s++) { | |
218 dinfo[s].newState = 0; | |
219 dinfo[s].symbol = (BYTE)s; | |
220 dinfo[s].nbBits = (BYTE)nbBits; | |
221 } | |
222 | |
223 return 0; | |
224 } | |
225 | |
226 FORCE_INLINE size_t FSE_decompress_usingDTable_generic( | |
227 void* dst, size_t maxDstSize, | |
228 const void* cSrc, size_t cSrcSize, | |
229 const FSE_DTable* dt, const unsigned fast) | |
230 { | |
231 BYTE* const ostart = (BYTE*) dst; | |
232 BYTE* op = ostart; | |
233 BYTE* const omax = op + maxDstSize; | |
234 BYTE* const olimit = omax-3; | |
235 | |
236 BIT_DStream_t bitD; | |
237 FSE_DState_t state1; | |
238 FSE_DState_t state2; | |
239 | |
240 /* Init */ | |
241 CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); | |
242 | |
243 FSE_initDState(&state1, &bitD, dt); | |
244 FSE_initDState(&state2, &bitD, dt); | |
245 | |
246 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) | |
247 | |
248 /* 4 symbols per loop */ | |
249 for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) { | |
250 op[0] = FSE_GETSYMBOL(&state1); | |
251 | |
252 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ | |
253 BIT_reloadDStream(&bitD); | |
254 | |
255 op[1] = FSE_GETSYMBOL(&state2); | |
256 | |
257 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ | |
258 { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } | |
259 | |
260 op[2] = FSE_GETSYMBOL(&state1); | |
261 | |
262 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ | |
263 BIT_reloadDStream(&bitD); | |
264 | |
265 op[3] = FSE_GETSYMBOL(&state2); | |
266 } | |
267 | |
268 /* tail */ | |
269 /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ | |
270 while (1) { | |
271 if (op>(omax-2)) return ERROR(dstSize_tooSmall); | |
272 *op++ = FSE_GETSYMBOL(&state1); | |
273 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { | |
274 *op++ = FSE_GETSYMBOL(&state2); | |
275 break; | |
276 } | |
277 | |
278 if (op>(omax-2)) return ERROR(dstSize_tooSmall); | |
279 *op++ = FSE_GETSYMBOL(&state2); | |
280 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { | |
281 *op++ = FSE_GETSYMBOL(&state1); | |
282 break; | |
283 } } | |
284 | |
285 return op-ostart; | |
286 } | |
287 | |
288 | |
289 size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, | |
290 const void* cSrc, size_t cSrcSize, | |
291 const FSE_DTable* dt) | |
292 { | |
293 const void* ptr = dt; | |
294 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; | |
295 const U32 fastMode = DTableH->fastMode; | |
296 | |
297 /* select fast mode (static) */ | |
298 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); | |
299 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); | |
300 } | |
301 | |
302 | |
303 size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) | |
304 { | |
305 const BYTE* const istart = (const BYTE*)cSrc; | |
306 const BYTE* ip = istart; | |
307 short counting[FSE_MAX_SYMBOL_VALUE+1]; | |
308 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ | |
309 unsigned tableLog; | |
310 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; | |
311 | |
312 if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ | |
313 | |
314 /* normal FSE decoding mode */ | |
315 { size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); | |
316 if (FSE_isError(NCountLength)) return NCountLength; | |
317 if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ | |
318 ip += NCountLength; | |
319 cSrcSize -= NCountLength; | |
320 } | |
321 | |
322 CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) ); | |
323 | |
324 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); /* always return, even if it is an error code */ | |
325 } | |
326 | |
327 | |
328 | |
329 #endif /* FSE_COMMONDEFS_ONLY */ |