comparison contrib/python-zstandard/zstd/compress/fse_compress.c @ 40121:73fef626dae3

zstandard: vendor python-zstandard 0.10.1 This was just released. The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. setup.py was updated to pass a new argument to python-zstandard's function for returning an Extension instance. Upstream had to change to use relative paths because Python 3.7's packaging doesn't seem to like absolute paths when defining sources, includes, etc. The default relative path calculation is relative to setup_zstd.py which is different from the directory of Mercurial's setup.py. The project contains a vendored copy of zstandard 1.3.6. The old version was 1.3.4. The API should be backwards compatible and nothing in core should need adjusted. However, there is a new "chunker" API that we may find useful in places where we want to emit compressed chunks of a fixed size. There are a pair of bug fixes in 0.10.0 with regards to compressobj() and decompressobj() when block flushing is used. I actually found these bugs when introducing these APIs in Mercurial! But existing Mercurial code is not affected because we don't perform block flushing. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D4911
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 08 Oct 2018 16:27:40 -0700
parents b1fb341d8a61
children 675775c33ab6
comparison
equal deleted inserted replaced
40120:89742f1fa6cb 40121:73fef626dae3
1 /* ****************************************************************** 1 /* ******************************************************************
2 FSE : Finite State Entropy encoder 2 FSE : Finite State Entropy encoder
3 Copyright (C) 2013-2015, Yann Collet. 3 Copyright (C) 2013-present, Yann Collet.
4 4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 6
7 Redistribution and use in source and binary forms, with or without 7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are 8 modification, are permitted provided that the following conditions are
35 /* ************************************************************** 35 /* **************************************************************
36 * Includes 36 * Includes
37 ****************************************************************/ 37 ****************************************************************/
38 #include <stdlib.h> /* malloc, free, qsort */ 38 #include <stdlib.h> /* malloc, free, qsort */
39 #include <string.h> /* memcpy, memset */ 39 #include <string.h> /* memcpy, memset */
40 #include <stdio.h> /* printf (debug) */ 40 #include "compiler.h"
41 #include "mem.h" /* U32, U16, etc. */
42 #include "debug.h" /* assert, DEBUGLOG */
43 #include "hist.h" /* HIST_count_wksp */
41 #include "bitstream.h" 44 #include "bitstream.h"
42 #include "compiler.h"
43 #define FSE_STATIC_LINKING_ONLY 45 #define FSE_STATIC_LINKING_ONLY
44 #include "fse.h" 46 #include "fse.h"
45 #include "error_private.h" 47 #include "error_private.h"
46 48
47 49
48 /* ************************************************************** 50 /* **************************************************************
49 * Error Management 51 * Error Management
50 ****************************************************************/ 52 ****************************************************************/
51 #define FSE_isError ERR_isError 53 #define FSE_isError ERR_isError
52 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
53 54
54 55
55 /* ************************************************************** 56 /* **************************************************************
56 * Templates 57 * Templates
57 ****************************************************************/ 58 ****************************************************************/
80 /* FSE_buildCTable_wksp() : 81 /* FSE_buildCTable_wksp() :
81 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 82 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
82 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` 83 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
83 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements 84 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
84 */ 85 */
85 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) 86 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
87 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
88 void* workSpace, size_t wkspSize)
86 { 89 {
87 U32 const tableSize = 1 << tableLog; 90 U32 const tableSize = 1 << tableLog;
88 U32 const tableMask = tableSize - 1; 91 U32 const tableMask = tableSize - 1;
89 void* const ptr = ct; 92 void* const ptr = ct;
90 U16* const tableU16 = ( (U16*) ptr) + 2; 93 U16* const tableU16 = ( (U16*) ptr) + 2;
98 101
99 /* CTable header */ 102 /* CTable header */
100 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); 103 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
101 tableU16[-2] = (U16) tableLog; 104 tableU16[-2] = (U16) tableLog;
102 tableU16[-1] = (U16) maxSymbolValue; 105 tableU16[-1] = (U16) maxSymbolValue;
106 assert(tableLog < 16); /* required for threshold strategy to work */
103 107
104 /* For explanations on how to distribute symbol values over the table : 108 /* For explanations on how to distribute symbol values over the table :
105 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ 109 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
110
111 #ifdef __clang_analyzer__
112 memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
113 #endif
106 114
107 /* symbol start positions */ 115 /* symbol start positions */
108 { U32 u; 116 { U32 u;
109 cumul[0] = 0; 117 cumul[0] = 0;
110 for (u=1; u<=maxSymbolValue+1; u++) { 118 for (u=1; u<=maxSymbolValue+1; u++) {
120 /* Spread symbols */ 128 /* Spread symbols */
121 { U32 position = 0; 129 { U32 position = 0;
122 U32 symbol; 130 U32 symbol;
123 for (symbol=0; symbol<=maxSymbolValue; symbol++) { 131 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
124 int nbOccurences; 132 int nbOccurences;
125 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) { 133 int const freq = normalizedCounter[symbol];
134 for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
126 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; 135 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
127 position = (position + step) & tableMask; 136 position = (position + step) & tableMask;
128 while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */ 137 while (position > highThreshold)
138 position = (position + step) & tableMask; /* Low proba area */
129 } } 139 } }
130 140
131 if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ 141 assert(position==0); /* Must have initialized all positions */
132 } 142 }
133 143
134 /* Build table */ 144 /* Build table */
135 { U32 u; for (u=0; u<tableSize; u++) { 145 { U32 u; for (u=0; u<tableSize; u++) {
136 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ 146 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
141 { unsigned total = 0; 151 { unsigned total = 0;
142 unsigned s; 152 unsigned s;
143 for (s=0; s<=maxSymbolValue; s++) { 153 for (s=0; s<=maxSymbolValue; s++) {
144 switch (normalizedCounter[s]) 154 switch (normalizedCounter[s])
145 { 155 {
146 case 0: break; 156 case 0:
157 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
158 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
159 break;
147 160
148 case -1: 161 case -1:
149 case 1: 162 case 1:
150 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); 163 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
151 symbolTT[s].deltaFindState = total - 1; 164 symbolTT[s].deltaFindState = total - 1;
158 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; 171 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
159 symbolTT[s].deltaFindState = total - normalizedCounter[s]; 172 symbolTT[s].deltaFindState = total - normalizedCounter[s];
160 total += normalizedCounter[s]; 173 total += normalizedCounter[s];
161 } } } } 174 } } } }
162 175
176 #if 0 /* debug : symbol costs */
177 DEBUGLOG(5, "\n --- table statistics : ");
178 { U32 symbol;
179 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
180 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
181 symbol, normalizedCounter[symbol],
182 FSE_getMaxNbBits(symbolTT, symbol),
183 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
184 }
185 }
186 #endif
187
163 return 0; 188 return 0;
164 } 189 }
165 190
166 191
167 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 192 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
172 197
173 198
174 199
175 #ifndef FSE_COMMONDEFS_ONLY 200 #ifndef FSE_COMMONDEFS_ONLY
176 201
202
177 /*-************************************************************** 203 /*-**************************************************************
178 * FSE NCount encoding-decoding 204 * FSE NCount encoding
179 ****************************************************************/ 205 ****************************************************************/
180 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) 206 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
181 { 207 {
182 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; 208 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
183 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ 209 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
184 } 210 }
185 211
186 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, 212 static size_t
187 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, 213 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
188 unsigned writeIsSafe) 214 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
215 unsigned writeIsSafe)
189 { 216 {
190 BYTE* const ostart = (BYTE*) header; 217 BYTE* const ostart = (BYTE*) header;
191 BYTE* out = ostart; 218 BYTE* out = ostart;
192 BYTE* const oend = ostart + headerBufferSize; 219 BYTE* const oend = ostart + headerBufferSize;
193 int nbBits; 220 int nbBits;
194 const int tableSize = 1 << tableLog; 221 const int tableSize = 1 << tableLog;
195 int remaining; 222 int remaining;
196 int threshold; 223 int threshold;
197 U32 bitStream; 224 U32 bitStream = 0;
198 int bitCount; 225 int bitCount = 0;
199 unsigned charnum = 0; 226 unsigned symbol = 0;
200 int previous0 = 0; 227 unsigned const alphabetSize = maxSymbolValue + 1;
201 228 int previousIs0 = 0;
202 bitStream = 0; 229
203 bitCount = 0;
204 /* Table Size */ 230 /* Table Size */
205 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; 231 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
206 bitCount += 4; 232 bitCount += 4;
207 233
208 /* Init */ 234 /* Init */
209 remaining = tableSize+1; /* +1 for extra accuracy */ 235 remaining = tableSize+1; /* +1 for extra accuracy */
210 threshold = tableSize; 236 threshold = tableSize;
211 nbBits = tableLog+1; 237 nbBits = tableLog+1;
212 238
213 while (remaining>1) { /* stops at 1 */ 239 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
214 if (previous0) { 240 if (previousIs0) {
215 unsigned start = charnum; 241 unsigned start = symbol;
216 while (!normalizedCounter[charnum]) charnum++; 242 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
217 while (charnum >= start+24) { 243 if (symbol == alphabetSize) break; /* incorrect distribution */
244 while (symbol >= start+24) {
218 start+=24; 245 start+=24;
219 bitStream += 0xFFFFU << bitCount; 246 bitStream += 0xFFFFU << bitCount;
220 if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 247 if ((!writeIsSafe) && (out > oend-2))
248 return ERROR(dstSize_tooSmall); /* Buffer overflow */
221 out[0] = (BYTE) bitStream; 249 out[0] = (BYTE) bitStream;
222 out[1] = (BYTE)(bitStream>>8); 250 out[1] = (BYTE)(bitStream>>8);
223 out+=2; 251 out+=2;
224 bitStream>>=16; 252 bitStream>>=16;
225 } 253 }
226 while (charnum >= start+3) { 254 while (symbol >= start+3) {
227 start+=3; 255 start+=3;
228 bitStream += 3 << bitCount; 256 bitStream += 3 << bitCount;
229 bitCount += 2; 257 bitCount += 2;
230 } 258 }
231 bitStream += (charnum-start) << bitCount; 259 bitStream += (symbol-start) << bitCount;
232 bitCount += 2; 260 bitCount += 2;
233 if (bitCount>16) { 261 if (bitCount>16) {
234 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 262 if ((!writeIsSafe) && (out > oend - 2))
263 return ERROR(dstSize_tooSmall); /* Buffer overflow */
235 out[0] = (BYTE)bitStream; 264 out[0] = (BYTE)bitStream;
236 out[1] = (BYTE)(bitStream>>8); 265 out[1] = (BYTE)(bitStream>>8);
237 out += 2; 266 out += 2;
238 bitStream >>= 16; 267 bitStream >>= 16;
239 bitCount -= 16; 268 bitCount -= 16;
240 } } 269 } }
241 { int count = normalizedCounter[charnum++]; 270 { int count = normalizedCounter[symbol++];
242 int const max = (2*threshold-1)-remaining; 271 int const max = (2*threshold-1) - remaining;
243 remaining -= count < 0 ? -count : count; 272 remaining -= count < 0 ? -count : count;
244 count++; /* +1 for extra accuracy */ 273 count++; /* +1 for extra accuracy */
245 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ 274 if (count>=threshold)
275 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
246 bitStream += count << bitCount; 276 bitStream += count << bitCount;
247 bitCount += nbBits; 277 bitCount += nbBits;
248 bitCount -= (count<max); 278 bitCount -= (count<max);
249 previous0 = (count==1); 279 previousIs0 = (count==1);
250 if (remaining<1) return ERROR(GENERIC); 280 if (remaining<1) return ERROR(GENERIC);
251 while (remaining<threshold) { nbBits--; threshold>>=1; } 281 while (remaining<threshold) { nbBits--; threshold>>=1; }
252 } 282 }
253 if (bitCount>16) { 283 if (bitCount>16) {
254 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 284 if ((!writeIsSafe) && (out > oend - 2))
285 return ERROR(dstSize_tooSmall); /* Buffer overflow */
255 out[0] = (BYTE)bitStream; 286 out[0] = (BYTE)bitStream;
256 out[1] = (BYTE)(bitStream>>8); 287 out[1] = (BYTE)(bitStream>>8);
257 out += 2; 288 out += 2;
258 bitStream >>= 16; 289 bitStream >>= 16;
259 bitCount -= 16; 290 bitCount -= 16;
260 } } 291 } }
261 292
293 if (remaining != 1)
294 return ERROR(GENERIC); /* incorrect normalized distribution */
295 assert(symbol <= alphabetSize);
296
262 /* flush remaining bitStream */ 297 /* flush remaining bitStream */
263 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 298 if ((!writeIsSafe) && (out > oend - 2))
299 return ERROR(dstSize_tooSmall); /* Buffer overflow */
264 out[0] = (BYTE)bitStream; 300 out[0] = (BYTE)bitStream;
265 out[1] = (BYTE)(bitStream>>8); 301 out[1] = (BYTE)(bitStream>>8);
266 out+= (bitCount+7) /8; 302 out+= (bitCount+7) /8;
267 303
268 if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
269
270 return (out-ostart); 304 return (out-ostart);
271 } 305 }
272 306
273 307
274 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 308 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
309 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
275 { 310 {
276 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ 311 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
277 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ 312 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
278 313
279 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) 314 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
280 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); 315 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
281 316
282 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); 317 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
283 } 318 }
284
285
286
287 /*-**************************************************************
288 * Counting histogram
289 ****************************************************************/
290 /*! FSE_count_simple
291 This function counts byte values within `src`, and store the histogram into table `count`.
292 It doesn't use any additional memory.
293 But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
294 For this reason, prefer using a table `count` with 256 elements.
295 @return : count of most numerous element.
296 */
297 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
298 const void* src, size_t srcSize)
299 {
300 const BYTE* ip = (const BYTE*)src;
301 const BYTE* const end = ip + srcSize;
302 unsigned maxSymbolValue = *maxSymbolValuePtr;
303 unsigned max=0;
304
305 memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
306 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
307
308 while (ip<end) {
309 assert(*ip <= maxSymbolValue);
310 count[*ip++]++;
311 }
312
313 while (!count[maxSymbolValue]) maxSymbolValue--;
314 *maxSymbolValuePtr = maxSymbolValue;
315
316 { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
317
318 return (size_t)max;
319 }
320
321
322 /* FSE_count_parallel_wksp() :
323 * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
324 * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
325 * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
326 static size_t FSE_count_parallel_wksp(
327 unsigned* count, unsigned* maxSymbolValuePtr,
328 const void* source, size_t sourceSize,
329 unsigned checkMax, unsigned* const workSpace)
330 {
331 const BYTE* ip = (const BYTE*)source;
332 const BYTE* const iend = ip+sourceSize;
333 unsigned maxSymbolValue = *maxSymbolValuePtr;
334 unsigned max=0;
335 U32* const Counting1 = workSpace;
336 U32* const Counting2 = Counting1 + 256;
337 U32* const Counting3 = Counting2 + 256;
338 U32* const Counting4 = Counting3 + 256;
339
340 memset(workSpace, 0, 4*256*sizeof(unsigned));
341
342 /* safety checks */
343 if (!sourceSize) {
344 memset(count, 0, maxSymbolValue + 1);
345 *maxSymbolValuePtr = 0;
346 return 0;
347 }
348 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
349
350 /* by stripes of 16 bytes */
351 { U32 cached = MEM_read32(ip); ip += 4;
352 while (ip < iend-15) {
353 U32 c = cached; cached = MEM_read32(ip); ip += 4;
354 Counting1[(BYTE) c ]++;
355 Counting2[(BYTE)(c>>8) ]++;
356 Counting3[(BYTE)(c>>16)]++;
357 Counting4[ c>>24 ]++;
358 c = cached; cached = MEM_read32(ip); ip += 4;
359 Counting1[(BYTE) c ]++;
360 Counting2[(BYTE)(c>>8) ]++;
361 Counting3[(BYTE)(c>>16)]++;
362 Counting4[ c>>24 ]++;
363 c = cached; cached = MEM_read32(ip); ip += 4;
364 Counting1[(BYTE) c ]++;
365 Counting2[(BYTE)(c>>8) ]++;
366 Counting3[(BYTE)(c>>16)]++;
367 Counting4[ c>>24 ]++;
368 c = cached; cached = MEM_read32(ip); ip += 4;
369 Counting1[(BYTE) c ]++;
370 Counting2[(BYTE)(c>>8) ]++;
371 Counting3[(BYTE)(c>>16)]++;
372 Counting4[ c>>24 ]++;
373 }
374 ip-=4;
375 }
376
377 /* finish last symbols */
378 while (ip<iend) Counting1[*ip++]++;
379
380 if (checkMax) { /* verify stats will fit into destination table */
381 U32 s; for (s=255; s>maxSymbolValue; s--) {
382 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
383 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
384 } }
385
386 { U32 s;
387 if (maxSymbolValue > 255) maxSymbolValue = 255;
388 for (s=0; s<=maxSymbolValue; s++) {
389 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
390 if (count[s] > max) max = count[s];
391 } }
392
393 while (!count[maxSymbolValue]) maxSymbolValue--;
394 *maxSymbolValuePtr = maxSymbolValue;
395 return (size_t)max;
396 }
397
398 /* FSE_countFast_wksp() :
399 * Same as FSE_countFast(), but using an externally provided scratch buffer.
400 * `workSpace` size must be table of >= `1024` unsigned */
401 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
402 const void* source, size_t sourceSize,
403 unsigned* workSpace)
404 {
405 if (sourceSize < 1500) /* heuristic threshold */
406 return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
407 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
408 }
409
410 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
411 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
412 const void* source, size_t sourceSize)
413 {
414 unsigned tmpCounters[1024];
415 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
416 }
417
418 /* FSE_count_wksp() :
419 * Same as FSE_count(), but using an externally provided scratch buffer.
420 * `workSpace` size must be table of >= `1024` unsigned */
421 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
422 const void* source, size_t sourceSize, unsigned* workSpace)
423 {
424 if (*maxSymbolValuePtr < 255)
425 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
426 *maxSymbolValuePtr = 255;
427 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
428 }
429
430 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
431 const void* src, size_t srcSize)
432 {
433 unsigned tmpCounters[1024];
434 return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
435 }
436
437 319
438 320
439 /*-************************************************************** 321 /*-**************************************************************
440 * FSE Compression Code 322 * FSE Compression Code
441 ****************************************************************/ 323 ****************************************************************/
442 /*! FSE_sizeof_CTable() :
443 FSE_CTable is a variable size structure which contains :
444 `U16 tableLog;`
445 `U16 maxSymbolValue;`
446 `U16 nextStateNumber[1 << tableLog];` // This size is variable
447 `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
448 Allocation is manual (C standard does not support variable-size structures).
449 */
450 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
451 {
452 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
453 return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
454 }
455 324
456 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) 325 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
457 { 326 {
458 size_t size; 327 size_t size;
459 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; 328 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
464 void FSE_freeCTable (FSE_CTable* ct) { free(ct); } 333 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
465 334
466 /* provides the minimum logSize to safely represent a distribution */ 335 /* provides the minimum logSize to safely represent a distribution */
467 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) 336 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
468 { 337 {
469 U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; 338 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
470 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; 339 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
471 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; 340 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
472 assert(srcSize > 1); /* Not supported, RLE should be used instead */ 341 assert(srcSize > 1); /* Not supported, RLE should be used instead */
473 return minBits; 342 return minBits;
474 } 343 }
526 } 395 }
527 396
528 norm[s]=NOT_YET_ASSIGNED; 397 norm[s]=NOT_YET_ASSIGNED;
529 } 398 }
530 ToDistribute = (1 << tableLog) - distributed; 399 ToDistribute = (1 << tableLog) - distributed;
400
401 if (ToDistribute == 0)
402 return 0;
531 403
532 if ((total / ToDistribute) > lowOne) { 404 if ((total / ToDistribute) > lowOne) {
533 /* risk of rounding to zero */ 405 /* risk of rounding to zero */
534 lowOne = (U32)((total * 3) / (ToDistribute * 2)); 406 lowOne = (U32)((total * 3) / (ToDistribute * 2));
535 for (s=0; s<=maxSymbolValue; s++) { 407 for (s=0; s<=maxSymbolValue; s++) {
627 #if 0 499 #if 0
628 { /* Print Table (debug) */ 500 { /* Print Table (debug) */
629 U32 s; 501 U32 s;
630 U32 nTotal = 0; 502 U32 nTotal = 0;
631 for (s=0; s<=maxSymbolValue; s++) 503 for (s=0; s<=maxSymbolValue; s++)
632 printf("%3i: %4i \n", s, normalizedCounter[s]); 504 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
633 for (s=0; s<=maxSymbolValue; s++) 505 for (s=0; s<=maxSymbolValue; s++)
634 nTotal += abs(normalizedCounter[s]); 506 nTotal += abs(normalizedCounter[s]);
635 if (nTotal != (1U<<tableLog)) 507 if (nTotal != (1U<<tableLog))
636 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); 508 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
637 getchar(); 509 getchar();
638 } 510 }
639 #endif 511 #endif
640 512
641 return tableLog; 513 return tableLog;
798 if (srcSize <= 1) return 0; /* Not compressible */ 670 if (srcSize <= 1) return 0; /* Not compressible */
799 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; 671 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
800 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; 672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
801 673
802 /* Scan input and build symbol stats */ 674 /* Scan input and build symbol stats */
803 { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) ); 675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
804 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ 676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
805 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ 677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
806 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ 678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
807 } 679 }
808 680
833 } fseWkspMax_t; 705 } fseWkspMax_t;
834 706
835 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) 707 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
836 { 708 {
837 fseWkspMax_t scratchBuffer; 709 fseWkspMax_t scratchBuffer;
838 FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ 710 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
839 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); 711 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
840 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); 712 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
841 } 713 }
842 714
843 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 715 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)