contrib/python-zstandard/zstd/compress/huf_compress.c
changeset 37495 b1fb341d8a61
parent 30822 b54a2984cdd4
child 40122 73fef626dae3
equal deleted inserted replaced
37494:1ce7a55b09d1 37495:b1fb341d8a61
    44 *  Includes
    44 *  Includes
    45 ****************************************************************/
    45 ****************************************************************/
    46 #include <string.h>     /* memcpy, memset */
    46 #include <string.h>     /* memcpy, memset */
    47 #include <stdio.h>      /* printf (debug) */
    47 #include <stdio.h>      /* printf (debug) */
    48 #include "bitstream.h"
    48 #include "bitstream.h"
       
    49 #include "compiler.h"
    49 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
    50 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
    50 #include "fse.h"        /* header compression */
    51 #include "fse.h"        /* header compression */
    51 #define HUF_STATIC_LINKING_ONLY
    52 #define HUF_STATIC_LINKING_ONLY
    52 #include "huf.h"
    53 #include "huf.h"
       
    54 #include "error_private.h"
    53 
    55 
    54 
    56 
    55 /* **************************************************************
    57 /* **************************************************************
    56 *  Error Management
    58 *  Error Management
    57 ****************************************************************/
    59 ****************************************************************/
       
    60 #define HUF_isError ERR_isError
    58 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
    61 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
    59 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
    62 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
    60 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
    63 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
    61 
    64 
    62 
    65 
    63 /* **************************************************************
    66 /* **************************************************************
    64 *  Utils
    67 *  Utils
   125   U16  val;
   128   U16  val;
   126   BYTE nbBits;
   129   BYTE nbBits;
   127 };   /* typedef'd to HUF_CElt within "huf.h" */
   130 };   /* typedef'd to HUF_CElt within "huf.h" */
   128 
   131 
   129 /*! HUF_writeCTable() :
   132 /*! HUF_writeCTable() :
   130     `CTable` : huffman tree to save, using huf representation.
   133     `CTable` : Huffman tree to save, using huf representation.
   131     @return : size of saved CTable */
   134     @return : size of saved CTable */
   132 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
   135 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
   133                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
   136                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
   134 {
   137 {
   135     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
   138     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
   163         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
   166         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
   164     return ((maxSymbolValue+1)/2) + 1;
   167     return ((maxSymbolValue+1)/2) + 1;
   165 }
   168 }
   166 
   169 
   167 
   170 
   168 size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
   171 size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
   169 {
   172 {
   170     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
   173     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
   171     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
   174     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
   172     U32 tableLog = 0;
   175     U32 tableLog = 0;
   173     U32 nbSymbols = 0;
   176     U32 nbSymbols = 0;
   175     /* get symbol weights */
   178     /* get symbol weights */
   176     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
   179     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
   177 
   180 
   178     /* check result */
   181     /* check result */
   179     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
   182     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
   180     if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall);
   183     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
   181 
   184 
   182     /* Prepare base value per rank */
   185     /* Prepare base value per rank */
   183     {   U32 n, nextRankStart = 0;
   186     {   U32 n, nextRankStart = 0;
   184         for (n=1; n<=tableLog; n++) {
   187         for (n=1; n<=tableLog; n++) {
   185             U32 current = nextRankStart;
   188             U32 current = nextRankStart;
   204                 valPerRank[n] = min;     /* get starting value within each rank */
   207                 valPerRank[n] = min;     /* get starting value within each rank */
   205                 min += nbPerRank[n];
   208                 min += nbPerRank[n];
   206                 min >>= 1;
   209                 min >>= 1;
   207         }   }
   210         }   }
   208         /* assign value within rank, symbol order */
   211         /* assign value within rank, symbol order */
   209         { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
   212         { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
   210     }
   213     }
   211 
   214 
       
   215     *maxSymbolValuePtr = nbSymbols - 1;
   212     return readSize;
   216     return readSize;
   213 }
   217 }
   214 
   218 
   215 
   219 
   216 typedef struct nodeElt_s {
   220 typedef struct nodeElt_s {
   264                     {   U32 const highTotal = huffNode[highPos].count;
   268                     {   U32 const highTotal = huffNode[highPos].count;
   265                         U32 const lowTotal = 2 * huffNode[lowPos].count;
   269                         U32 const lowTotal = 2 * huffNode[lowPos].count;
   266                         if (highTotal <= lowTotal) break;
   270                         if (highTotal <= lowTotal) break;
   267                 }   }
   271                 }   }
   268                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
   272                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
   269                 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
   273                 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
       
   274                 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
   270                     nBitsToDecrease ++;
   275                     nBitsToDecrease ++;
   271                 totalCost -= 1 << (nBitsToDecrease-1);
   276                 totalCost -= 1 << (nBitsToDecrease-1);
   272                 if (rankLast[nBitsToDecrease-1] == noSymbol)
   277                 if (rankLast[nBitsToDecrease-1] == noSymbol)
   273                     rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
   278                     rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
   274                 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
   279                 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
   316     for (n=0; n<32; n++) rank[n].current = rank[n].base;
   321     for (n=0; n<32; n++) rank[n].current = rank[n].base;
   317     for (n=0; n<=maxSymbolValue; n++) {
   322     for (n=0; n<=maxSymbolValue; n++) {
   318         U32 const c = count[n];
   323         U32 const c = count[n];
   319         U32 const r = BIT_highbit32(c+1) + 1;
   324         U32 const r = BIT_highbit32(c+1) + 1;
   320         U32 pos = rank[r].current++;
   325         U32 pos = rank[r].current++;
   321         while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
   326         while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
       
   327             huffNode[pos] = huffNode[pos-1];
       
   328             pos--;
       
   329         }
   322         huffNode[pos].count = c;
   330         huffNode[pos].count = c;
   323         huffNode[pos].byte  = (BYTE)n;
   331         huffNode[pos].byte  = (BYTE)n;
   324     }
   332     }
   325 }
   333 }
   326 
   334 
   327 
   335 
   328 /** HUF_buildCTable_wksp() :
   336 /** HUF_buildCTable_wksp() :
   329  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
   337  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
   330  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
   338  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
   331  */
   339  */
   332 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
   340 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
   333 typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
   341 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
   334 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
   342 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
   335 {
   343 {
   336     nodeElt* const huffNode0 = (nodeElt*)workSpace;
   344     nodeElt* const huffNode0 = (nodeElt*)workSpace;
   337     nodeElt* const huffNode = huffNode0+1;
   345     nodeElt* const huffNode = huffNode0+1;
   338     U32 n, nonNullRank;
   346     U32 n, nonNullRank;
   339     int lowS, lowN;
   347     int lowS, lowN;
   340     U16 nodeNb = STARTNODE;
   348     U16 nodeNb = STARTNODE;
   341     U32 nodeRoot;
   349     U32 nodeRoot;
   342 
   350 
   343     /* safety checks */
   351     /* safety checks */
   344     if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);   /* workSpace is not large enough */
   352     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
       
   353     if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
   345     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
   354     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
   346     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
   355     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
   347     memset(huffNode0, 0, sizeof(huffNodeTable));
   356     memset(huffNode0, 0, sizeof(huffNodeTable));
   348 
   357 
   349     /* sort, decreasing order */
   358     /* sort, decreasing order */
   350     HUF_sort(huffNode, count, maxSymbolValue);
   359     HUF_sort(huffNode, count, maxSymbolValue);
   351 
   360 
   399 
   408 
   400     return maxNbBits;
   409     return maxNbBits;
   401 }
   410 }
   402 
   411 
   403 /** HUF_buildCTable() :
   412 /** HUF_buildCTable() :
       
   413  * @return : maxNbBits
   404  *  Note : count is used before tree is written, so they can safely overlap
   414  *  Note : count is used before tree is written, so they can safely overlap
   405  */
   415  */
   406 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
   416 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
   407 {
   417 {
   408     huffNodeTable nodeTable;
   418     huffNodeTable nodeTable;
   409     return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
   419     return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
   410 }
   420 }
   411 
   421 
   412 static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
   422 static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
       
   423 {
       
   424     size_t nbBits = 0;
       
   425     int s;
       
   426     for (s = 0; s <= (int)maxSymbolValue; ++s) {
       
   427         nbBits += CTable[s].nbBits * count[s];
       
   428     }
       
   429     return nbBits >> 3;
       
   430 }
       
   431 
       
   432 static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
       
   433   int bad = 0;
       
   434   int s;
       
   435   for (s = 0; s <= (int)maxSymbolValue; ++s) {
       
   436     bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
       
   437   }
       
   438   return !bad;
       
   439 }
       
   440 
       
   441 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
       
   442 
       
   443 FORCE_INLINE_TEMPLATE void
       
   444 HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
   413 {
   445 {
   414     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
   446     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
   415 }
   447 }
   416 
   448 
   417 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
   449 #define HUF_FLUSHBITS(s)  BIT_flushBits(s)
   418 
       
   419 #define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
       
   420 
   450 
   421 #define HUF_FLUSHBITS_1(stream) \
   451 #define HUF_FLUSHBITS_1(stream) \
   422     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
   452     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
   423 
   453 
   424 #define HUF_FLUSHBITS_2(stream) \
   454 #define HUF_FLUSHBITS_2(stream) \
   425     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
   455     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
   426 
   456 
   427 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
   457 FORCE_INLINE_TEMPLATE size_t
       
   458 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
       
   459                                    const void* src, size_t srcSize,
       
   460                                    const HUF_CElt* CTable)
   428 {
   461 {
   429     const BYTE* ip = (const BYTE*) src;
   462     const BYTE* ip = (const BYTE*) src;
   430     BYTE* const ostart = (BYTE*)dst;
   463     BYTE* const ostart = (BYTE*)dst;
   431     BYTE* const oend = ostart + dstSize;
   464     BYTE* const oend = ostart + dstSize;
   432     BYTE* op = ostart;
   465     BYTE* op = ostart;
   433     size_t n;
   466     size_t n;
   434     const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
       
   435     BIT_CStream_t bitC;
   467     BIT_CStream_t bitC;
   436 
   468 
   437     /* init */
   469     /* init */
   438     if (dstSize < 8) return 0;   /* not enough space to compress */
   470     if (dstSize < 8) return 0;   /* not enough space to compress */
   439     { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
   471     { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
   442     n = srcSize & ~3;  /* join to mod 4 */
   474     n = srcSize & ~3;  /* join to mod 4 */
   443     switch (srcSize & 3)
   475     switch (srcSize & 3)
   444     {
   476     {
   445         case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
   477         case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
   446                  HUF_FLUSHBITS_2(&bitC);
   478                  HUF_FLUSHBITS_2(&bitC);
       
   479 		 /* fall-through */
   447         case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
   480         case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
   448                  HUF_FLUSHBITS_1(&bitC);
   481                  HUF_FLUSHBITS_1(&bitC);
       
   482 		 /* fall-through */
   449         case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
   483         case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
   450                  HUF_FLUSHBITS(&bitC);
   484                  HUF_FLUSHBITS(&bitC);
   451         case 0 :
   485 		 /* fall-through */
   452         default: ;
   486         case 0 : /* fall-through */
       
   487         default: break;
   453     }
   488     }
   454 
   489 
   455     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
   490     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
   456         HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
   491         HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
   457         HUF_FLUSHBITS_1(&bitC);
   492         HUF_FLUSHBITS_1(&bitC);
   464     }
   499     }
   465 
   500 
   466     return BIT_closeCStream(&bitC);
   501     return BIT_closeCStream(&bitC);
   467 }
   502 }
   468 
   503 
   469 
   504 #if DYNAMIC_BMI2
   470 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
   505 
       
   506 static TARGET_ATTRIBUTE("bmi2") size_t
       
   507 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
       
   508                                    const void* src, size_t srcSize,
       
   509                                    const HUF_CElt* CTable)
       
   510 {
       
   511     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
       
   512 }
       
   513 
       
   514 static size_t
       
   515 HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
       
   516                                       const void* src, size_t srcSize,
       
   517                                       const HUF_CElt* CTable)
       
   518 {
       
   519     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
       
   520 }
       
   521 
       
   522 static size_t
       
   523 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
       
   524                               const void* src, size_t srcSize,
       
   525                               const HUF_CElt* CTable, const int bmi2)
       
   526 {
       
   527     if (bmi2) {
       
   528         return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
       
   529     }
       
   530     return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
       
   531 }
       
   532 
       
   533 #else
       
   534 
       
   535 static size_t
       
   536 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
       
   537                               const void* src, size_t srcSize,
       
   538                               const HUF_CElt* CTable, const int bmi2)
       
   539 {
       
   540     (void)bmi2;
       
   541     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
       
   542 }
       
   543 
       
   544 #endif
       
   545 
       
   546 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
       
   547 {
       
   548     return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
       
   549 }
       
   550 
       
   551 
       
   552 static size_t
       
   553 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
       
   554                               const void* src, size_t srcSize,
       
   555                               const HUF_CElt* CTable, int bmi2)
   471 {
   556 {
   472     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
   557     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
   473     const BYTE* ip = (const BYTE*) src;
   558     const BYTE* ip = (const BYTE*) src;
   474     const BYTE* const iend = ip + srcSize;
   559     const BYTE* const iend = ip + srcSize;
   475     BYTE* const ostart = (BYTE*) dst;
   560     BYTE* const ostart = (BYTE*) dst;
   478 
   563 
   479     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
   564     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
   480     if (srcSize < 12) return 0;   /* no saving possible : too small input */
   565     if (srcSize < 12) return 0;   /* no saving possible : too small input */
   481     op += 6;   /* jumpTable */
   566     op += 6;   /* jumpTable */
   482 
   567 
   483     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
   568     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
   484         if (cSize==0) return 0;
   569         if (cSize==0) return 0;
       
   570         assert(cSize <= 65535);
   485         MEM_writeLE16(ostart, (U16)cSize);
   571         MEM_writeLE16(ostart, (U16)cSize);
   486         op += cSize;
   572         op += cSize;
   487     }
   573     }
   488 
   574 
   489     ip += segmentSize;
   575     ip += segmentSize;
   490     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
   576     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
   491         if (cSize==0) return 0;
   577         if (cSize==0) return 0;
       
   578         assert(cSize <= 65535);
   492         MEM_writeLE16(ostart+2, (U16)cSize);
   579         MEM_writeLE16(ostart+2, (U16)cSize);
   493         op += cSize;
   580         op += cSize;
   494     }
   581     }
   495 
   582 
   496     ip += segmentSize;
   583     ip += segmentSize;
   497     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
   584     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
   498         if (cSize==0) return 0;
   585         if (cSize==0) return 0;
       
   586         assert(cSize <= 65535);
   499         MEM_writeLE16(ostart+4, (U16)cSize);
   587         MEM_writeLE16(ostart+4, (U16)cSize);
   500         op += cSize;
   588         op += cSize;
   501     }
   589     }
   502 
   590 
   503     ip += segmentSize;
   591     ip += segmentSize;
   504     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
   592     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
   505         if (cSize==0) return 0;
   593         if (cSize==0) return 0;
   506         op += cSize;
   594         op += cSize;
   507     }
   595     }
   508 
   596 
   509     return op-ostart;
   597     return op-ostart;
   510 }
   598 }
   511 
   599 
   512 
   600 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
   513 /* `workSpace` must a table of at least 1024 unsigned */
   601 {
       
   602     return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
       
   603 }
       
   604 
       
   605 
       
   606 static size_t HUF_compressCTable_internal(
       
   607                 BYTE* const ostart, BYTE* op, BYTE* const oend,
       
   608                 const void* src, size_t srcSize,
       
   609                 unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
       
   610 {
       
   611     size_t const cSize = singleStream ?
       
   612                          HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
       
   613                          HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
       
   614     if (HUF_isError(cSize)) { return cSize; }
       
   615     if (cSize==0) { return 0; }   /* uncompressible */
       
   616     op += cSize;
       
   617     /* check compressibility */
       
   618     if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
       
   619     return op-ostart;
       
   620 }
       
   621 
       
   622 typedef struct {
       
   623     U32 count[HUF_SYMBOLVALUE_MAX + 1];
       
   624     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
       
   625     huffNodeTable nodeTable;
       
   626 } HUF_compress_tables_t;
       
   627 
       
   628 /* HUF_compress_internal() :
       
   629  * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
   514 static size_t HUF_compress_internal (
   630 static size_t HUF_compress_internal (
   515                 void* dst, size_t dstSize,
   631                 void* dst, size_t dstSize,
   516                 const void* src, size_t srcSize,
   632                 const void* src, size_t srcSize,
   517                 unsigned maxSymbolValue, unsigned huffLog,
   633                 unsigned maxSymbolValue, unsigned huffLog,
   518                 unsigned singleStream,
   634                 unsigned singleStream,
   519                 void* workSpace, size_t wkspSize)
   635                 void* workSpace, size_t wkspSize,
   520 {
   636                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
       
   637                 const int bmi2)
       
   638 {
       
   639     HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
   521     BYTE* const ostart = (BYTE*)dst;
   640     BYTE* const ostart = (BYTE*)dst;
   522     BYTE* const oend = ostart + dstSize;
   641     BYTE* const oend = ostart + dstSize;
   523     BYTE* op = ostart;
   642     BYTE* op = ostart;
   524 
   643 
   525     union {
       
   526         U32 count[HUF_SYMBOLVALUE_MAX+1];
       
   527         HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
       
   528     } table;   /* `count` can overlap with `CTable`; saves 1 KB */
       
   529 
       
   530     /* checks & inits */
   644     /* checks & inits */
   531     if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
   645     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
   532     if (!srcSize) return 0;  /* Uncompressed (note : 1 means rle, so first byte must be correct) */
   646     if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
   533     if (!dstSize) return 0;  /* cannot fit within dst budget */
   647     if (!srcSize) return 0;  /* Uncompressed */
       
   648     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
   534     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
   649     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
   535     if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
   650     if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
       
   651     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
   536     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
   652     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
   537     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
   653     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
   538 
   654 
       
   655     /* Heuristic : If old table is valid, use it for small inputs */
       
   656     if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
       
   657         return HUF_compressCTable_internal(ostart, op, oend,
       
   658                                            src, srcSize,
       
   659                                            singleStream, oldHufTable, bmi2);
       
   660     }
       
   661 
   539     /* Scan input and build symbol stats */
   662     /* Scan input and build symbol stats */
   540     {   CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
   663     {   CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
   541         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
   664         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
   542         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
   665         if (largest <= (srcSize >> 7)+1) return 0;   /* heuristic : probably not compressible enough */
       
   666     }
       
   667 
       
   668     /* Check validity of previous table */
       
   669     if ( repeat
       
   670       && *repeat == HUF_repeat_check
       
   671       && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
       
   672         *repeat = HUF_repeat_none;
       
   673     }
       
   674     /* Heuristic : use existing table for small inputs */
       
   675     if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
       
   676         return HUF_compressCTable_internal(ostart, op, oend,
       
   677                                            src, srcSize,
       
   678                                            singleStream, oldHufTable, bmi2);
   543     }
   679     }
   544 
   680 
   545     /* Build Huffman Tree */
   681     /* Build Huffman Tree */
   546     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
   682     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
   547     {   CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) );
   683     {   CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
       
   684                                                 maxSymbolValue, huffLog,
       
   685                                                 table->nodeTable, sizeof(table->nodeTable)) );
   548         huffLog = (U32)maxBits;
   686         huffLog = (U32)maxBits;
       
   687         /* Zero unused symbols in CTable, so we can check it for validity */
       
   688         memset(table->CTable + (maxSymbolValue + 1), 0,
       
   689                sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
   549     }
   690     }
   550 
   691 
   551     /* Write table description header */
   692     /* Write table description header */
   552     {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table.CTable, maxSymbolValue, huffLog) );
   693     {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
   553         if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
   694         /* Check if using previous huffman table is beneficial */
       
   695         if (repeat && *repeat != HUF_repeat_none) {
       
   696             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
       
   697             size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
       
   698             if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
       
   699                 return HUF_compressCTable_internal(ostart, op, oend,
       
   700                                                    src, srcSize,
       
   701                                                    singleStream, oldHufTable, bmi2);
       
   702         }   }
       
   703 
       
   704         /* Use the new huffman table */
       
   705         if (hSize + 12ul >= srcSize) { return 0; }
   554         op += hSize;
   706         op += hSize;
   555     }
   707         if (repeat) { *repeat = HUF_repeat_none; }
   556 
   708         if (oldHufTable)
   557     /* Compress */
   709             memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
   558     {   size_t const cSize = (singleStream) ?
   710     }
   559                             HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) :   /* single segment */
   711     return HUF_compressCTable_internal(ostart, op, oend,
   560                             HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable);
   712                                        src, srcSize,
   561         if (HUF_isError(cSize)) return cSize;
   713                                        singleStream, table->CTable, bmi2);
   562         if (cSize==0) return 0;   /* uncompressible */
       
   563         op += cSize;
       
   564     }
       
   565 
       
   566     /* check compressibility */
       
   567     if ((size_t)(op-ostart) >= srcSize-1)
       
   568         return 0;
       
   569 
       
   570     return op-ostart;
       
   571 }
   714 }
   572 
   715 
   573 
   716 
   574 size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
   717 size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
   575                       const void* src, size_t srcSize,
   718                       const void* src, size_t srcSize,
   576                       unsigned maxSymbolValue, unsigned huffLog,
   719                       unsigned maxSymbolValue, unsigned huffLog,
   577                       void* workSpace, size_t wkspSize)
   720                       void* workSpace, size_t wkspSize)
   578 {
   721 {
   579     return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
   722     return HUF_compress_internal(dst, dstSize, src, srcSize,
       
   723                                  maxSymbolValue, huffLog, 1 /*single stream*/,
       
   724                                  workSpace, wkspSize,
       
   725                                  NULL, NULL, 0, 0 /*bmi2*/);
       
   726 }
       
   727 
       
   728 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
       
   729                       const void* src, size_t srcSize,
       
   730                       unsigned maxSymbolValue, unsigned huffLog,
       
   731                       void* workSpace, size_t wkspSize,
       
   732                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
       
   733 {
       
   734     return HUF_compress_internal(dst, dstSize, src, srcSize,
       
   735                                  maxSymbolValue, huffLog, 1 /*single stream*/,
       
   736                                  workSpace, wkspSize, hufTable,
       
   737                                  repeat, preferRepeat, bmi2);
   580 }
   738 }
   581 
   739 
   582 size_t HUF_compress1X (void* dst, size_t dstSize,
   740 size_t HUF_compress1X (void* dst, size_t dstSize,
   583                  const void* src, size_t srcSize,
   741                  const void* src, size_t srcSize,
   584                  unsigned maxSymbolValue, unsigned huffLog)
   742                  unsigned maxSymbolValue, unsigned huffLog)
   585 {
   743 {
   586     unsigned workSpace[1024];
   744     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
   587     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
   745     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
   588 }
   746 }
   589 
   747 
       
   748 /* HUF_compress4X_repeat():
       
   749  * compress input using 4 streams.
       
   750  * provide workspace to generate compression tables */
   590 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
   751 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
   591                       const void* src, size_t srcSize,
   752                       const void* src, size_t srcSize,
   592                       unsigned maxSymbolValue, unsigned huffLog,
   753                       unsigned maxSymbolValue, unsigned huffLog,
   593                       void* workSpace, size_t wkspSize)
   754                       void* workSpace, size_t wkspSize)
   594 {
   755 {
   595     return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
   756     return HUF_compress_internal(dst, dstSize, src, srcSize,
       
   757                                  maxSymbolValue, huffLog, 0 /*4 streams*/,
       
   758                                  workSpace, wkspSize,
       
   759                                  NULL, NULL, 0, 0 /*bmi2*/);
       
   760 }
       
   761 
       
   762 /* HUF_compress4X_repeat():
       
   763  * compress input using 4 streams.
       
   764  * re-use an existing huffman compression table */
       
   765 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
       
   766                       const void* src, size_t srcSize,
       
   767                       unsigned maxSymbolValue, unsigned huffLog,
       
   768                       void* workSpace, size_t wkspSize,
       
   769                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
       
   770 {
       
   771     return HUF_compress_internal(dst, dstSize, src, srcSize,
       
   772                                  maxSymbolValue, huffLog, 0 /* 4 streams */,
       
   773                                  workSpace, wkspSize,
       
   774                                  hufTable, repeat, preferRepeat, bmi2);
   596 }
   775 }
   597 
   776 
   598 size_t HUF_compress2 (void* dst, size_t dstSize,
   777 size_t HUF_compress2 (void* dst, size_t dstSize,
   599                 const void* src, size_t srcSize,
   778                 const void* src, size_t srcSize,
   600                 unsigned maxSymbolValue, unsigned huffLog)
   779                 unsigned maxSymbolValue, unsigned huffLog)
   601 {
   780 {
   602     unsigned workSpace[1024];
   781     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
   603     return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
   782     return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
   604 }
   783 }
   605 
   784 
   606 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
   785 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
   607 {
   786 {
   608     return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
   787     return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
   609 }
   788 }