contrib/python-zstandard/zstd/dictBuilder/zdict.c
changeset 30924 c32454d69b85
parent 30822 b54a2984cdd4
child 37495 b1fb341d8a61
equal deleted inserted replaced
30923:5b60464efbde 30924:c32454d69b85
    34 #include <string.h>        /* memset */
    34 #include <string.h>        /* memset */
    35 #include <stdio.h>         /* fprintf, fopen, ftello64 */
    35 #include <stdio.h>         /* fprintf, fopen, ftello64 */
    36 #include <time.h>          /* clock */
    36 #include <time.h>          /* clock */
    37 
    37 
    38 #include "mem.h"           /* read */
    38 #include "mem.h"           /* read */
    39 #include "error_private.h"
       
    40 #include "fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
    39 #include "fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
    41 #define HUF_STATIC_LINKING_ONLY
    40 #define HUF_STATIC_LINKING_ONLY
    42 #include "huf.h"
    41 #include "huf.h"           /* HUF_buildCTable, HUF_writeCTable */
    43 #include "zstd_internal.h" /* includes zstd.h */
    42 #include "zstd_internal.h" /* includes zstd.h */
    44 #include "xxhash.h"
    43 #include "xxhash.h"        /* XXH64 */
    45 #include "divsufsort.h"
    44 #include "divsufsort.h"
    46 #ifndef ZDICT_STATIC_LINKING_ONLY
    45 #ifndef ZDICT_STATIC_LINKING_ONLY
    47 #  define ZDICT_STATIC_LINKING_ONLY
    46 #  define ZDICT_STATIC_LINKING_ONLY
    48 #endif
    47 #endif
    49 #include "zdict.h"
    48 #include "zdict.h"
    59 #define DICTLISTSIZE_DEFAULT 10000
    58 #define DICTLISTSIZE_DEFAULT 10000
    60 
    59 
    61 #define NOISELENGTH 32
    60 #define NOISELENGTH 32
    62 
    61 
    63 #define MINRATIO 4
    62 #define MINRATIO 4
    64 static const int g_compressionLevel_default = 5;
    63 static const int g_compressionLevel_default = 6;
    65 static const U32 g_selectivity_default = 9;
    64 static const U32 g_selectivity_default = 9;
    66 static const size_t g_provision_entropySize = 200;
    65 static const size_t g_provision_entropySize = 200;
    67 static const size_t g_min_fast_dictContent = 192;
    66 static const size_t g_min_fast_dictContent = 192;
    68 
    67 
    69 
    68 
   305             if (length >= LLIMIT) length = LLIMIT-1;
   304             if (length >= LLIMIT) length = LLIMIT-1;
   306             lengthList[length]++;
   305             lengthList[length]++;
   307         } while (length >=MINMATCHLENGTH);
   306         } while (length >=MINMATCHLENGTH);
   308 
   307 
   309         /* look backward */
   308         /* look backward */
   310 		length = MINMATCHLENGTH;
   309         length = MINMATCHLENGTH;
   311 		while ((length >= MINMATCHLENGTH) & (start > 0)) {
   310         while ((length >= MINMATCHLENGTH) & (start > 0)) {
   312 			length = ZDICT_count(b + pos, b + suffix[start - 1]);
   311         	length = ZDICT_count(b + pos, b + suffix[start - 1]);
   313 			if (length >= LLIMIT) length = LLIMIT - 1;
   312         	if (length >= LLIMIT) length = LLIMIT - 1;
   314 			lengthList[length]++;
   313         	lengthList[length]++;
   315 			if (length >= MINMATCHLENGTH) start--;
   314         	if (length >= MINMATCHLENGTH) start--;
   316 		}
   315         }
   317 
   316 
   318         /* largest useful length */
   317         /* largest useful length */
   319         memset(cumulLength, 0, sizeof(cumulLength));
   318         memset(cumulLength, 0, sizeof(cumulLength));
   320         cumulLength[maxLength-1] = lengthList[maxLength-1];
   319         cumulLength[maxLength-1] = lengthList[maxLength-1];
   321         for (i=(int)(maxLength-2); i>=0; i--)
   320         for (i=(int)(maxLength-2); i>=0; i--)
   568     if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
   567     if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
   569     {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
   568     {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
   570             if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
   569             if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
   571     }
   570     }
   572     cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
   571     cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
   573     if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
   572     if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
   574 
   573 
   575     if (cSize) {  /* if == 0; block is not compressible */
   574     if (cSize) {  /* if == 0; block is not compressible */
   576         const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
   575         const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
   577 
   576 
   578         /* literals stats */
   577         /* literals stats */
   823 
   822 
   824     return eSize;
   823     return eSize;
   825 }
   824 }
   826 
   825 
   827 
   826 
       
   827 
       
   828 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
       
   829                           const void* customDictContent, size_t dictContentSize,
       
   830                           const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
       
   831                           ZDICT_params_t params)
       
   832 {
       
   833     size_t hSize;
       
   834 #define HBUFFSIZE 256
       
   835     BYTE header[HBUFFSIZE];
       
   836     int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
       
   837     U32 const notificationLevel = params.notificationLevel;
       
   838 
       
   839     /* check conditions */
       
   840     if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
       
   841     if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
       
   842     if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
       
   843 
       
   844     /* dictionary header */
       
   845     MEM_writeLE32(header, ZSTD_DICT_MAGIC);
       
   846     {   U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
       
   847         U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
       
   848         U32 const dictID = params.dictID ? params.dictID : compliantID;
       
   849         MEM_writeLE32(header+4, dictID);
       
   850     }
       
   851     hSize = 8;
       
   852 
       
   853     /* entropy tables */
       
   854     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
       
   855     DISPLAYLEVEL(2, "statistics ... \n");
       
   856     {   size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
       
   857                                   compressionLevel,
       
   858                                   samplesBuffer, samplesSizes, nbSamples,
       
   859                                   customDictContent, dictContentSize,
       
   860                                   notificationLevel);
       
   861         if (ZDICT_isError(eSize)) return eSize;
       
   862         hSize += eSize;
       
   863     }
       
   864 
       
   865     /* copy elements in final buffer ; note : src and dst buffer can overlap */
       
   866     if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
       
   867     {   size_t const dictSize = hSize + dictContentSize;
       
   868         char* dictEnd = (char*)dictBuffer + dictSize;
       
   869         memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
       
   870         memcpy(dictBuffer, header, hSize);
       
   871         return dictSize;
       
   872     }
       
   873 }
       
   874 
       
   875 
   828 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
   876 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
   829                                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
   877                                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
   830                                                  ZDICT_params_t params)
   878                                                  ZDICT_params_t params)
   831 {
   879 {
   832     size_t hSize;
   880     size_t hSize;