34 #include <string.h> /* memset */ |
34 #include <string.h> /* memset */ |
35 #include <stdio.h> /* fprintf, fopen, ftello64 */ |
35 #include <stdio.h> /* fprintf, fopen, ftello64 */ |
36 #include <time.h> /* clock */ |
36 #include <time.h> /* clock */ |
37 |
37 |
38 #include "mem.h" /* read */ |
38 #include "mem.h" /* read */ |
39 #include "error_private.h" |
|
40 #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */ |
39 #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */ |
41 #define HUF_STATIC_LINKING_ONLY |
40 #define HUF_STATIC_LINKING_ONLY |
42 #include "huf.h" |
41 #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */ |
43 #include "zstd_internal.h" /* includes zstd.h */ |
42 #include "zstd_internal.h" /* includes zstd.h */ |
44 #include "xxhash.h" |
43 #include "xxhash.h" /* XXH64 */ |
45 #include "divsufsort.h" |
44 #include "divsufsort.h" |
46 #ifndef ZDICT_STATIC_LINKING_ONLY |
45 #ifndef ZDICT_STATIC_LINKING_ONLY |
47 # define ZDICT_STATIC_LINKING_ONLY |
46 # define ZDICT_STATIC_LINKING_ONLY |
48 #endif |
47 #endif |
49 #include "zdict.h" |
48 #include "zdict.h" |
59 #define DICTLISTSIZE_DEFAULT 10000 |
58 #define DICTLISTSIZE_DEFAULT 10000 |
60 |
59 |
61 #define NOISELENGTH 32 |
60 #define NOISELENGTH 32 |
62 |
61 |
63 #define MINRATIO 4 |
62 #define MINRATIO 4 |
64 static const int g_compressionLevel_default = 5; |
63 static const int g_compressionLevel_default = 6; |
65 static const U32 g_selectivity_default = 9; |
64 static const U32 g_selectivity_default = 9; |
66 static const size_t g_provision_entropySize = 200; |
65 static const size_t g_provision_entropySize = 200; |
67 static const size_t g_min_fast_dictContent = 192; |
66 static const size_t g_min_fast_dictContent = 192; |
68 |
67 |
69 |
68 |
305 if (length >= LLIMIT) length = LLIMIT-1; |
304 if (length >= LLIMIT) length = LLIMIT-1; |
306 lengthList[length]++; |
305 lengthList[length]++; |
307 } while (length >=MINMATCHLENGTH); |
306 } while (length >=MINMATCHLENGTH); |
308 |
307 |
309 /* look backward */ |
308 /* look backward */ |
310 length = MINMATCHLENGTH; |
309 length = MINMATCHLENGTH; |
311 while ((length >= MINMATCHLENGTH) & (start > 0)) { |
310 while ((length >= MINMATCHLENGTH) & (start > 0)) { |
312 length = ZDICT_count(b + pos, b + suffix[start - 1]); |
311 length = ZDICT_count(b + pos, b + suffix[start - 1]); |
313 if (length >= LLIMIT) length = LLIMIT - 1; |
312 if (length >= LLIMIT) length = LLIMIT - 1; |
314 lengthList[length]++; |
313 lengthList[length]++; |
315 if (length >= MINMATCHLENGTH) start--; |
314 if (length >= MINMATCHLENGTH) start--; |
316 } |
315 } |
317 |
316 |
318 /* largest useful length */ |
317 /* largest useful length */ |
319 memset(cumulLength, 0, sizeof(cumulLength)); |
318 memset(cumulLength, 0, sizeof(cumulLength)); |
320 cumulLength[maxLength-1] = lengthList[maxLength-1]; |
319 cumulLength[maxLength-1] = lengthList[maxLength-1]; |
321 for (i=(int)(maxLength-2); i>=0; i--) |
320 for (i=(int)(maxLength-2); i>=0; i--) |
568 if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ |
567 if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ |
569 { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0); |
568 { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0); |
570 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } |
569 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } |
571 } |
570 } |
572 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize); |
571 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize); |
573 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; } |
572 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; } |
574 |
573 |
575 if (cSize) { /* if == 0; block is not compressible */ |
574 if (cSize) { /* if == 0; block is not compressible */ |
576 const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc); |
575 const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc); |
577 |
576 |
578 /* literals stats */ |
577 /* literals stats */ |
823 |
822 |
824 return eSize; |
823 return eSize; |
825 } |
824 } |
826 |
825 |
827 |
826 |
|
827 |
|
828 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, |
|
829 const void* customDictContent, size_t dictContentSize, |
|
830 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
|
831 ZDICT_params_t params) |
|
832 { |
|
833 size_t hSize; |
|
834 #define HBUFFSIZE 256 |
|
835 BYTE header[HBUFFSIZE]; |
|
836 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel; |
|
837 U32 const notificationLevel = params.notificationLevel; |
|
838 |
|
839 /* check conditions */ |
|
840 if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall); |
|
841 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong); |
|
842 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall); |
|
843 |
|
844 /* dictionary header */ |
|
845 MEM_writeLE32(header, ZSTD_DICT_MAGIC); |
|
846 { U64 const randomID = XXH64(customDictContent, dictContentSize, 0); |
|
847 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; |
|
848 U32 const dictID = params.dictID ? params.dictID : compliantID; |
|
849 MEM_writeLE32(header+4, dictID); |
|
850 } |
|
851 hSize = 8; |
|
852 |
|
853 /* entropy tables */ |
|
854 DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ |
|
855 DISPLAYLEVEL(2, "statistics ... \n"); |
|
856 { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize, |
|
857 compressionLevel, |
|
858 samplesBuffer, samplesSizes, nbSamples, |
|
859 customDictContent, dictContentSize, |
|
860 notificationLevel); |
|
861 if (ZDICT_isError(eSize)) return eSize; |
|
862 hSize += eSize; |
|
863 } |
|
864 |
|
865 /* copy elements in final buffer ; note : src and dst buffer can overlap */ |
|
866 if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; |
|
867 { size_t const dictSize = hSize + dictContentSize; |
|
868 char* dictEnd = (char*)dictBuffer + dictSize; |
|
869 memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); |
|
870 memcpy(dictBuffer, header, hSize); |
|
871 return dictSize; |
|
872 } |
|
873 } |
|
874 |
|
875 |
828 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, |
876 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, |
829 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
877 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
830 ZDICT_params_t params) |
878 ZDICT_params_t params) |
831 { |
879 { |
832 size_t hSize; |
880 size_t hSize; |