comparison contrib/python-zstandard/zstd/compress/zstd_ldm.c @ 40121:73fef626dae3

zstandard: vendor python-zstandard 0.10.1

This was just released.

The upstream source distribution from PyPI was extracted. Unwanted files
were removed.

The clang-format ignore list was updated to reflect the new source of files.

setup.py was updated to pass a new argument to python-zstandard's function
for returning an Extension instance. Upstream had to change to use relative
paths because Python 3.7's packaging doesn't seem to like absolute paths
when defining sources, includes, etc. The default relative path calculation
is relative to setup_zstd.py, which is different from the directory of
Mercurial's setup.py.

The project contains a vendored copy of zstandard 1.3.6. The old version
was 1.3.4.

The API should be backwards compatible and nothing in core should need to
be adjusted. However, there is a new "chunker" API that we may find useful
in places where we want to emit compressed chunks of a fixed size.

There are a pair of bug fixes in 0.10.0 with regard to compressobj() and
decompressobj() when block flushing is used. I actually found these bugs
when introducing these APIs in Mercurial! But existing Mercurial code is
not affected because we don't perform block flushing.

# no-check-commit because 3rd party code has different style guidelines

Differential Revision: https://phab.mercurial-scm.org/D4911
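For reference, a minimal sketch of the new chunker API mentioned above, based
on the python-zstandard 0.10 documentation; the input data and the 32 KB chunk
size are placeholders, not anything Mercurial actually uses:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    # Every chunk yielded below, except possibly the final one, should be
    # exactly chunk_size bytes long.
    chunker = cctx.chunker(chunk_size=32768)

    source = io.BytesIO(b'some input data' * 100000)   # placeholder input
    chunks = []

    while True:
        data = source.read(131072)
        if not data:
            break
        # compress() buffers internally and yields zero or more full chunks.
        chunks.extend(chunker.compress(data))

    # finish() drains whatever is still buffered and ends the zstd frame.
    chunks.extend(chunker.finish())

And a sketch of the compressobj()/decompressobj() block-flushing pattern that
the 0.10.0 bug fixes concern; Mercurial itself does not perform block
flushing, so this is purely illustrative:

    import zstandard as zstd

    cobj = zstd.ZstdCompressor().compressobj()

    buf = cobj.compress(b'partial message')
    # Emit a complete zstd block without ending the frame, so a reader can
    # decode everything written so far.
    buf += cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
    buf += cobj.compress(b'more data')
    buf += cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)   # end the frame

    dobj = zstd.ZstdDecompressor().decompressobj()
    assert dobj.decompress(buf) == b'partial messagemore data'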
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 08 Oct 2018 16:27:40 -0700
parents b1fb341d8a61
children 675775c33ab6
comparing 40120:89742f1fa6cb with 40121:73fef626dae3
@@ old lines 7-46, new lines 7-48 @@
  * in the COPYING file in the root directory of this source tree).
  */

 #include "zstd_ldm.h"

+#include "debug.h"
 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */

 #define LDM_BUCKET_SIZE_LOG 3
 #define LDM_MIN_MATCH_LENGTH 64
 [...]
 #define LDM_HASH_CHAR_OFFSET 10

 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                                ZSTD_compressionParameters const* cParams)
 {
-    U32 const windowLog = cParams->windowLog;
+    params->windowLog = cParams->windowLog;
     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
     DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
     if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
     if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
     if (cParams->strategy >= ZSTD_btopt) {
 [...]
         assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
         assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
         params->minMatchLength = minMatch;
     }
     if (params->hashLog == 0) {
-        params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
     }
     if (params->hashEveryLog == 0) {
-        params->hashEveryLog =
-            windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
+        params->hashEveryLog = params->windowLog < params->hashLog
+            ? 0
+            : params->windowLog - params->hashLog;
     }
     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
 }

 size_t ZSTD_ldm_getTableSize(ldmParams_t params)
@@ old lines 214-238, new lines 216-237 @@
  * This is similar to ZSTD_loadDictionaryContent.
  *
  * The tables for the other strategies are filled within their
  * block compressors. */
 static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
-                                      ZSTD_compressionParameters const* cParams,
                                       void const* end)
 {
     const BYTE* const iend = (const BYTE*)end;

-    switch(cParams->strategy)
+    switch(ms->cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(ms, cParams, iend);
-        ms->nextToUpdate = (U32)(iend - ms->window.base);
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
         break;

     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, cParams, iend);
-        ms->nextToUpdate = (U32)(iend - ms->window.base);
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
         break;

     case ZSTD_greedy:
     case ZSTD_lazy:
     case ZSTD_lazy2:
@@ old lines 506-516, new lines 505-515 @@
         *    the window through early invalidation.
         *    TODO: * Test the chunk size.
         *          * Try invalidation after the sequence generation and test the
         *            the offset against maxDist directly.
         */
-        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
         /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
         newLeftoverSize = ZSTD_ldm_generateSequences_internal(
             ldmState, sequences, params, chunkStart, chunkSize);
         if (ZSTD_isError(newLeftoverSize))
             return newLeftoverSize;
@@ old lines 589-653, new lines 588-646 @@
     return sequence;
 }

 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
-    int const extDict)
-{
+    void const* src, size_t srcSize)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
     unsigned const minMatch = cParams->searchLength;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, extDict);
-    BYTE const* const base = ms->window.base;
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
     /* Input positions */
     BYTE const* ip = istart;

+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
     assert(rawSeqStore->pos <= rawSeqStore->size);
     assert(rawSeqStore->size <= rawSeqStore->capacity);
     /* Loop through each sequence and apply the block compressor to the lits */
     while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
         /* maybeSplitSequence updates rawSeqStore->pos */
 [...]
         assert(sequence.offset <= (1U << cParams->windowLog));
         assert(ip + sequence.litLength + sequence.matchLength <= iend);

         /* Fill tables for block compressor */
         ZSTD_ldm_limitTableUpdate(ms, ip);
-        ZSTD_ldm_fillFastTables(ms, cParams, ip);
+        ZSTD_ldm_fillFastTables(ms, ip);
         /* Run the block compressor */
+        DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
         {
             size_t const newLitLength =
-                blockCompressor(ms, seqStore, rep, cParams, ip,
-                                sequence.litLength);
+                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
             ip += sequence.litLength;
-            ms->nextToUpdate = (U32)(ip - base);
             /* Update the repcodes */
             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                 rep[i] = rep[i-1];
             rep[0] = sequence.offset;
             /* Store the sequence */
 [...]
             ip += sequence.matchLength;
         }
     }
     /* Fill the tables for the block compressor */
     ZSTD_ldm_limitTableUpdate(ms, ip);
-    ZSTD_ldm_fillFastTables(ms, cParams, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
     /* Compress the last literals */
-    {
-        size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
-                                                    ip, iend - ip);
-        ms->nextToUpdate = (U32)(iend - base);
-        return lastLiterals;
-    }
-}
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
+}