comparison contrib/python-zstandard/zstd/compress/zstd_opt.h @ 30822:b54a2984cdd4

zstd: vendor python-zstandard 0.6.0. Commit 63c68d6f5fc8de4afd9bde81b13b537beb4e47e8 from https://github.com/indygreg/python-zstandard is imported without modifications (other than removing unwanted files). This includes minor performance and feature improvements. It also changes the vendored zstd library from 1.1.1 to 1.1.2. # no-check-commit
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 14 Jan 2017 19:41:43 -0800
parents 2e484bdea8c4
children c32454d69b85
comparison
equal deleted inserted replaced
30821:7005c03f7387 30822:b54a2984cdd4
13 13
14 #ifndef ZSTD_OPT_H_91842398743 14 #ifndef ZSTD_OPT_H_91842398743
15 #define ZSTD_OPT_H_91842398743 15 #define ZSTD_OPT_H_91842398743
16 16
17 17
18 #define ZSTD_FREQ_DIV 5 18 #define ZSTD_LITFREQ_ADD 2
19 #define ZSTD_MAX_PRICE (1<<30) 19 #define ZSTD_FREQ_DIV 4
20 #define ZSTD_MAX_PRICE (1<<30)
20 21
21 /*-************************************* 22 /*-*************************************
22 * Price functions for optimal parser 23 * Price functions for optimal parser
23 ***************************************/ 24 ***************************************/
24 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) 25 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
29 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); 30 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
30 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); 31 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
31 } 32 }
32 33
33 34
34 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) 35 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
35 { 36 {
36 unsigned u; 37 unsigned u;
37 38
38 ssPtr->cachedLiterals = NULL; 39 ssPtr->cachedLiterals = NULL;
39 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; 40 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
41 ssPtr->staticPrices = 0;
40 42
41 if (ssPtr->litLengthSum == 0) { 43 if (ssPtr->litLengthSum == 0) {
42 ssPtr->litSum = (2<<Litbits); 44 if (srcSize <= 1024) ssPtr->staticPrices = 1;
45
46 for (u=0; u<=MaxLit; u++)
47 ssPtr->litFreq[u] = 0;
48 for (u=0; u<srcSize; u++)
49 ssPtr->litFreq[src[u]]++;
50
51 ssPtr->litSum = 0;
43 ssPtr->litLengthSum = MaxLL+1; 52 ssPtr->litLengthSum = MaxLL+1;
44 ssPtr->matchLengthSum = MaxML+1; 53 ssPtr->matchLengthSum = MaxML+1;
45 ssPtr->offCodeSum = (MaxOff+1); 54 ssPtr->offCodeSum = (MaxOff+1);
46 ssPtr->matchSum = (2<<Litbits); 55 ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
47 56
48 for (u=0; u<=MaxLit; u++) 57 for (u=0; u<=MaxLit; u++) {
49 ssPtr->litFreq[u] = 2; 58 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
59 ssPtr->litSum += ssPtr->litFreq[u];
60 }
50 for (u=0; u<=MaxLL; u++) 61 for (u=0; u<=MaxLL; u++)
51 ssPtr->litLengthFreq[u] = 1; 62 ssPtr->litLengthFreq[u] = 1;
52 for (u=0; u<=MaxML; u++) 63 for (u=0; u<=MaxML; u++)
53 ssPtr->matchLengthFreq[u] = 1; 64 ssPtr->matchLengthFreq[u] = 1;
54 for (u=0; u<=MaxOff; u++) 65 for (u=0; u<=MaxOff; u++)
59 ssPtr->offCodeSum = 0; 70 ssPtr->offCodeSum = 0;
60 ssPtr->matchSum = 0; 71 ssPtr->matchSum = 0;
61 ssPtr->litSum = 0; 72 ssPtr->litSum = 0;
62 73
63 for (u=0; u<=MaxLit; u++) { 74 for (u=0; u<=MaxLit; u++) {
64 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); 75 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
65 ssPtr->litSum += ssPtr->litFreq[u]; 76 ssPtr->litSum += ssPtr->litFreq[u];
66 } 77 }
67 for (u=0; u<=MaxLL; u++) { 78 for (u=0; u<=MaxLL; u++) {
68 ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); 79 ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
69 ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; 80 ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
70 } 81 }
71 for (u=0; u<=MaxML; u++) { 82 for (u=0; u<=MaxML; u++) {
72 ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); 83 ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
73 ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; 84 ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
74 ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); 85 ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
75 } 86 }
87 ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
76 for (u=0; u<=MaxOff; u++) { 88 for (u=0; u<=MaxOff; u++) {
77 ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); 89 ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
78 ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; 90 ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
79 } 91 }
80 } 92 }
84 96
85 97
86 FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals) 98 FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals)
87 { 99 {
88 U32 price, u; 100 U32 price, u;
101
102 if (ssPtr->staticPrices)
103 return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
89 104
90 if (litLength == 0) 105 if (litLength == 0)
91 return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1); 106 return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
92 107
93 /* literals */ 108 /* literals */
122 137
123 138
124 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) 139 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
125 { 140 {
126 /* offset */ 141 /* offset */
142 U32 price;
127 BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); 143 BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
128 U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); 144
129 145 if (seqStorePtr->staticPrices)
146 return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
147
148 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
130 if (!ultra && offCode >= 20) price += (offCode-19)*2; 149 if (!ultra && offCode >= 20) price += (offCode-19)*2;
131 150
132 /* match Length */ 151 /* match Length */
133 { const BYTE ML_deltaCode = 36; 152 { const BYTE ML_deltaCode = 36;
134 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; 153 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
142 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) 161 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
143 { 162 {
144 U32 u; 163 U32 u;
145 164
146 /* literals */ 165 /* literals */
147 seqStorePtr->litSum += litLength; 166 seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
148 for (u=0; u < litLength; u++) 167 for (u=0; u < litLength; u++)
149 seqStorePtr->litFreq[literals[u]]++; 168 seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
150 169
151 /* literal Length */ 170 /* literal Length */
152 { const BYTE LL_deltaCode = 19; 171 { const BYTE LL_deltaCode = 19;
153 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; 172 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
154 seqStorePtr->litLengthFreq[llCode]++; 173 seqStorePtr->litLengthFreq[llCode]++;
399 const BYTE* inr; 418 const BYTE* inr;
400 U32 offset, rep[ZSTD_REP_NUM]; 419 U32 offset, rep[ZSTD_REP_NUM];
401 420
402 /* init */ 421 /* init */
403 ctx->nextToUpdate3 = ctx->nextToUpdate; 422 ctx->nextToUpdate3 = ctx->nextToUpdate;
404 ZSTD_rescaleFreqs(seqStorePtr); 423 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
405 ip += (ip==prefixStart); 424 ip += (ip==prefixStart);
406 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } 425 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
407 426
408 /* Match Loop */ 427 /* Match Loop */
409 while (ip < ilimit) { 428 while (ip < ilimit) {
414 litlen = (U32)(ip - anchor); 433 litlen = (U32)(ip - anchor);
415 434
416 /* check repCode */ 435 /* check repCode */
417 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); 436 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
418 for (i=(ip == anchor); i<last_i; i++) { 437 for (i=(ip == anchor); i<last_i; i++) {
419 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i]; 438 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
420 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) 439 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
421 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { 440 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
422 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; 441 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
423 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { 442 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
424 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; 443 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
499 } 518 }
500 519
501 best_mlen = minMatch; 520 best_mlen = minMatch;
502 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); 521 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
503 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */ 522 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
504 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; 523 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
505 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) 524 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
506 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { 525 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
507 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; 526 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
508 527
509 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { 528 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
599 rep[1] = rep[0]; 618 rep[1] = rep[0];
600 rep[0] = offset - ZSTD_REP_MOVE_OPT; 619 rep[0] = offset - ZSTD_REP_MOVE_OPT;
601 offset--; 620 offset--;
602 } else { 621 } else {
603 if (offset != 0) { 622 if (offset != 0) {
604 best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); 623 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
605 if (offset != 1) rep[2] = rep[1]; 624 if (offset != 1) rep[2] = rep[1];
606 rep[1] = rep[0]; 625 rep[1] = rep[0];
607 rep[0] = best_off; 626 rep[0] = best_off;
608 } 627 }
609 if (litLength==0) offset--; 628 if (litLength==0) offset--;
654 /* init */ 673 /* init */
655 U32 offset, rep[ZSTD_REP_NUM]; 674 U32 offset, rep[ZSTD_REP_NUM];
656 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } 675 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
657 676
658 ctx->nextToUpdate3 = ctx->nextToUpdate; 677 ctx->nextToUpdate3 = ctx->nextToUpdate;
659 ZSTD_rescaleFreqs(seqStorePtr); 678 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
660 ip += (ip==prefixStart); 679 ip += (ip==prefixStart);
661 680
662 /* Match Loop */ 681 /* Match Loop */
663 while (ip < ilimit) { 682 while (ip < ilimit) {
664 U32 cur, match_num, last_pos, litlen, price; 683 U32 cur, match_num, last_pos, litlen, price;
669 opt[0].litlen = (U32)(ip - anchor); 688 opt[0].litlen = (U32)(ip - anchor);
670 689
671 /* check repCode */ 690 /* check repCode */
672 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); 691 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
673 for (i = (ip==anchor); i<last_i; i++) { 692 for (i = (ip==anchor); i<last_i; i++) {
674 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i]; 693 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
675 const U32 repIndex = (U32)(current - repCur); 694 const U32 repIndex = (U32)(current - repCur);
676 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 695 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
677 const BYTE* const repMatch = repBase + repIndex; 696 const BYTE* const repMatch = repBase + repIndex;
678 if ( (repCur > 0 && repCur <= (S32)current) 697 if ( (repCur > 0 && repCur <= (S32)current)
679 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ 698 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
765 } 784 }
766 785
767 best_mlen = minMatch; 786 best_mlen = minMatch;
768 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); 787 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
769 for (i = (mlen != 1); i<last_i; i++) { 788 for (i = (mlen != 1); i<last_i; i++) {
770 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; 789 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
771 const U32 repIndex = (U32)(current+cur - repCur); 790 const U32 repIndex = (U32)(current+cur - repCur);
772 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 791 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
773 const BYTE* const repMatch = repBase + repIndex; 792 const BYTE* const repMatch = repBase + repIndex;
774 if ( (repCur > 0 && repCur <= (S32)(current+cur)) 793 if ( (repCur > 0 && repCur <= (S32)(current+cur))
775 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ 794 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
871 rep[1] = rep[0]; 890 rep[1] = rep[0];
872 rep[0] = offset - ZSTD_REP_MOVE_OPT; 891 rep[0] = offset - ZSTD_REP_MOVE_OPT;
873 offset--; 892 offset--;
874 } else { 893 } else {
875 if (offset != 0) { 894 if (offset != 0) {
876 best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); 895 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
877 if (offset != 1) rep[2] = rep[1]; 896 if (offset != 1) rep[2] = rep[1];
878 rep[1] = rep[0]; 897 rep[1] = rep[0];
879 rep[0] = best_off; 898 rep[0] = best_off;
880 } 899 }
881 900