Mercurial > hg
comparison contrib/python-zstandard/zstd/compress/zstd_opt.h @ 30822:b54a2984cdd4
zstd: vendor python-zstandard 0.6.0
Commit 63c68d6f5fc8de4afd9bde81b13b537beb4e47e8 from
https://github.com/indygreg/python-zstandard is imported without
modifications (other than removing unwanted files).
This includes minor performance and feature improvements. It also
changes the vendored zstd library from 1.1.1 to 1.1.2.
# no-check-commit
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 14 Jan 2017 19:41:43 -0800 |
parents | 2e484bdea8c4 |
children | c32454d69b85 |
comparison
equal
deleted
inserted
replaced
30821:7005c03f7387 | 30822:b54a2984cdd4 |
---|---|
13 | 13 |
14 #ifndef ZSTD_OPT_H_91842398743 | 14 #ifndef ZSTD_OPT_H_91842398743 |
15 #define ZSTD_OPT_H_91842398743 | 15 #define ZSTD_OPT_H_91842398743 |
16 | 16 |
17 | 17 |
18 #define ZSTD_FREQ_DIV 5 | 18 #define ZSTD_LITFREQ_ADD 2 |
19 #define ZSTD_MAX_PRICE (1<<30) | 19 #define ZSTD_FREQ_DIV 4 |
20 #define ZSTD_MAX_PRICE (1<<30) | |
20 | 21 |
21 /*-************************************* | 22 /*-************************************* |
22 * Price functions for optimal parser | 23 * Price functions for optimal parser |
23 ***************************************/ | 24 ***************************************/ |
24 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) | 25 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) |
29 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); | 30 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); |
30 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); | 31 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); |
31 } | 32 } |
32 | 33 |
33 | 34 |
34 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) | 35 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize) |
35 { | 36 { |
36 unsigned u; | 37 unsigned u; |
37 | 38 |
38 ssPtr->cachedLiterals = NULL; | 39 ssPtr->cachedLiterals = NULL; |
39 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; | 40 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; |
41 ssPtr->staticPrices = 0; | |
40 | 42 |
41 if (ssPtr->litLengthSum == 0) { | 43 if (ssPtr->litLengthSum == 0) { |
42 ssPtr->litSum = (2<<Litbits); | 44 if (srcSize <= 1024) ssPtr->staticPrices = 1; |
45 | |
46 for (u=0; u<=MaxLit; u++) | |
47 ssPtr->litFreq[u] = 0; | |
48 for (u=0; u<srcSize; u++) | |
49 ssPtr->litFreq[src[u]]++; | |
50 | |
51 ssPtr->litSum = 0; | |
43 ssPtr->litLengthSum = MaxLL+1; | 52 ssPtr->litLengthSum = MaxLL+1; |
44 ssPtr->matchLengthSum = MaxML+1; | 53 ssPtr->matchLengthSum = MaxML+1; |
45 ssPtr->offCodeSum = (MaxOff+1); | 54 ssPtr->offCodeSum = (MaxOff+1); |
46 ssPtr->matchSum = (2<<Litbits); | 55 ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits); |
47 | 56 |
48 for (u=0; u<=MaxLit; u++) | 57 for (u=0; u<=MaxLit; u++) { |
49 ssPtr->litFreq[u] = 2; | 58 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); |
59 ssPtr->litSum += ssPtr->litFreq[u]; | |
60 } | |
50 for (u=0; u<=MaxLL; u++) | 61 for (u=0; u<=MaxLL; u++) |
51 ssPtr->litLengthFreq[u] = 1; | 62 ssPtr->litLengthFreq[u] = 1; |
52 for (u=0; u<=MaxML; u++) | 63 for (u=0; u<=MaxML; u++) |
53 ssPtr->matchLengthFreq[u] = 1; | 64 ssPtr->matchLengthFreq[u] = 1; |
54 for (u=0; u<=MaxOff; u++) | 65 for (u=0; u<=MaxOff; u++) |
59 ssPtr->offCodeSum = 0; | 70 ssPtr->offCodeSum = 0; |
60 ssPtr->matchSum = 0; | 71 ssPtr->matchSum = 0; |
61 ssPtr->litSum = 0; | 72 ssPtr->litSum = 0; |
62 | 73 |
63 for (u=0; u<=MaxLit; u++) { | 74 for (u=0; u<=MaxLit; u++) { |
64 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); | 75 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); |
65 ssPtr->litSum += ssPtr->litFreq[u]; | 76 ssPtr->litSum += ssPtr->litFreq[u]; |
66 } | 77 } |
67 for (u=0; u<=MaxLL; u++) { | 78 for (u=0; u<=MaxLL; u++) { |
68 ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); | 79 ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); |
69 ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; | 80 ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; |
70 } | 81 } |
71 for (u=0; u<=MaxML; u++) { | 82 for (u=0; u<=MaxML; u++) { |
72 ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); | 83 ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); |
73 ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; | 84 ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; |
74 ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); | 85 ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); |
75 } | 86 } |
87 ssPtr->matchSum *= ZSTD_LITFREQ_ADD; | |
76 for (u=0; u<=MaxOff; u++) { | 88 for (u=0; u<=MaxOff; u++) { |
77 ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); | 89 ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); |
78 ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; | 90 ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; |
79 } | 91 } |
80 } | 92 } |
84 | 96 |
85 | 97 |
86 FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals) | 98 FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals) |
87 { | 99 { |
88 U32 price, u; | 100 U32 price, u; |
101 | |
102 if (ssPtr->staticPrices) | |
103 return ZSTD_highbit32((U32)litLength+1) + (litLength*6); | |
89 | 104 |
90 if (litLength == 0) | 105 if (litLength == 0) |
91 return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1); | 106 return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1); |
92 | 107 |
93 /* literals */ | 108 /* literals */ |
122 | 137 |
123 | 138 |
124 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) | 139 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) |
125 { | 140 { |
126 /* offset */ | 141 /* offset */ |
142 U32 price; | |
127 BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); | 143 BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); |
128 U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); | 144 |
129 | 145 if (seqStorePtr->staticPrices) |
146 return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; | |
147 | |
148 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); | |
130 if (!ultra && offCode >= 20) price += (offCode-19)*2; | 149 if (!ultra && offCode >= 20) price += (offCode-19)*2; |
131 | 150 |
132 /* match Length */ | 151 /* match Length */ |
133 { const BYTE ML_deltaCode = 36; | 152 { const BYTE ML_deltaCode = 36; |
134 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; | 153 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; |
142 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) | 161 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) |
143 { | 162 { |
144 U32 u; | 163 U32 u; |
145 | 164 |
146 /* literals */ | 165 /* literals */ |
147 seqStorePtr->litSum += litLength; | 166 seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD; |
148 for (u=0; u < litLength; u++) | 167 for (u=0; u < litLength; u++) |
149 seqStorePtr->litFreq[literals[u]]++; | 168 seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; |
150 | 169 |
151 /* literal Length */ | 170 /* literal Length */ |
152 { const BYTE LL_deltaCode = 19; | 171 { const BYTE LL_deltaCode = 19; |
153 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; | 172 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; |
154 seqStorePtr->litLengthFreq[llCode]++; | 173 seqStorePtr->litLengthFreq[llCode]++; |
399 const BYTE* inr; | 418 const BYTE* inr; |
400 U32 offset, rep[ZSTD_REP_NUM]; | 419 U32 offset, rep[ZSTD_REP_NUM]; |
401 | 420 |
402 /* init */ | 421 /* init */ |
403 ctx->nextToUpdate3 = ctx->nextToUpdate; | 422 ctx->nextToUpdate3 = ctx->nextToUpdate; |
404 ZSTD_rescaleFreqs(seqStorePtr); | 423 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); |
405 ip += (ip==prefixStart); | 424 ip += (ip==prefixStart); |
406 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } | 425 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
407 | 426 |
408 /* Match Loop */ | 427 /* Match Loop */ |
409 while (ip < ilimit) { | 428 while (ip < ilimit) { |
414 litlen = (U32)(ip - anchor); | 433 litlen = (U32)(ip - anchor); |
415 | 434 |
416 /* check repCode */ | 435 /* check repCode */ |
417 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); | 436 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
418 for (i=(ip == anchor); i<last_i; i++) { | 437 for (i=(ip == anchor); i<last_i; i++) { |
419 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i]; | 438 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; |
420 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) | 439 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) |
421 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { | 440 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { |
422 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; | 441 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; |
423 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { | 442 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { |
424 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; | 443 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; |
499 } | 518 } |
500 | 519 |
501 best_mlen = minMatch; | 520 best_mlen = minMatch; |
502 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); | 521 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
503 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */ | 522 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */ |
504 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; | 523 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; |
505 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) | 524 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) |
506 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { | 525 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { |
507 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; | 526 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; |
508 | 527 |
509 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { | 528 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { |
599 rep[1] = rep[0]; | 618 rep[1] = rep[0]; |
600 rep[0] = offset - ZSTD_REP_MOVE_OPT; | 619 rep[0] = offset - ZSTD_REP_MOVE_OPT; |
601 offset--; | 620 offset--; |
602 } else { | 621 } else { |
603 if (offset != 0) { | 622 if (offset != 0) { |
604 best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); | 623 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); |
605 if (offset != 1) rep[2] = rep[1]; | 624 if (offset != 1) rep[2] = rep[1]; |
606 rep[1] = rep[0]; | 625 rep[1] = rep[0]; |
607 rep[0] = best_off; | 626 rep[0] = best_off; |
608 } | 627 } |
609 if (litLength==0) offset--; | 628 if (litLength==0) offset--; |
654 /* init */ | 673 /* init */ |
655 U32 offset, rep[ZSTD_REP_NUM]; | 674 U32 offset, rep[ZSTD_REP_NUM]; |
656 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } | 675 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
657 | 676 |
658 ctx->nextToUpdate3 = ctx->nextToUpdate; | 677 ctx->nextToUpdate3 = ctx->nextToUpdate; |
659 ZSTD_rescaleFreqs(seqStorePtr); | 678 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); |
660 ip += (ip==prefixStart); | 679 ip += (ip==prefixStart); |
661 | 680 |
662 /* Match Loop */ | 681 /* Match Loop */ |
663 while (ip < ilimit) { | 682 while (ip < ilimit) { |
664 U32 cur, match_num, last_pos, litlen, price; | 683 U32 cur, match_num, last_pos, litlen, price; |
669 opt[0].litlen = (U32)(ip - anchor); | 688 opt[0].litlen = (U32)(ip - anchor); |
670 | 689 |
671 /* check repCode */ | 690 /* check repCode */ |
672 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); | 691 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
673 for (i = (ip==anchor); i<last_i; i++) { | 692 for (i = (ip==anchor); i<last_i; i++) { |
674 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i]; | 693 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; |
675 const U32 repIndex = (U32)(current - repCur); | 694 const U32 repIndex = (U32)(current - repCur); |
676 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; | 695 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
677 const BYTE* const repMatch = repBase + repIndex; | 696 const BYTE* const repMatch = repBase + repIndex; |
678 if ( (repCur > 0 && repCur <= (S32)current) | 697 if ( (repCur > 0 && repCur <= (S32)current) |
679 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ | 698 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ |
765 } | 784 } |
766 | 785 |
767 best_mlen = minMatch; | 786 best_mlen = minMatch; |
768 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); | 787 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
769 for (i = (mlen != 1); i<last_i; i++) { | 788 for (i = (mlen != 1); i<last_i; i++) { |
770 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; | 789 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; |
771 const U32 repIndex = (U32)(current+cur - repCur); | 790 const U32 repIndex = (U32)(current+cur - repCur); |
772 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; | 791 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
773 const BYTE* const repMatch = repBase + repIndex; | 792 const BYTE* const repMatch = repBase + repIndex; |
774 if ( (repCur > 0 && repCur <= (S32)(current+cur)) | 793 if ( (repCur > 0 && repCur <= (S32)(current+cur)) |
775 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ | 794 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ |
871 rep[1] = rep[0]; | 890 rep[1] = rep[0]; |
872 rep[0] = offset - ZSTD_REP_MOVE_OPT; | 891 rep[0] = offset - ZSTD_REP_MOVE_OPT; |
873 offset--; | 892 offset--; |
874 } else { | 893 } else { |
875 if (offset != 0) { | 894 if (offset != 0) { |
876 best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); | 895 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); |
877 if (offset != 1) rep[2] = rep[1]; | 896 if (offset != 1) rep[2] = rep[1]; |
878 rep[1] = rep[0]; | 897 rep[1] = rep[0]; |
879 rep[0] = best_off; | 898 rep[0] = best_off; |
880 } | 899 } |
881 | 900 |