comparison contrib/python-zstandard/zstd/compress/zstd_opt.c @ 40121:73fef626dae3

zstandard: vendor python-zstandard 0.10.1 This was just released. The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. setup.py was updated to pass a new argument to python-zstandard's function for returning an Extension instance. Upstream had to change to use relative paths because Python 3.7's packaging doesn't seem to like absolute paths when defining sources, includes, etc. The default relative path calculation is relative to setup_zstd.py which is different from the directory of Mercurial's setup.py. The project contains a vendored copy of zstandard 1.3.6. The old version was 1.3.4. The API should be backwards compatible and nothing in core should need to be adjusted. However, there is a new "chunker" API that we may find useful in places where we want to emit compressed chunks of a fixed size. There are a pair of bug fixes in 0.10.0 with regards to compressobj() and decompressobj() when block flushing is used. I actually found these bugs when introducing these APIs in Mercurial! But existing Mercurial code is not affected because we don't perform block flushing. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D4911
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 08 Oct 2018 16:27:40 -0700
parents b1fb341d8a61
children 675775c33ab6
comparison
equal deleted inserted replaced
40120:89742f1fa6cb 40121:73fef626dae3
7 * in the COPYING file in the root directory of this source tree). 7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses. 8 * You may select, at your option, one of the above-listed licenses.
9 */ 9 */
10 10
11 #include "zstd_compress_internal.h" 11 #include "zstd_compress_internal.h"
12 #include "hist.h"
12 #include "zstd_opt.h" 13 #include "zstd_opt.h"
13 14
14 15
15 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */ 16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
16 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ 17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
17 #define ZSTD_MAX_PRICE (1<<30) 18 #define ZSTD_MAX_PRICE (1<<30)
18 19
19 20
20 /*-************************************* 21 /*-*************************************
21 * Price functions for optimal parser 22 * Price functions for optimal parser
22 ***************************************/ 23 ***************************************/
23 static void ZSTD_setLog2Prices(optState_t* optPtr) 24
24 { 25 #if 0 /* approximation at bit level */
25 optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1); 26 # define BITCOST_ACCURACY 0
26 optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1); 27 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
27 optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1); 28 # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
28 optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1); 29 #elif 0 /* fractional bit accuracy */
29 } 30 # define BITCOST_ACCURACY 8
30 31 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
33 #else /* opt==approx, ultra==accurate */
34 # define BITCOST_ACCURACY 8
35 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
37 #endif
38
39 MEM_STATIC U32 ZSTD_bitWeight(U32 stat) /* integer bit-cost of a symbol of frequency @stat, scaled by BITCOST_MULTIPLIER */
40 {
41 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); /* +1 guards against highbit32(0), which is invalid */
42 }
43
44 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) /* fractional-bit cost: integer bit count plus a mantissa correction, scaled by BITCOST_MULTIPLIER */
45 {
46 U32 const stat = rawStat + 1; /* +1 keeps stat non-zero for highbit32 */
47 U32 const hb = ZSTD_highbit32(stat);
48 U32 const BWeight = hb * BITCOST_MULTIPLIER; /* integer part of log2(stat), scaled */
49 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; /* fractional part : top BITCOST_ACCURACY bits below the leading bit */
50 U32 const weight = BWeight + FWeight;
51 assert(hb + BITCOST_ACCURACY < 31); /* ensures the left shift above cannot overflow 32 bits */
52 return weight;
53 }
54
55 /* debugging function, @return price in bytes */
56 MEM_STATIC double ZSTD_fCost(U32 price)
57 {
58 return (double)price / (BITCOST_MULTIPLIER*8); /* undo fixed-point scaling, then convert bits to bytes */
59 }
60
61 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) /* caches WEIGHT() of each frequency-table total; per-symbol prices are later computed as base price minus symbol weight */
62 {
63 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
64 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
65 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
66 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
67 }
68
69
70 static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus) /* right-shifts every entry of @table by (ZSTD_FREQ_DIV+malus), keeping a floor of 1; @return the new sum */
71 {
72 U32 s, sum=0;
73 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); /* shift amount must be a valid positive bit count */
74 for (s=0; s<=lastEltIndex; s++) {
75 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); /* floor of 1 keeps every symbol priceable */
76 sum += table[s];
77 }
78 return sum;
79 }
31 80
32 static void ZSTD_rescaleFreqs(optState_t* const optPtr, 81 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
33 const BYTE* const src, size_t const srcSize) 82 const BYTE* const src, size_t const srcSize,
34 { 83 int optLevel)
35 optPtr->staticPrices = 0; 84 {
36 85 optPtr->priceType = zop_dynamic;
37 if (optPtr->litLengthSum == 0) { /* first init */ 86
38 unsigned u; 87 if (optPtr->litLengthSum == 0) { /* first block : init */
39 if (srcSize <= 1024) optPtr->staticPrices = 1; 88 if (srcSize <= 1024) /* heuristic */
40 89 optPtr->priceType = zop_predef;
41 assert(optPtr->litFreq!=NULL); 90
42 for (u=0; u<=MaxLit; u++) 91 assert(optPtr->symbolCosts != NULL);
43 optPtr->litFreq[u] = 0; 92 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
44 for (u=0; u<srcSize; u++) 93 optPtr->priceType = zop_dynamic;
45 optPtr->litFreq[src[u]]++; 94
46 optPtr->litSum = 0; 95 assert(optPtr->litFreq != NULL);
47 for (u=0; u<=MaxLit; u++) { 96 optPtr->litSum = 0;
48 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV); 97 { unsigned lit;
49 optPtr->litSum += optPtr->litFreq[u]; 98 for (lit=0; lit<=MaxLit; lit++) {
50 } 99 U32 const scaleLog = 11; /* scale to 2K */
51 100 U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
52 for (u=0; u<=MaxLL; u++) 101 assert(bitCost <= scaleLog);
53 optPtr->litLengthFreq[u] = 1; 102 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
54 optPtr->litLengthSum = MaxLL+1; 103 optPtr->litSum += optPtr->litFreq[lit];
55 for (u=0; u<=MaxML; u++) 104 } }
56 optPtr->matchLengthFreq[u] = 1; 105
57 optPtr->matchLengthSum = MaxML+1; 106 { unsigned ll;
58 for (u=0; u<=MaxOff; u++) 107 FSE_CState_t llstate;
59 optPtr->offCodeFreq[u] = 1; 108 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
60 optPtr->offCodeSum = (MaxOff+1); 109 optPtr->litLengthSum = 0;
61 110 for (ll=0; ll<=MaxLL; ll++) {
62 } else { 111 U32 const scaleLog = 10; /* scale to 1K */
63 unsigned u; 112 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
64 113 assert(bitCost < scaleLog);
65 optPtr->litSum = 0; 114 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
66 for (u=0; u<=MaxLit; u++) { 115 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
67 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1)); 116 } }
68 optPtr->litSum += optPtr->litFreq[u]; 117
69 } 118 { unsigned ml;
70 optPtr->litLengthSum = 0; 119 FSE_CState_t mlstate;
71 for (u=0; u<=MaxLL; u++) { 120 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
72 optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); 121 optPtr->matchLengthSum = 0;
73 optPtr->litLengthSum += optPtr->litLengthFreq[u]; 122 for (ml=0; ml<=MaxML; ml++) {
74 } 123 U32 const scaleLog = 10;
75 optPtr->matchLengthSum = 0; 124 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
76 for (u=0; u<=MaxML; u++) { 125 assert(bitCost < scaleLog);
77 optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); 126 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
78 optPtr->matchLengthSum += optPtr->matchLengthFreq[u]; 127 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
79 } 128 } }
80 optPtr->offCodeSum = 0; 129
81 for (u=0; u<=MaxOff; u++) { 130 { unsigned of;
82 optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); 131 FSE_CState_t ofstate;
83 optPtr->offCodeSum += optPtr->offCodeFreq[u]; 132 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
84 } 133 optPtr->offCodeSum = 0;
85 } 134 for (of=0; of<=MaxOff; of++) {
86 135 U32 const scaleLog = 10;
87 ZSTD_setLog2Prices(optPtr); 136 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
88 } 137 assert(bitCost < scaleLog);
89 138 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
139 optPtr->offCodeSum += optPtr->offCodeFreq[of];
140 } }
141
142 } else { /* not a dictionary */
143
144 assert(optPtr->litFreq != NULL);
145 { unsigned lit = MaxLit;
146 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
147 }
148 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
149
150 { unsigned ll;
151 for (ll=0; ll<=MaxLL; ll++)
152 optPtr->litLengthFreq[ll] = 1;
153 }
154 optPtr->litLengthSum = MaxLL+1;
155
156 { unsigned ml;
157 for (ml=0; ml<=MaxML; ml++)
158 optPtr->matchLengthFreq[ml] = 1;
159 }
160 optPtr->matchLengthSum = MaxML+1;
161
162 { unsigned of;
163 for (of=0; of<=MaxOff; of++)
164 optPtr->offCodeFreq[of] = 1;
165 }
166 optPtr->offCodeSum = MaxOff+1;
167
168 }
169
170 } else { /* new block : re-use previous statistics, scaled down */
171
172 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
173 optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
174 optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
175 optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
176 }
177
178 ZSTD_setBasePrices(optPtr, optLevel);
179 }
90 180
91 /* ZSTD_rawLiteralsCost() : 181 /* ZSTD_rawLiteralsCost() :
92 * cost of literals (only) in given segment (which length can be null) 182 * price of literals (only) in specified segment (which length can be 0).
93 * does not include cost of literalLength symbol */ 183 * does not include price of literalLength symbol */
94 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, 184 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
95 const optState_t* const optPtr) 185 const optState_t* const optPtr,
96 { 186 int optLevel)
97 if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */ 187 {
98 if (litLength == 0) return 0; 188 if (litLength == 0) return 0;
99 189 if (optPtr->priceType == zop_predef)
100 /* literals */ 190 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
101 { U32 u; 191
102 U32 cost = litLength * optPtr->log2litSum; 192 /* dynamic statistics */
103 for (u=0; u < litLength; u++) 193 { U32 price = litLength * optPtr->litSumBasePrice;
104 cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1); 194 U32 u;
105 return cost; 195 for (u=0; u < litLength; u++) {
196 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
197 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
198 }
199 return price;
106 } 200 }
107 } 201 }
108 202
109 /* ZSTD_litLengthPrice() : 203 /* ZSTD_litLengthPrice() :
110 * cost of literalLength symbol */ 204 * cost of literalLength symbol */
111 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr) 205 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
112 { 206 {
113 if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1); 207 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
114 208
115 /* literal Length */ 209 /* dynamic statistics */
116 { U32 const llCode = ZSTD_LLcode(litLength); 210 { U32 const llCode = ZSTD_LLcode(litLength);
117 U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); 211 return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
118 return price; 212 }
119 }
120 }
121
122 /* ZSTD_litLengthPrice() :
123 * cost of the literal part of a sequence,
124 * including literals themselves, and literalLength symbol */
125 static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
126 const optState_t* const optPtr)
127 {
128 return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
129 + ZSTD_litLengthPrice(litLength, optPtr);
130 } 213 }
131 214
132 /* ZSTD_litLengthContribution() : 215 /* ZSTD_litLengthContribution() :
133 * @return ( cost(litlength) - cost(0) ) 216 * @return ( cost(litlength) - cost(0) )
134 * this value can then be added to rawLiteralsCost() 217 * this value can then be added to rawLiteralsCost()
135 * to provide a cost which is directly comparable to a match ending at same position */ 218 * to provide a cost which is directly comparable to a match ending at same position */
136 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr) 219 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
137 { 220 {
138 if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1); 221 if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
139 222
140 /* literal Length */ 223 /* dynamic statistics */
141 { U32 const llCode = ZSTD_LLcode(litLength); 224 { U32 const llCode = ZSTD_LLcode(litLength);
142 int const contribution = LL_bits[llCode] 225 int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
143 + ZSTD_highbit32(optPtr->litLengthFreq[0]+1) 226 + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
144 - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); 227 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
145 #if 1 228 #if 1
146 return contribution; 229 return contribution;
147 #else 230 #else
148 return MAX(0, contribution); /* sometimes better, sometimes not ... */ 231 return MAX(0, contribution); /* sometimes better, sometimes not ... */
149 #endif 232 #endif
153 /* ZSTD_literalsContribution() : 236 /* ZSTD_literalsContribution() :
154 * creates a fake cost for the literals part of a sequence 237 * creates a fake cost for the literals part of a sequence
155 * which can be compared to the ending cost of a match 238 * which can be compared to the ending cost of a match
156 * should a new match start at this position */ 239 * should a new match start at this position */
157 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength, 240 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
158 const optState_t* const optPtr) 241 const optState_t* const optPtr,
159 { 242 int optLevel)
160 int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr) 243 {
161 + ZSTD_litLengthContribution(litLength, optPtr); 244 int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
245 + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
162 return contribution; 246 return contribution;
163 } 247 }
164 248
165 /* ZSTD_getMatchPrice() : 249 /* ZSTD_getMatchPrice() :
166 * Provides the cost of the match part (offset + matchLength) of a sequence 250 * Provides the cost of the match part (offset + matchLength) of a sequence
167 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 251 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
168 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ 252 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
169 FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice( 253 FORCE_INLINE_TEMPLATE U32
170 U32 const offset, U32 const matchLength, 254 ZSTD_getMatchPrice(U32 const offset,
171 const optState_t* const optPtr, 255 U32 const matchLength,
172 int const optLevel) 256 const optState_t* const optPtr,
257 int const optLevel)
173 { 258 {
174 U32 price; 259 U32 price;
175 U32 const offCode = ZSTD_highbit32(offset+1); 260 U32 const offCode = ZSTD_highbit32(offset+1);
176 U32 const mlBase = matchLength - MINMATCH; 261 U32 const mlBase = matchLength - MINMATCH;
177 assert(matchLength >= MINMATCH); 262 assert(matchLength >= MINMATCH);
178 263
179 if (optPtr->staticPrices) /* fixed scheme, do not use statistics */ 264 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
180 return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode; 265 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
181 266
182 price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); 267 /* dynamic statistics */
183 if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */ 268 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
269 if ((optLevel<2) /*static*/ && offCode >= 20)
270 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
184 271
185 /* match Length */ 272 /* match Length */
186 { U32 const mlCode = ZSTD_MLcode(mlBase); 273 { U32 const mlCode = ZSTD_MLcode(mlBase);
187 price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1); 274 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
188 } 275 }
276
277 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
189 278
190 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); 279 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
191 return price; 280 return price;
192 } 281 }
193 282
283 /* ZSTD_updateStats() :
284 * assumption : literals + litLengtn <= iend */
194 static void ZSTD_updateStats(optState_t* const optPtr, 285 static void ZSTD_updateStats(optState_t* const optPtr,
195 U32 litLength, const BYTE* literals, 286 U32 litLength, const BYTE* literals,
196 U32 offsetCode, U32 matchLength) 287 U32 offsetCode, U32 matchLength)
197 { 288 {
198 /* literals */ 289 /* literals */
267 ***************************************/ 358 ***************************************/
268 /** ZSTD_insertBt1() : add one or multiple positions to tree. 359 /** ZSTD_insertBt1() : add one or multiple positions to tree.
269 * ip : assumed <= iend-8 . 360 * ip : assumed <= iend-8 .
270 * @return : nb of positions added */ 361 * @return : nb of positions added */
271 static U32 ZSTD_insertBt1( 362 static U32 ZSTD_insertBt1(
272 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, 363 ZSTD_matchState_t* ms,
273 const BYTE* const ip, const BYTE* const iend, 364 const BYTE* const ip, const BYTE* const iend,
274 U32 const mls, U32 const extDict) 365 U32 const mls, const int extDict)
275 { 366 {
367 const ZSTD_compressionParameters* const cParams = &ms->cParams;
276 U32* const hashTable = ms->hashTable; 368 U32* const hashTable = ms->hashTable;
277 U32 const hashLog = cParams->hashLog; 369 U32 const hashLog = cParams->hashLog;
278 size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 370 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
279 U32* const bt = ms->chainTable; 371 U32* const bt = ms->chainTable;
280 U32 const btLog = cParams->chainLog - 1; 372 U32 const btLog = cParams->chainLog - 1;
291 const U32 btLow = btMask >= current ? 0 : current - btMask; 383 const U32 btLow = btMask >= current ? 0 : current - btMask;
292 U32* smallerPtr = bt + 2*(current&btMask); 384 U32* smallerPtr = bt + 2*(current&btMask);
293 U32* largerPtr = smallerPtr + 1; 385 U32* largerPtr = smallerPtr + 1;
294 U32 dummy32; /* to be nullified at the end */ 386 U32 dummy32; /* to be nullified at the end */
295 U32 const windowLow = ms->window.lowLimit; 387 U32 const windowLow = ms->window.lowLimit;
388 U32 const matchLow = windowLow ? windowLow : 1;
296 U32 matchEndIdx = current+8+1; 389 U32 matchEndIdx = current+8+1;
297 size_t bestLength = 8; 390 size_t bestLength = 8;
298 U32 nbCompares = 1U << cParams->searchLog; 391 U32 nbCompares = 1U << cParams->searchLog;
299 #ifdef ZSTD_C_PREDICT 392 #ifdef ZSTD_C_PREDICT
300 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); 393 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
306 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); 399 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
307 400
308 assert(ip <= iend-8); /* required for h calculation */ 401 assert(ip <= iend-8); /* required for h calculation */
309 hashTable[h] = current; /* Update Hash Table */ 402 hashTable[h] = current; /* Update Hash Table */
310 403
311 while (nbCompares-- && (matchIndex > windowLow)) { 404 while (nbCompares-- && (matchIndex >= matchLow)) {
312 U32* const nextPtr = bt + 2*(matchIndex & btMask); 405 U32* const nextPtr = bt + 2*(matchIndex & btMask);
313 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 406 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
314 assert(matchIndex < current); 407 assert(matchIndex < current);
315 408
316 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ 409 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
332 predictedLarge = predictPtr[0] + (predictPtr[0]>0); 425 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
333 continue; 426 continue;
334 } 427 }
335 #endif 428 #endif
336 429
337 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { 430 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
338 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */ 431 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
339 match = base + matchIndex; 432 match = base + matchIndex;
340 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); 433 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
341 } else { 434 } else {
342 match = dictBase + matchIndex; 435 match = dictBase + matchIndex;
343 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 436 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
377 return matchEndIdx - (current + 8); 470 return matchEndIdx - (current + 8);
378 } 471 }
379 472
380 FORCE_INLINE_TEMPLATE 473 FORCE_INLINE_TEMPLATE
381 void ZSTD_updateTree_internal( 474 void ZSTD_updateTree_internal(
382 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, 475 ZSTD_matchState_t* ms,
383 const BYTE* const ip, const BYTE* const iend, 476 const BYTE* const ip, const BYTE* const iend,
384 const U32 mls, const U32 extDict) 477 const U32 mls, const ZSTD_dictMode_e dictMode)
385 { 478 {
386 const BYTE* const base = ms->window.base; 479 const BYTE* const base = ms->window.base;
387 U32 const target = (U32)(ip - base); 480 U32 const target = (U32)(ip - base);
388 U32 idx = ms->nextToUpdate; 481 U32 idx = ms->nextToUpdate;
389 DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)", 482 DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
390 idx, target, extDict); 483 idx, target, dictMode);
391 484
392 while(idx < target) 485 while(idx < target)
393 idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict); 486 idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
394 ms->nextToUpdate = target; 487 ms->nextToUpdate = target;
395 } 488 }
396 489
397 void ZSTD_updateTree( 490 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
398 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, 491 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict);
399 const BYTE* ip, const BYTE* iend)
400 {
401 ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
402 } 492 }
403 493
404 FORCE_INLINE_TEMPLATE 494 FORCE_INLINE_TEMPLATE
405 U32 ZSTD_insertBtAndGetAllMatches ( 495 U32 ZSTD_insertBtAndGetAllMatches (
406 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, 496 ZSTD_matchState_t* ms,
407 const BYTE* const ip, const BYTE* const iLimit, int const extDict, 497 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
408 U32 rep[ZSTD_REP_NUM], U32 const ll0, 498 U32 rep[ZSTD_REP_NUM], U32 const ll0,
409 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) 499 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
410 { 500 {
501 const ZSTD_compressionParameters* const cParams = &ms->cParams;
411 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); 502 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
412 const BYTE* const base = ms->window.base; 503 const BYTE* const base = ms->window.base;
413 U32 const current = (U32)(ip-base); 504 U32 const current = (U32)(ip-base);
414 U32 const hashLog = cParams->hashLog; 505 U32 const hashLog = cParams->hashLog;
415 U32 const minMatch = (mls==3) ? 3 : 4; 506 U32 const minMatch = (mls==3) ? 3 : 4;
424 U32 const dictLimit = ms->window.dictLimit; 515 U32 const dictLimit = ms->window.dictLimit;
425 const BYTE* const dictEnd = dictBase + dictLimit; 516 const BYTE* const dictEnd = dictBase + dictLimit;
426 const BYTE* const prefixStart = base + dictLimit; 517 const BYTE* const prefixStart = base + dictLimit;
427 U32 const btLow = btMask >= current ? 0 : current - btMask; 518 U32 const btLow = btMask >= current ? 0 : current - btMask;
428 U32 const windowLow = ms->window.lowLimit; 519 U32 const windowLow = ms->window.lowLimit;
520 U32 const matchLow = windowLow ? windowLow : 1;
429 U32* smallerPtr = bt + 2*(current&btMask); 521 U32* smallerPtr = bt + 2*(current&btMask);
430 U32* largerPtr = bt + 2*(current&btMask) + 1; 522 U32* largerPtr = bt + 2*(current&btMask) + 1;
431 U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ 523 U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
432 U32 dummy32; /* to be nullified at the end */ 524 U32 dummy32; /* to be nullified at the end */
433 U32 mnum = 0; 525 U32 mnum = 0;
434 U32 nbCompares = 1U << cParams->searchLog; 526 U32 nbCompares = 1U << cParams->searchLog;
435 527
528 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
529 const ZSTD_compressionParameters* const dmsCParams =
530 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
531 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
532 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
533 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
534 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
535 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
536 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
537 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
538 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
539 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
540
436 size_t bestLength = lengthToBeat-1; 541 size_t bestLength = lengthToBeat-1;
437 DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches"); 542 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
438 543
439 /* check repCode */ 544 /* check repCode */
440 { U32 const lastR = ZSTD_REP_NUM + ll0; 545 { U32 const lastR = ZSTD_REP_NUM + ll0;
441 U32 repCode; 546 U32 repCode;
442 for (repCode = ll0; repCode < lastR; repCode++) { 547 for (repCode = ll0; repCode < lastR; repCode++) {
447 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ 552 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
448 if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { 553 if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
449 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; 554 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
450 } 555 }
451 } else { /* repIndex < dictLimit || repIndex >= current */ 556 } else { /* repIndex < dictLimit || repIndex >= current */
452 const BYTE* const repMatch = dictBase + repIndex; 557 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
558 dmsBase + repIndex - dmsIndexDelta :
559 dictBase + repIndex;
453 assert(current >= windowLow); 560 assert(current >= windowLow);
454 if ( extDict /* this case only valid in extDict mode */ 561 if ( dictMode == ZSTD_extDict
455 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ 562 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
456 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) 563 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
457 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { 564 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
458 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; 565 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
566 }
567 if (dictMode == ZSTD_dictMatchState
568 && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
569 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
570 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
571 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
459 } } 572 } }
460 /* save longer solution */ 573 /* save longer solution */
461 if (repLen > bestLength) { 574 if (repLen > bestLength) {
462 DEBUGLOG(8, "found rep-match %u of length %u", 575 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
463 repCode - ll0, (U32)repLen); 576 repCode, ll0, repOffset, repLen);
464 bestLength = repLen; 577 bestLength = repLen;
465 matches[mnum].off = repCode - ll0; 578 matches[mnum].off = repCode - ll0;
466 matches[mnum].len = (U32)repLen; 579 matches[mnum].len = (U32)repLen;
467 mnum++; 580 mnum++;
468 if ( (repLen > sufficient_len) 581 if ( (repLen > sufficient_len)
471 } } } } 584 } } } }
472 585
473 /* HC3 match finder */ 586 /* HC3 match finder */
474 if ((mls == 3) /*static*/ && (bestLength < mls)) { 587 if ((mls == 3) /*static*/ && (bestLength < mls)) {
475 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); 588 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
476 if ((matchIndex3 > windowLow) 589 if ((matchIndex3 >= matchLow)
477 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { 590 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
478 size_t mlen; 591 size_t mlen;
479 if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) { 592 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
480 const BYTE* const match = base + matchIndex3; 593 const BYTE* const match = base + matchIndex3;
481 mlen = ZSTD_count(ip, match, iLimit); 594 mlen = ZSTD_count(ip, match, iLimit);
482 } else { 595 } else {
483 const BYTE* const match = dictBase + matchIndex3; 596 const BYTE* const match = dictBase + matchIndex3;
484 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); 597 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
496 mnum = 1; 609 mnum = 1;
497 if ( (mlen > sufficient_len) | 610 if ( (mlen > sufficient_len) |
498 (ip+mlen == iLimit) ) { /* best possible length */ 611 (ip+mlen == iLimit) ) { /* best possible length */
499 ms->nextToUpdate = current+1; /* skip insertion */ 612 ms->nextToUpdate = current+1; /* skip insertion */
500 return 1; 613 return 1;
501 } } } } 614 }
615 }
616 }
617 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
618 }
502 619
503 hashTable[h] = current; /* Update Hash Table */ 620 hashTable[h] = current; /* Update Hash Table */
504 621
505 while (nbCompares-- && (matchIndex > windowLow)) { 622 while (nbCompares-- && (matchIndex >= matchLow)) {
506 U32* const nextPtr = bt + 2*(matchIndex & btMask); 623 U32* const nextPtr = bt + 2*(matchIndex & btMask);
507 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 624 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
508 const BYTE* match; 625 const BYTE* match;
509 assert(current > matchIndex); 626 assert(current > matchIndex);
510 627
511 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { 628 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
512 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ 629 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
513 match = base + matchIndex; 630 match = base + matchIndex;
514 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); 631 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
515 } else { 632 } else {
516 match = dictBase + matchIndex; 633 match = dictBase + matchIndex;
518 if (matchIndex+matchLength >= dictLimit) 635 if (matchIndex+matchLength >= dictLimit)
519 match = base + matchIndex; /* prepare for match[matchLength] */ 636 match = base + matchIndex; /* prepare for match[matchLength] */
520 } 637 }
521 638
522 if (matchLength > bestLength) { 639 if (matchLength > bestLength) {
523 DEBUGLOG(8, "found match of length %u at distance %u", 640 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
524 (U32)matchLength, current - matchIndex); 641 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
525 assert(matchEndIdx > matchIndex); 642 assert(matchEndIdx > matchIndex);
526 if (matchLength > matchEndIdx - matchIndex) 643 if (matchLength > matchEndIdx - matchIndex)
527 matchEndIdx = matchIndex + (U32)matchLength; 644 matchEndIdx = matchIndex + (U32)matchLength;
528 bestLength = matchLength; 645 bestLength = matchLength;
529 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; 646 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
530 matches[mnum].len = (U32)matchLength; 647 matches[mnum].len = (U32)matchLength;
531 mnum++; 648 mnum++;
532 if (matchLength > ZSTD_OPT_NUM) break; 649 if ( (matchLength > ZSTD_OPT_NUM)
533 if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */ 650 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
534 break; /* drop, to preserve bt consistency (miss a little bit of compression) */ 651 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
652 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
535 } 653 }
536 } 654 }
537 655
538 if (match[matchLength] < ip[matchLength]) { 656 if (match[matchLength] < ip[matchLength]) {
539 /* match smaller than current */ 657 /* match smaller than current */
550 matchIndex = nextPtr[0]; 668 matchIndex = nextPtr[0];
551 } } 669 } }
552 670
553 *smallerPtr = *largerPtr = 0; 671 *smallerPtr = *largerPtr = 0;
554 672
673 if (dictMode == ZSTD_dictMatchState && nbCompares) {
674 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
675 U32 dictMatchIndex = dms->hashTable[dmsH];
676 const U32* const dmsBt = dms->chainTable;
677 commonLengthSmaller = commonLengthLarger = 0;
678 while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
679 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
680 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
681 const BYTE* match = dmsBase + dictMatchIndex;
682 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
683 if (dictMatchIndex+matchLength >= dmsHighLimit)
684 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
685
686 if (matchLength > bestLength) {
687 matchIndex = dictMatchIndex + dmsIndexDelta;
688 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
689 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
690 if (matchLength > matchEndIdx - matchIndex)
691 matchEndIdx = matchIndex + (U32)matchLength;
692 bestLength = matchLength;
693 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
694 matches[mnum].len = (U32)matchLength;
695 mnum++;
696 if ( (matchLength > ZSTD_OPT_NUM)
697 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
698 break; /* drop, to guarantee consistency (miss a little bit of compression) */
699 }
700 }
701
702 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
703 if (match[matchLength] < ip[matchLength]) {
704 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
705 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
706 } else {
707 /* match is larger than current */
708 commonLengthLarger = matchLength;
709 dictMatchIndex = nextPtr[0];
710 }
711 }
712 }
713
555 assert(matchEndIdx > current+8); 714 assert(matchEndIdx > current+8);
556 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ 715 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
557 return mnum; 716 return mnum;
558 } 717 }
559 718
560 719
561 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( 720 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
562 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, 721 ZSTD_matchState_t* ms,
563 const BYTE* ip, const BYTE* const iHighLimit, int const extDict, 722 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
564 U32 rep[ZSTD_REP_NUM], U32 const ll0, 723 U32 rep[ZSTD_REP_NUM], U32 const ll0,
565 ZSTD_match_t* matches, U32 const lengthToBeat) 724 ZSTD_match_t* matches, U32 const lengthToBeat)
566 { 725 {
726 const ZSTD_compressionParameters* const cParams = &ms->cParams;
567 U32 const matchLengthSearch = cParams->searchLength; 727 U32 const matchLengthSearch = cParams->searchLength;
568 DEBUGLOG(7, "ZSTD_BtGetAllMatches"); 728 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
569 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ 729 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
570 ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict); 730 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
571 switch(matchLengthSearch) 731 switch(matchLengthSearch)
572 { 732 {
573 case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3); 733 case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
574 default : 734 default :
575 case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4); 735 case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
576 case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5); 736 case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
577 case 7 : 737 case 7 :
578 case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6); 738 case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
579 } 739 }
580 } 740 }
581 741
582 742
583 /*-******************************* 743 /*-*******************************
585 *********************************/ 745 *********************************/
586 typedef struct repcodes_s { 746 typedef struct repcodes_s {
587 U32 rep[3]; 747 U32 rep[3];
588 } repcodes_t; 748 } repcodes_t;
589 749
590 repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) 750 static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
591 { 751 {
592 repcodes_t newReps; 752 repcodes_t newReps;
593 if (offset >= ZSTD_REP_NUM) { /* full offset */ 753 if (offset >= ZSTD_REP_NUM) { /* full offset */
594 newReps.rep[2] = rep[1]; 754 newReps.rep[2] = rep[1];
595 newReps.rep[1] = rep[0]; 755 newReps.rep[1] = rep[0];
607 } 767 }
608 return newReps; 768 return newReps;
609 } 769 }
610 770
611 771
612 typedef struct { 772 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
613 const BYTE* anchor; 773 {
614 U32 litlen; 774 return sol.litlen + sol.mlen;
615 U32 rawLitCost; 775 }
616 } cachedLiteralPrice_t; 776
617 777 FORCE_INLINE_TEMPLATE size_t
618 static U32 ZSTD_rawLiteralsCost_cached( 778 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
619 cachedLiteralPrice_t* const cachedLitPrice, 779 seqStore_t* seqStore,
620 const BYTE* const anchor, U32 const litlen, 780 U32 rep[ZSTD_REP_NUM],
621 const optState_t* const optStatePtr) 781 const void* src, size_t srcSize,
622 { 782 const int optLevel, const ZSTD_dictMode_e dictMode)
623 U32 startCost;
624 U32 remainingLength;
625 const BYTE* startPosition;
626
627 if (anchor == cachedLitPrice->anchor) {
628 startCost = cachedLitPrice->rawLitCost;
629 startPosition = anchor + cachedLitPrice->litlen;
630 assert(litlen >= cachedLitPrice->litlen);
631 remainingLength = litlen - cachedLitPrice->litlen;
632 } else {
633 startCost = 0;
634 startPosition = anchor;
635 remainingLength = litlen;
636 }
637
638 { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
639 cachedLitPrice->anchor = anchor;
640 cachedLitPrice->litlen = litlen;
641 cachedLitPrice->rawLitCost = rawLitCost;
642 return rawLitCost;
643 }
644 }
645
646 static U32 ZSTD_fullLiteralsCost_cached(
647 cachedLiteralPrice_t* const cachedLitPrice,
648 const BYTE* const anchor, U32 const litlen,
649 const optState_t* const optStatePtr)
650 {
651 return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
652 + ZSTD_litLengthPrice(litlen, optStatePtr);
653 }
654
655 static int ZSTD_literalsContribution_cached(
656 cachedLiteralPrice_t* const cachedLitPrice,
657 const BYTE* const anchor, U32 const litlen,
658 const optState_t* const optStatePtr)
659 {
660 int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
661 + ZSTD_litLengthContribution(litlen, optStatePtr);
662 return contribution;
663 }
664
665 FORCE_INLINE_TEMPLATE
666 size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
667 U32 rep[ZSTD_REP_NUM],
668 ZSTD_compressionParameters const* cParams,
669 const void* src, size_t srcSize,
670 const int optLevel, const int extDict)
671 { 783 {
672 optState_t* const optStatePtr = &ms->opt; 784 optState_t* const optStatePtr = &ms->opt;
673 const BYTE* const istart = (const BYTE*)src; 785 const BYTE* const istart = (const BYTE*)src;
674 const BYTE* ip = istart; 786 const BYTE* ip = istart;
675 const BYTE* anchor = istart; 787 const BYTE* anchor = istart;
676 const BYTE* const iend = istart + srcSize; 788 const BYTE* const iend = istart + srcSize;
677 const BYTE* const ilimit = iend - 8; 789 const BYTE* const ilimit = iend - 8;
678 const BYTE* const base = ms->window.base; 790 const BYTE* const base = ms->window.base;
679 const BYTE* const prefixStart = base + ms->window.dictLimit; 791 const BYTE* const prefixStart = base + ms->window.dictLimit;
792 const ZSTD_compressionParameters* const cParams = &ms->cParams;
680 793
681 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); 794 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
682 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4; 795 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
683 796
684 ZSTD_optimal_t* const opt = optStatePtr->priceTable; 797 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
685 ZSTD_match_t* const matches = optStatePtr->matchTable; 798 ZSTD_match_t* const matches = optStatePtr->matchTable;
686 cachedLiteralPrice_t cachedLitPrice; 799 ZSTD_optimal_t lastSequence;
687 800
688 /* init */ 801 /* init */
689 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic"); 802 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
803 assert(optLevel <= 2);
690 ms->nextToUpdate3 = ms->nextToUpdate; 804 ms->nextToUpdate3 = ms->nextToUpdate;
691 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); 805 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
692 ip += (ip==prefixStart); 806 ip += (ip==prefixStart);
693 memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
694 807
695 /* Match Loop */ 808 /* Match Loop */
696 while (ip < ilimit) { 809 while (ip < ilimit) {
697 U32 cur, last_pos = 0; 810 U32 cur, last_pos = 0;
698 U32 best_mlen, best_off;
699 811
700 /* find first match */ 812 /* find first match */
701 { U32 const litlen = (U32)(ip - anchor); 813 { U32 const litlen = (U32)(ip - anchor);
702 U32 const ll0 = !litlen; 814 U32 const ll0 = !litlen;
703 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch); 815 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
704 if (!nbMatches) { ip++; continue; } 816 if (!nbMatches) { ip++; continue; }
705 817
706 /* initialize opt[0] */ 818 /* initialize opt[0] */
707 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } 819 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
708 opt[0].mlen = 1; 820 opt[0].mlen = 0; /* means is_a_literal */
709 opt[0].litlen = litlen; 821 opt[0].litlen = litlen;
822 opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
710 823
711 /* large match -> immediate encoding */ 824 /* large match -> immediate encoding */
712 { U32 const maxML = matches[nbMatches-1].len; 825 { U32 const maxML = matches[nbMatches-1].len;
713 DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie", 826 U32 const maxOffset = matches[nbMatches-1].off;
714 nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart)); 827 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
828 nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
715 829
716 if (maxML > sufficient_len) { 830 if (maxML > sufficient_len) {
717 best_mlen = maxML; 831 lastSequence.litlen = litlen;
718 best_off = matches[nbMatches-1].off; 832 lastSequence.mlen = maxML;
719 DEBUGLOG(7, "large match (%u>%u), immediate encoding", 833 lastSequence.off = maxOffset;
720 best_mlen, sufficient_len); 834 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
835 maxML, sufficient_len);
721 cur = 0; 836 cur = 0;
722 last_pos = 1; 837 last_pos = ZSTD_totalLen(lastSequence);
723 goto _shortestPath; 838 goto _shortestPath;
724 } } 839 } }
725 840
726 /* set prices for first matches starting position == 0 */ 841 /* set prices for first matches starting position == 0 */
727 { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr); 842 { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
728 U32 pos; 843 U32 pos;
729 U32 matchNb; 844 U32 matchNb;
730 for (pos = 0; pos < minMatch; pos++) { 845 for (pos = 1; pos < minMatch; pos++) {
731 opt[pos].mlen = 1; 846 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
732 opt[pos].price = ZSTD_MAX_PRICE;
733 } 847 }
734 for (matchNb = 0; matchNb < nbMatches; matchNb++) { 848 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
735 U32 const offset = matches[matchNb].off; 849 U32 const offset = matches[matchNb].off;
736 U32 const end = matches[matchNb].len; 850 U32 const end = matches[matchNb].len;
737 repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); 851 repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
738 for ( ; pos <= end ; pos++ ) { 852 for ( ; pos <= end ; pos++ ) {
739 U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); 853 U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
740 DEBUGLOG(7, "rPos:%u => set initial price : %u", 854 U32 const sequencePrice = literalsPrice + matchPrice;
741 pos, matchPrice); 855 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
856 pos, ZSTD_fCost(sequencePrice));
742 opt[pos].mlen = pos; 857 opt[pos].mlen = pos;
743 opt[pos].off = offset; 858 opt[pos].off = offset;
744 opt[pos].litlen = litlen; 859 opt[pos].litlen = litlen;
745 opt[pos].price = matchPrice; 860 opt[pos].price = sequencePrice;
861 ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
746 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); 862 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
747 } } 863 } }
748 last_pos = pos-1; 864 last_pos = pos-1;
749 } 865 }
750 } 866 }
751 867
752 /* check further positions */ 868 /* check further positions */
753 for (cur = 1; cur <= last_pos; cur++) { 869 for (cur = 1; cur <= last_pos; cur++) {
754 const BYTE* const inr = ip + cur; 870 const BYTE* const inr = ip + cur;
755 assert(cur < ZSTD_OPT_NUM); 871 assert(cur < ZSTD_OPT_NUM);
872 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
756 873
757 /* Fix current position with one literal if cheaper */ 874 /* Fix current position with one literal if cheaper */
758 { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1; 875 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
759 int price; /* note : contribution can be negative */ 876 int const price = opt[cur-1].price
760 if (cur > litlen) { 877 + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
761 price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr); 878 + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
762 } else { 879 - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
763 price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
764 }
765 assert(price < 1000000000); /* overflow check */ 880 assert(price < 1000000000); /* overflow check */
766 if (price <= opt[cur].price) { 881 if (price <= opt[cur].price) {
767 DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal", 882 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
768 cur, price, opt[cur].price); 883 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
769 opt[cur].mlen = 1; 884 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
885 opt[cur].mlen = 0;
770 opt[cur].off = 0; 886 opt[cur].off = 0;
771 opt[cur].litlen = litlen; 887 opt[cur].litlen = litlen;
772 opt[cur].price = price; 888 opt[cur].price = price;
773 memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep)); 889 memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
774 } } 890 } else {
891 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
892 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
893 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
894 }
895 }
775 896
776 /* last match must start at a minimum distance of 8 from oend */ 897 /* last match must start at a minimum distance of 8 from oend */
777 if (inr > ilimit) continue; 898 if (inr > ilimit) continue;
778 899
779 if (cur == last_pos) break; 900 if (cur == last_pos) break;
780 901
781 if ( (optLevel==0) /*static*/ 902 if ( (optLevel==0) /*static_test*/
782 && (opt[cur+1].price <= opt[cur].price) ) 903 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
904 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
783 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ 905 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
784 906 }
785 { U32 const ll0 = (opt[cur].mlen != 1); 907
786 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0; 908 { U32 const ll0 = (opt[cur].mlen != 0);
787 U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0; 909 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
788 U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr); 910 U32 const previousPrice = opt[cur].price;
789 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch); 911 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
912 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
790 U32 matchNb; 913 U32 matchNb;
791 if (!nbMatches) continue; 914 if (!nbMatches) {
915 DEBUGLOG(7, "rPos:%u : no match found", cur);
916 continue;
917 }
792 918
793 { U32 const maxML = matches[nbMatches-1].len; 919 { U32 const maxML = matches[nbMatches-1].len;
794 DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u", 920 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
795 cur, nbMatches, maxML); 921 inr-istart, cur, nbMatches, maxML);
796 922
797 if ( (maxML > sufficient_len) 923 if ( (maxML > sufficient_len)
798 | (cur + maxML >= ZSTD_OPT_NUM) ) { 924 || (cur + maxML >= ZSTD_OPT_NUM) ) {
799 best_mlen = maxML; 925 lastSequence.mlen = maxML;
800 best_off = matches[nbMatches-1].off; 926 lastSequence.off = matches[nbMatches-1].off;
801 last_pos = cur + 1; 927 lastSequence.litlen = litlen;
928 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
929 last_pos = cur + ZSTD_totalLen(lastSequence);
930 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
802 goto _shortestPath; 931 goto _shortestPath;
803 } 932 } }
804 }
805 933
806 /* set prices using matches found at position == cur */ 934 /* set prices using matches found at position == cur */
807 for (matchNb = 0; matchNb < nbMatches; matchNb++) { 935 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
808 U32 const offset = matches[matchNb].off; 936 U32 const offset = matches[matchNb].off;
809 repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); 937 repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
810 U32 const lastML = matches[matchNb].len; 938 U32 const lastML = matches[matchNb].len;
811 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; 939 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
812 U32 mlen; 940 U32 mlen;
813 941
814 DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u", 942 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
815 matchNb, matches[matchNb].off, lastML, litlen); 943 matchNb, matches[matchNb].off, lastML, litlen);
816 944
817 for (mlen = lastML; mlen >= startML; mlen--) { 945 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
818 U32 const pos = cur + mlen; 946 U32 const pos = cur + mlen;
819 int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); 947 int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
820 948
821 if ((pos > last_pos) || (price < opt[pos].price)) { 949 if ((pos > last_pos) || (price < opt[pos].price)) {
822 DEBUGLOG(7, "rPos:%u => new better price (%u<%u)", 950 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
823 pos, price, opt[pos].price); 951 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
824 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } 952 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
825 opt[pos].mlen = mlen; 953 opt[pos].mlen = mlen;
826 opt[pos].off = offset; 954 opt[pos].off = offset;
827 opt[pos].litlen = litlen; 955 opt[pos].litlen = litlen;
828 opt[pos].price = price; 956 opt[pos].price = price;
957 ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
829 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); 958 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
830 } else { 959 } else {
831 if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */ 960 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
961 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
962 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
832 } 963 }
833 } } } 964 } } }
834 } /* for (cur = 1; cur <= last_pos; cur++) */ 965 } /* for (cur = 1; cur <= last_pos; cur++) */
835 966
836 best_mlen = opt[last_pos].mlen; 967 lastSequence = opt[last_pos];
837 best_off = opt[last_pos].off; 968 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
838 cur = last_pos - best_mlen; 969 assert(cur < ZSTD_OPT_NUM); /* control overflow*/
839 970
840 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ 971 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
841 assert(opt[0].mlen == 1); 972 assert(opt[0].mlen == 0);
842 973
843 /* reverse traversal */ 974 { U32 const storeEnd = cur + 1;
844 DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)", 975 U32 storeStart = storeEnd;
845 last_pos, cur); 976 U32 seqPos = cur;
846 { U32 selectedMatchLength = best_mlen; 977
847 U32 selectedOffset = best_off; 978 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
848 U32 pos = cur; 979 last_pos, cur); (void)last_pos;
849 while (1) { 980 assert(storeEnd < ZSTD_OPT_NUM);
850 U32 const mlen = opt[pos].mlen; 981 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
851 U32 const off = opt[pos].off; 982 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
852 opt[pos].mlen = selectedMatchLength; 983 opt[storeEnd] = lastSequence;
853 opt[pos].off = selectedOffset; 984 while (seqPos > 0) {
854 selectedMatchLength = mlen; 985 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
855 selectedOffset = off; 986 storeStart--;
856 if (mlen > pos) break; 987 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
857 pos -= mlen; 988 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
858 } } 989 opt[storeStart] = opt[seqPos];
859 990 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
860 /* save sequences */ 991 }
861 { U32 pos; 992
862 for (pos=0; pos < last_pos; ) { 993 /* save sequences */
863 U32 const llen = (U32)(ip - anchor); 994 DEBUGLOG(6, "sending selected sequences into seqStore")
864 U32 const mlen = opt[pos].mlen; 995 { U32 storePos;
865 U32 const offset = opt[pos].off; 996 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
866 if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */ 997 U32 const llen = opt[storePos].litlen;
867 pos += mlen; ip += mlen; 998 U32 const mlen = opt[storePos].mlen;
868 999 U32 const offCode = opt[storePos].off;
869 /* repcodes update : like ZSTD_updateRep(), but update in place */ 1000 U32 const advance = llen + mlen;
870 if (offset >= ZSTD_REP_NUM) { /* full offset */ 1001 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
871 rep[2] = rep[1]; 1002 anchor - istart, llen, mlen);
872 rep[1] = rep[0]; 1003
873 rep[0] = offset - ZSTD_REP_MOVE; 1004 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
874 } else { /* repcode */ 1005 assert(storePos == storeEnd); /* must be last sequence */
875 U32 const repCode = offset + (llen==0); 1006 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
876 if (repCode) { /* note : if repCode==0, no change */ 1007 continue; /* will finish */
877 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; 1008 }
878 if (repCode >= 2) rep[2] = rep[1]; 1009
1010 /* repcodes update : like ZSTD_updateRep(), but update in place */
1011 if (offCode >= ZSTD_REP_NUM) { /* full offset */
1012 rep[2] = rep[1];
879 rep[1] = rep[0]; 1013 rep[1] = rep[0];
880 rep[0] = currentOffset; 1014 rep[0] = offCode - ZSTD_REP_MOVE;
881 } 1015 } else { /* repcode */
882 } 1016 U32 const repCode = offCode + (llen==0);
883 1017 if (repCode) { /* note : if repCode==0, no change */
884 ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen); 1018 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
885 ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH); 1019 if (repCode >= 2) rep[2] = rep[1];
886 anchor = ip; 1020 rep[1] = rep[0];
887 } } 1021 rep[0] = currentOffset;
888 ZSTD_setLog2Prices(optStatePtr); 1022 } }
1023
1024 assert(anchor + llen <= iend);
1025 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1026 ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
1027 anchor += advance;
1028 ip = anchor;
1029 } }
1030 ZSTD_setBasePrices(optStatePtr, optLevel);
1031 }
1032
889 } /* while (ip < ilimit) */ 1033 } /* while (ip < ilimit) */
890 1034
891 /* Return the last literals size */ 1035 /* Return the last literals size */
892 return iend - anchor; 1036 return iend - anchor;
893 } 1037 }
894 1038
895 1039
896 size_t ZSTD_compressBlock_btopt( 1040 size_t ZSTD_compressBlock_btopt(
897 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1041 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
898 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) 1042 const void* src, size_t srcSize)
899 { 1043 {
900 DEBUGLOG(5, "ZSTD_compressBlock_btopt"); 1044 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
901 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/); 1045 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1046 }
1047
1048
1049 /* used in 2-pass strategy */
1050 static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
1051 {
1052 U32 s, sum=0;
1053 assert(ZSTD_FREQ_DIV+bonus > 0);
1054 for (s=0; s<=lastEltIndex; s++) {
1055 table[s] <<= ZSTD_FREQ_DIV+bonus;
1056 table[s]--;
1057 sum += table[s];
1058 }
1059 return sum;
1060 }
1061
1062 /* used in 2-pass strategy */
1063 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1064 {
1065 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1066 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
1067 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
1068 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
902 } 1069 }
903 1070
904 size_t ZSTD_compressBlock_btultra( 1071 size_t ZSTD_compressBlock_btultra(
905 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1072 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
906 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) 1073 const void* src, size_t srcSize)
907 { 1074 {
908 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/); 1075 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1076 #if 0
1077 /* 2-pass strategy (disabled)
1078 * this strategy makes a first pass over first block to collect statistics
1079 * and seed next round's statistics with it.
1080 * The compression ratio gain is generally small (~0.5% on first block),
1081 * the cost is 2x cpu time on first block. */
1082 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1083 if ( (ms->opt.litLengthSum==0) /* first block */
1084 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1085 && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
1086 U32 tmpRep[ZSTD_REP_NUM];
1087 DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
1088 assert(ms->nextToUpdate >= ms->window.dictLimit
1089 && ms->nextToUpdate <= ms->window.dictLimit + 1);
1090 memcpy(tmpRep, rep, sizeof(tmpRep));
1091 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1092 ZSTD_resetSeqStore(seqStore);
1093 /* invalidate first scan from history */
1094 ms->window.base -= srcSize;
1095 ms->window.dictLimit += (U32)srcSize;
1096 ms->window.lowLimit = ms->window.dictLimit;
1097 ms->nextToUpdate = ms->window.dictLimit;
1098 ms->nextToUpdate3 = ms->window.dictLimit;
1099 /* re-inforce weight of collected statistics */
1100 ZSTD_upscaleStats(&ms->opt);
1101 }
1102 #endif
1103 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1104 }
1105
1106 size_t ZSTD_compressBlock_btopt_dictMatchState(
1107 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1108 const void* src, size_t srcSize)
1109 {
1110 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1111 }
1112
1113 size_t ZSTD_compressBlock_btultra_dictMatchState(
1114 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1115 const void* src, size_t srcSize)
1116 {
1117 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
909 } 1118 }
910 1119
911 size_t ZSTD_compressBlock_btopt_extDict( 1120 size_t ZSTD_compressBlock_btopt_extDict(
912 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1121 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
913 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) 1122 const void* src, size_t srcSize)
914 { 1123 {
915 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/); 1124 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
916 } 1125 }
917 1126
918 size_t ZSTD_compressBlock_btultra_extDict( 1127 size_t ZSTD_compressBlock_btultra_extDict(
919 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1128 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
920 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) 1129 const void* src, size_t srcSize)
921 { 1130 {
922 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/); 1131 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
923 } 1132 }