117 |
117 |
118 /** ZSTD_ldm_makeEntryAndInsertByTag() : |
118 /** ZSTD_ldm_makeEntryAndInsertByTag() : |
119 * |
119 * |
120 * Gets the small hash, checksum, and tag from the rollingHash. |
120 * Gets the small hash, checksum, and tag from the rollingHash. |
121 * |
121 * |
122 * If the tag matches (1 << ldmParams.hashEveryLog)-1, then |
122 * If the tag matches (1 << ldmParams.hashRateLog)-1, then |
123 * creates an ldmEntry from the offset, and inserts it into the hash table. |
123 * creates an ldmEntry from the offset, and inserts it into the hash table. |
124 * |
124 * |
125 * hBits is the length of the small hash, which is the most significant hBits |
125 * hBits is the length of the small hash, which is the most significant hBits |
126 * of rollingHash. The checksum is the next 32 most significant bits, followed |
126 * of rollingHash. The checksum is the next 32 most significant bits, followed |
127 * by ldmParams.hashEveryLog bits that make up the tag. */ |
127 * by ldmParams.hashRateLog bits that make up the tag. */ |
128 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, |
128 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, |
129 U64 const rollingHash, |
129 U64 const rollingHash, |
130 U32 const hBits, |
130 U32 const hBits, |
131 U32 const offset, |
131 U32 const offset, |
132 ldmParams_t const ldmParams) |
132 ldmParams_t const ldmParams) |
133 { |
133 { |
134 U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog); |
134 U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); |
135 U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; |
135 U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; |
136 if (tag == tagMask) { |
136 if (tag == tagMask) { |
137 U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); |
137 U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); |
138 U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); |
138 U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); |
139 ldmEntry_t entry; |
139 ldmEntry_t entry; |
140 entry.offset = offset; |
140 entry.offset = offset; |
141 entry.checksum = checksum; |
141 entry.checksum = checksum; |
142 ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); |
142 ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); |
143 } |
143 } |
144 } |
144 } |
145 |
145 |
146 /** ZSTD_ldm_getRollingHash() : |
|
147 * Get a 64-bit hash using the first len bytes from buf. |
|
148 * |
|
149 * Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be |
|
150 * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0) |
|
151 * |
|
152 * where the constant a is defined to be prime8bytes. |
|
153 * |
|
154 * The implementation adds an offset to each byte, so |
|
155 * H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */ |
|
156 static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len) |
|
157 { |
|
158 U64 ret = 0; |
|
159 U32 i; |
|
160 for (i = 0; i < len; i++) { |
|
161 ret *= prime8bytes; |
|
162 ret += buf[i] + LDM_HASH_CHAR_OFFSET; |
|
163 } |
|
164 return ret; |
|
165 } |
|
166 |
|
167 /** ZSTD_ldm_ipow() : |
|
168 * Return base^exp. */ |
|
169 static U64 ZSTD_ldm_ipow(U64 base, U64 exp) |
|
170 { |
|
171 U64 ret = 1; |
|
172 while (exp) { |
|
173 if (exp & 1) { ret *= base; } |
|
174 exp >>= 1; |
|
175 base *= base; |
|
176 } |
|
177 return ret; |
|
178 } |
|
179 |
|
180 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) { |
|
181 DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength); |
|
182 assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN); |
|
183 return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1); |
|
184 } |
|
185 |
|
186 /** ZSTD_ldm_updateHash() : |
|
187 * Updates hash by removing toRemove and adding toAdd. */ |
|
188 static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower) |
|
189 { |
|
190 hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower); |
|
191 hash *= prime8bytes; |
|
192 hash += toAdd + LDM_HASH_CHAR_OFFSET; |
|
193 return hash; |
|
194 } |
|
195 |
|
196 /** ZSTD_ldm_countBackwardsMatch() : |
146 /** ZSTD_ldm_countBackwardsMatch() : |
197 * Returns the number of bytes that match backwards before pIn and pMatch. |
147 * Returns the number of bytes that match backwards before pIn and pMatch. |
198 * |
148 * |
199 * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ |
149 * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ |
200 static size_t ZSTD_ldm_countBackwardsMatch( |
150 static size_t ZSTD_ldm_countBackwardsMatch( |
295 int const extDict = ZSTD_window_hasExtDict(ldmState->window); |
246 int const extDict = ZSTD_window_hasExtDict(ldmState->window); |
296 U32 const minMatchLength = params->minMatchLength; |
247 U32 const minMatchLength = params->minMatchLength; |
297 U64 const hashPower = ldmState->hashPower; |
248 U64 const hashPower = ldmState->hashPower; |
298 U32 const hBits = params->hashLog - params->bucketSizeLog; |
249 U32 const hBits = params->hashLog - params->bucketSizeLog; |
299 U32 const ldmBucketSize = 1U << params->bucketSizeLog; |
250 U32 const ldmBucketSize = 1U << params->bucketSizeLog; |
300 U32 const hashEveryLog = params->hashEveryLog; |
251 U32 const hashRateLog = params->hashRateLog; |
301 U32 const ldmTagMask = (1U << params->hashEveryLog) - 1; |
252 U32 const ldmTagMask = (1U << params->hashRateLog) - 1; |
302 /* Prefix and extDict parameters */ |
253 /* Prefix and extDict parameters */ |
303 U32 const dictLimit = ldmState->window.dictLimit; |
254 U32 const dictLimit = ldmState->window.dictLimit; |
304 U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; |
255 U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; |
305 BYTE const* const base = ldmState->window.base; |
256 BYTE const* const base = ldmState->window.base; |
306 BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; |
257 BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; |
322 size_t mLength; |
273 size_t mLength; |
323 U32 const current = (U32)(ip - base); |
274 U32 const current = (U32)(ip - base); |
324 size_t forwardMatchLength = 0, backwardMatchLength = 0; |
275 size_t forwardMatchLength = 0, backwardMatchLength = 0; |
325 ldmEntry_t* bestEntry = NULL; |
276 ldmEntry_t* bestEntry = NULL; |
326 if (ip != istart) { |
277 if (ip != istart) { |
327 rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], |
278 rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], |
328 lastHashed[minMatchLength], |
279 lastHashed[minMatchLength], |
329 hashPower); |
280 hashPower); |
330 } else { |
281 } else { |
331 rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength); |
282 rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); |
332 } |
283 } |
333 lastHashed = ip; |
284 lastHashed = ip; |
334 |
285 |
335 /* Do not insert and do not look for a match */ |
286 /* Do not insert and do not look for a match */ |
336 if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) { |
287 if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { |
337 ip++; |
288 ip++; |
338 continue; |
289 continue; |
339 } |
290 } |
340 |
291 |
341 /* Get the best entry and compute the match lengths */ |
292 /* Get the best entry and compute the match lengths */ |
591 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, |
542 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, |
592 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
543 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
593 void const* src, size_t srcSize) |
544 void const* src, size_t srcSize) |
594 { |
545 { |
595 const ZSTD_compressionParameters* const cParams = &ms->cParams; |
546 const ZSTD_compressionParameters* const cParams = &ms->cParams; |
596 unsigned const minMatch = cParams->searchLength; |
547 unsigned const minMatch = cParams->minMatch; |
597 ZSTD_blockCompressor const blockCompressor = |
548 ZSTD_blockCompressor const blockCompressor = |
598 ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); |
549 ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); |
599 /* Input bounds */ |
550 /* Input bounds */ |
600 BYTE const* const istart = (BYTE const*)src; |
551 BYTE const* const istart = (BYTE const*)src; |
601 BYTE const* const iend = istart + srcSize; |
552 BYTE const* const iend = istart + srcSize; |