contrib/python-zstandard/zstd/compress/zstd_ldm.c
changeset 42070 675775c33ab6
parent 40121 73fef626dae3
child 42937 69de49c4e39c
equal deleted inserted replaced
42069:668eff08387f 42070:675775c33ab6
    35     }
    35     }
    36     if (params->hashLog == 0) {
    36     if (params->hashLog == 0) {
    37         params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
    37         params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
    38         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    38         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    39     }
    39     }
    40     if (params->hashEveryLog == 0) {
    40     if (params->hashRateLog == 0) {
    41         params->hashEveryLog = params->windowLog < params->hashLog
    41         params->hashRateLog = params->windowLog < params->hashLog
    42                                    ? 0
    42                                    ? 0
    43                                    : params->windowLog - params->hashLog;
    43                                    : params->windowLog - params->hashLog;
    44     }
    44     }
    45     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
    45     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
    46 }
    46 }
   117 
   117 
   118 /** ZSTD_ldm_makeEntryAndInsertByTag() :
   118 /** ZSTD_ldm_makeEntryAndInsertByTag() :
   119  *
   119  *
   120  *  Gets the small hash, checksum, and tag from the rollingHash.
   120  *  Gets the small hash, checksum, and tag from the rollingHash.
   121  *
   121  *
   122  *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
   122  *  If the tag matches (1 << ldmParams.hashRateLog)-1, then
   123  *  creates an ldmEntry from the offset, and inserts it into the hash table.
   123  *  creates an ldmEntry from the offset, and inserts it into the hash table.
   124  *
   124  *
   125  *  hBits is the length of the small hash, which is the most significant hBits
   125  *  hBits is the length of the small hash, which is the most significant hBits
   126  *  of rollingHash. The checksum is the next 32 most significant bits, followed
   126  *  of rollingHash. The checksum is the next 32 most significant bits, followed
   127  *  by ldmParams.hashEveryLog bits that make up the tag. */
   127  *  by ldmParams.hashRateLog bits that make up the tag. */
   128 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
   128 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
   129                                              U64 const rollingHash,
   129                                              U64 const rollingHash,
   130                                              U32 const hBits,
   130                                              U32 const hBits,
   131                                              U32 const offset,
   131                                              U32 const offset,
   132                                              ldmParams_t const ldmParams)
   132                                              ldmParams_t const ldmParams)
   133 {
   133 {
   134     U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
   134     U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
   135     U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
   135     U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
   136     if (tag == tagMask) {
   136     if (tag == tagMask) {
   137         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
   137         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
   138         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
   138         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
   139         ldmEntry_t entry;
   139         ldmEntry_t entry;
   140         entry.offset = offset;
   140         entry.offset = offset;
   141         entry.checksum = checksum;
   141         entry.checksum = checksum;
   142         ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
   142         ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
   143     }
   143     }
   144 }
   144 }
   145 
   145 
   146 /** ZSTD_ldm_getRollingHash() :
       
   147  *  Get a 64-bit hash using the first len bytes from buf.
       
   148  *
       
   149  *  Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
       
   150  *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
       
   151  *
       
   152  *  where the constant a is defined to be prime8bytes.
       
   153  *
       
   154  *  The implementation adds an offset to each byte, so
       
   155  *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
       
   156 static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
       
   157 {
       
   158     U64 ret = 0;
       
   159     U32 i;
       
   160     for (i = 0; i < len; i++) {
       
   161         ret *= prime8bytes;
       
   162         ret += buf[i] + LDM_HASH_CHAR_OFFSET;
       
   163     }
       
   164     return ret;
       
   165 }
       
   166 
       
   167 /** ZSTD_ldm_ipow() :
       
   168  *  Return base^exp. */
       
   169 static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
       
   170 {
       
   171     U64 ret = 1;
       
   172     while (exp) {
       
   173         if (exp & 1) { ret *= base; }
       
   174         exp >>= 1;
       
   175         base *= base;
       
   176     }
       
   177     return ret;
       
   178 }
       
   179 
       
   180 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
       
   181     DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
       
   182     assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
       
   183     return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
       
   184 }
       
   185 
       
   186 /** ZSTD_ldm_updateHash() :
       
   187  *  Updates hash by removing toRemove and adding toAdd. */
       
   188 static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
       
   189 {
       
   190     hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
       
   191     hash *= prime8bytes;
       
   192     hash += toAdd + LDM_HASH_CHAR_OFFSET;
       
   193     return hash;
       
   194 }
       
   195 
       
   196 /** ZSTD_ldm_countBackwardsMatch() :
   146 /** ZSTD_ldm_countBackwardsMatch() :
   197  *  Returns the number of bytes that match backwards before pIn and pMatch.
   147  *  Returns the number of bytes that match backwards before pIn and pMatch.
   198  *
   148  *
   199  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
   149  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
   200 static size_t ZSTD_ldm_countBackwardsMatch(
   150 static size_t ZSTD_ldm_countBackwardsMatch(
   236     case ZSTD_lazy:
   186     case ZSTD_lazy:
   237     case ZSTD_lazy2:
   187     case ZSTD_lazy2:
   238     case ZSTD_btlazy2:
   188     case ZSTD_btlazy2:
   239     case ZSTD_btopt:
   189     case ZSTD_btopt:
   240     case ZSTD_btultra:
   190     case ZSTD_btultra:
       
   191     case ZSTD_btultra2:
   241         break;
   192         break;
   242     default:
   193     default:
   243         assert(0);  /* not possible : not a valid strategy id */
   194         assert(0);  /* not possible : not a valid strategy id */
   244     }
   195     }
   245 
   196 
   259 {
   210 {
   260     U64 rollingHash = lastHash;
   211     U64 rollingHash = lastHash;
   261     const BYTE* cur = lastHashed + 1;
   212     const BYTE* cur = lastHashed + 1;
   262 
   213 
   263     while (cur < iend) {
   214     while (cur < iend) {
   264         rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
   215         rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
   265                                           cur[ldmParams.minMatchLength-1],
   216                                               cur[ldmParams.minMatchLength-1],
   266                                           state->hashPower);
   217                                               state->hashPower);
   267         ZSTD_ldm_makeEntryAndInsertByTag(state,
   218         ZSTD_ldm_makeEntryAndInsertByTag(state,
   268                                          rollingHash, hBits,
   219                                          rollingHash, hBits,
   269                                          (U32)(cur - base), ldmParams);
   220                                          (U32)(cur - base), ldmParams);
   270         ++cur;
   221         ++cur;
   271     }
   222     }
   295     int const extDict = ZSTD_window_hasExtDict(ldmState->window);
   246     int const extDict = ZSTD_window_hasExtDict(ldmState->window);
   296     U32 const minMatchLength = params->minMatchLength;
   247     U32 const minMatchLength = params->minMatchLength;
   297     U64 const hashPower = ldmState->hashPower;
   248     U64 const hashPower = ldmState->hashPower;
   298     U32 const hBits = params->hashLog - params->bucketSizeLog;
   249     U32 const hBits = params->hashLog - params->bucketSizeLog;
   299     U32 const ldmBucketSize = 1U << params->bucketSizeLog;
   250     U32 const ldmBucketSize = 1U << params->bucketSizeLog;
   300     U32 const hashEveryLog = params->hashEveryLog;
   251     U32 const hashRateLog = params->hashRateLog;
   301     U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
   252     U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
   302     /* Prefix and extDict parameters */
   253     /* Prefix and extDict parameters */
   303     U32 const dictLimit = ldmState->window.dictLimit;
   254     U32 const dictLimit = ldmState->window.dictLimit;
   304     U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
   255     U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
   305     BYTE const* const base = ldmState->window.base;
   256     BYTE const* const base = ldmState->window.base;
   306     BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
   257     BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
   322         size_t mLength;
   273         size_t mLength;
   323         U32 const current = (U32)(ip - base);
   274         U32 const current = (U32)(ip - base);
   324         size_t forwardMatchLength = 0, backwardMatchLength = 0;
   275         size_t forwardMatchLength = 0, backwardMatchLength = 0;
   325         ldmEntry_t* bestEntry = NULL;
   276         ldmEntry_t* bestEntry = NULL;
   326         if (ip != istart) {
   277         if (ip != istart) {
   327             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
   278             rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
   328                                               lastHashed[minMatchLength],
   279                                                   lastHashed[minMatchLength],
   329                                               hashPower);
   280                                                   hashPower);
   330         } else {
   281         } else {
   331             rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
   282             rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
   332         }
   283         }
   333         lastHashed = ip;
   284         lastHashed = ip;
   334 
   285 
   335         /* Do not insert and do not look for a match */
   286         /* Do not insert and do not look for a match */
   336         if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
   287         if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
   337            ip++;
   288            ip++;
   338            continue;
   289            continue;
   339         }
   290         }
   340 
   291 
   341         /* Get the best entry and compute the match lengths */
   292         /* Get the best entry and compute the match lengths */
   591 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
   542 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
   592     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   543     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   593     void const* src, size_t srcSize)
   544     void const* src, size_t srcSize)
   594 {
   545 {
   595     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   546     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   596     unsigned const minMatch = cParams->searchLength;
   547     unsigned const minMatch = cParams->minMatch;
   597     ZSTD_blockCompressor const blockCompressor =
   548     ZSTD_blockCompressor const blockCompressor =
   598         ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
   549         ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
   599     /* Input bounds */
   550     /* Input bounds */
   600     BYTE const* const istart = (BYTE const*)src;
   551     BYTE const* const istart = (BYTE const*)src;
   601     BYTE const* const iend = istart + srcSize;
   552     BYTE const* const iend = istart + srcSize;