mercurial/revlog.py
changeset 39357 655b5b465953
parent 39356 729082bb9938
child 39359 6f4b8f607a31
    15 
    15 
    16 import collections
    16 import collections
    17 import contextlib
    17 import contextlib
    18 import errno
    18 import errno
    19 import hashlib
    19 import hashlib
    20 import heapq
       
    21 import os
    20 import os
    22 import re
    21 import re
    23 import struct
    22 import struct
    24 import zlib
    23 import zlib
    25 
    24 
    37 )
    36 )
    38 from .i18n import _
    37 from .i18n import _
    39 from .revlogutils.constants import (
    38 from .revlogutils.constants import (
    40     FLAG_GENERALDELTA,
    39     FLAG_GENERALDELTA,
    41     FLAG_INLINE_DATA,
    40     FLAG_INLINE_DATA,
    42     LIMIT_DELTA2TEXT,
       
    43     REVIDX_DEFAULT_FLAGS,
    41     REVIDX_DEFAULT_FLAGS,
    44     REVIDX_ELLIPSIS,
    42     REVIDX_ELLIPSIS,
    45     REVIDX_EXTSTORED,
    43     REVIDX_EXTSTORED,
    46     REVIDX_FLAGS_ORDER,
    44     REVIDX_FLAGS_ORDER,
    47     REVIDX_ISCENSORED,
    45     REVIDX_ISCENSORED,
    67     pycompat,
    65     pycompat,
    68     repository,
    66     repository,
    69     templatefilters,
    67     templatefilters,
    70     util,
    68     util,
    71 )
    69 )
       
    70 from .revlogutils import (
       
    71     deltas as deltautil,
       
    72 )
    72 from .utils import (
    73 from .utils import (
    73     interfaceutil,
    74     interfaceutil,
    74     stringutil,
    75     stringutil,
    75 )
    76 )
    76 
    77 
   208             b = p1
   209             b = p1
   209         s = hashlib.sha1(a)
   210         s = hashlib.sha1(a)
   210         s.update(b)
   211         s.update(b)
   211     s.update(text)
   212     s.update(text)
   212     return s.digest()
   213     return s.digest()
   213 
       
   214 class _testrevlog(object):
       
   215     """minimalist fake revlog to use in doctests"""
       
   216 
       
   217     def __init__(self, data, density=0.5, mingap=0):
       
   218         """data is an list of revision payload boundaries"""
       
   219         self._data = data
       
   220         self._srdensitythreshold = density
       
   221         self._srmingapsize = mingap
       
   222 
       
   223     def start(self, rev):
       
   224         if rev == 0:
       
   225             return 0
       
   226         return self._data[rev - 1]
       
   227 
       
   228     def end(self, rev):
       
   229         return self._data[rev]
       
   230 
       
   231     def length(self, rev):
       
   232         return self.end(rev) - self.start(rev)
       
   233 
       
   234     def __len__(self):
       
   235         return len(self._data)
       
   236 
       
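# [editor illustration, not part of the changeset] A minimal sketch of how the
# doctest helper above maps payload boundaries to start/end/length. With
# data = [5, 10, 12], rev 0 covers bytes [0, 5), rev 1 covers [5, 10) and
# rev 2 covers [10, 12):
#
#     rl = _testrevlog([5, 10, 12])
#     rl.start(1), rl.end(1), rl.length(1)  # (5, 10, 5)
#     rl.length(2)                          # 2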
   237 def _trimchunk(revlog, revs, startidx, endidx=None):
       
   238     """returns revs[startidx:endidx] without empty trailing revs
       
   239 
       
   240     Doctest Setup
       
   241     >>> revlog = _testrevlog([
       
   242     ...  5,  #0
       
   243     ...  10, #1
       
   244     ...  12, #2
       
   245     ...  12, #3 (empty)
       
   246     ...  17, #4
       
   247     ...  21, #5
       
   248     ...  21, #6 (empty)
       
   249     ... ])
       
   250 
       
   251     Contiguous cases:
       
   252     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
       
   253     [0, 1, 2, 3, 4, 5]
       
   254     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
       
   255     [0, 1, 2, 3, 4]
       
   256     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
       
   257     [0, 1, 2]
       
   258     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
       
   259     [2]
       
   260     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
       
   261     [3, 4, 5]
       
   262     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
       
   263     [3, 4]
       
   264 
       
   265     Discontiguous cases:
       
   266     >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
       
   267     [1, 3, 5]
       
   268     >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
       
   269     [1]
       
   270     >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
       
   271     [3, 5]
       
   272     >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
       
   273     [3, 5]
       
   274     """
       
   275     length = revlog.length
       
   276 
       
   277     if endidx is None:
       
   278         endidx = len(revs)
       
   279 
       
    280     # If we have a non-empty delta candidate, there is nothing to trim
       
   281     if revs[endidx - 1] < len(revlog):
       
   282         # Trim empty revs at the end, except the very first revision of a chain
       
   283         while (endidx > 1
       
   284                 and endidx > startidx
       
   285                 and length(revs[endidx - 1]) == 0):
       
   286             endidx -= 1
       
   287 
       
   288     return revs[startidx:endidx]
       
   289 
       
   290 def _segmentspan(revlog, revs, deltainfo=None):
       
   291     """Get the byte span of a segment of revisions
       
   292 
       
   293     revs is a sorted array of revision numbers
       
   294 
       
   295     >>> revlog = _testrevlog([
       
   296     ...  5,  #0
       
   297     ...  10, #1
       
   298     ...  12, #2
       
   299     ...  12, #3 (empty)
       
   300     ...  17, #4
       
   301     ... ])
       
   302 
       
   303     >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
       
   304     17
       
   305     >>> _segmentspan(revlog, [0, 4])
       
   306     17
       
   307     >>> _segmentspan(revlog, [3, 4])
       
   308     5
       
   309     >>> _segmentspan(revlog, [1, 2, 3,])
       
   310     7
       
   311     >>> _segmentspan(revlog, [1, 3])
       
   312     7
       
   313     """
       
   314     if not revs:
       
   315         return 0
       
   316     if deltainfo is not None and len(revlog) <= revs[-1]:
       
   317         if len(revs) == 1:
       
   318             return deltainfo.deltalen
       
   319         offset = revlog.end(len(revlog) - 1)
       
   320         end = deltainfo.deltalen + offset
       
   321     else:
       
   322         end = revlog.end(revs[-1])
       
   323     return end - revlog.start(revs[0])
       
   324 
       
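# [editor illustration, not part of the changeset] Worked example of the span
# arithmetic above, reusing the doctest data: for revs = [1, 3] the segment
# runs from start(1) == 5 to end(3) == 12, so the span is 12 - 5 == 7 bytes,
# even though rev 2 sitting in between is not part of the requested revs.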
   325 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
       
   326     """slice revs to reduce the amount of unrelated data to be read from disk.
       
   327 
       
    328     ``revs`` is sliced into groups that should be read in one go.
       
   329     Assume that revs are sorted.
       
   330 
       
   331     The initial chunk is sliced until the overall density (payload/chunks-span
       
   332     ratio) is above `revlog._srdensitythreshold`. No gap smaller than
       
   333     `revlog._srmingapsize` is skipped.
       
   334 
       
    335     If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
       
    336     For consistency with other slicing choices, this limit won't go lower than
       
   337     `revlog._srmingapsize`.
       
   338 
       
    339     If individual revision chunks are larger than this limit, they will still
       
    340     be yielded individually.
       
   341 
       
   342     >>> revlog = _testrevlog([
       
   343     ...  5,  #00 (5)
       
   344     ...  10, #01 (5)
       
   345     ...  12, #02 (2)
       
   346     ...  12, #03 (empty)
       
   347     ...  27, #04 (15)
       
   348     ...  31, #05 (4)
       
   349     ...  31, #06 (empty)
       
   350     ...  42, #07 (11)
       
   351     ...  47, #08 (5)
       
   352     ...  47, #09 (empty)
       
   353     ...  48, #10 (1)
       
   354     ...  51, #11 (3)
       
   355     ...  74, #12 (23)
       
   356     ...  85, #13 (11)
       
   357     ...  86, #14 (1)
       
   358     ...  91, #15 (5)
       
   359     ... ])
       
   360 
       
   361     >>> list(_slicechunk(revlog, list(range(16))))
       
   362     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
       
   363     >>> list(_slicechunk(revlog, [0, 15]))
       
   364     [[0], [15]]
       
   365     >>> list(_slicechunk(revlog, [0, 11, 15]))
       
   366     [[0], [11], [15]]
       
   367     >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
       
   368     [[0], [11, 13, 15]]
       
   369     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
       
   370     [[1, 2], [5, 8, 10, 11], [14]]
       
   371 
       
   372     Slicing with a maximum chunk size
       
   373     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
       
   374     [[0], [11], [13], [15]]
       
   375     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
       
   376     [[0], [11], [13, 15]]
       
   377     """
       
   378     if targetsize is not None:
       
   379         targetsize = max(targetsize, revlog._srmingapsize)
       
   380     # targetsize should not be specified when evaluating delta candidates:
       
   381     # * targetsize is used to ensure we stay within specification when reading,
       
    382     # * deltainfo is used to pick a good delta chain when writing.
       
   383     if not (deltainfo is None or targetsize is None):
       
   384         msg = 'cannot use `targetsize` with a `deltainfo`'
       
   385         raise error.ProgrammingError(msg)
       
   386     for chunk in _slicechunktodensity(revlog, revs,
       
   387                                       deltainfo,
       
   388                                       revlog._srdensitythreshold,
       
   389                                       revlog._srmingapsize):
       
   390         for subchunk in _slicechunktosize(revlog, chunk, targetsize):
       
   391             yield subchunk
       
   392 
       
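# [editor illustration, not part of the changeset] Worked example of the
# two-pass slicing above: for _slicechunk(revlog, [0, 11, 13, 15],
# targetsize=20), density slicing first separates [0] from [11, 13, 15];
# that second chunk spans end(15) - start(11) == 91 - 48 == 43 bytes, so the
# size pass cuts it again: end(13) - start(11) == 37 > 20 closes [11], while
# end(15) - start(13) == 17 <= 20 keeps [13, 15] together, giving
# [[0], [11], [13, 15]].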
   393 def _slicechunktosize(revlog, revs, targetsize=None):
       
   394     """slice revs to match the target size
       
   395 
       
    396     This is intended to be used on chunks that density slicing selected, but
       
    397     that are still too large compared to the read guarantee of revlogs. This
       
    398     might happen when the "minimal gap size" interrupted the slicing or when
       
    399     chains are built in a way that creates large blocks next to each other.
       
   400 
       
   401     >>> revlog = _testrevlog([
       
   402     ...  3,  #0 (3)
       
   403     ...  5,  #1 (2)
       
   404     ...  6,  #2 (1)
       
   405     ...  8,  #3 (2)
       
   406     ...  8,  #4 (empty)
       
   407     ...  11, #5 (3)
       
   408     ...  12, #6 (1)
       
   409     ...  13, #7 (1)
       
   410     ...  14, #8 (1)
       
   411     ... ])
       
   412 
       
    413     Cases where the chunk is already small enough
       
   414     >>> list(_slicechunktosize(revlog, [0], 3))
       
   415     [[0]]
       
   416     >>> list(_slicechunktosize(revlog, [6, 7], 3))
       
   417     [[6, 7]]
       
   418     >>> list(_slicechunktosize(revlog, [0], None))
       
   419     [[0]]
       
   420     >>> list(_slicechunktosize(revlog, [6, 7], None))
       
   421     [[6, 7]]
       
   422 
       
    423     Cases where we need actual slicing
       
   424     >>> list(_slicechunktosize(revlog, [0, 1], 3))
       
   425     [[0], [1]]
       
   426     >>> list(_slicechunktosize(revlog, [1, 3], 3))
       
   427     [[1], [3]]
       
   428     >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
       
   429     [[1, 2], [3]]
       
   430     >>> list(_slicechunktosize(revlog, [3, 5], 3))
       
   431     [[3], [5]]
       
   432     >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
       
   433     [[3], [5]]
       
   434     >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
       
   435     [[5], [6, 7, 8]]
       
   436     >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
       
   437     [[0], [1, 2], [3], [5], [6, 7, 8]]
       
   438 
       
    439     Cases with an individual chunk that is too large (must return valid chunks)
       
   440     >>> list(_slicechunktosize(revlog, [0, 1], 2))
       
   441     [[0], [1]]
       
   442     >>> list(_slicechunktosize(revlog, [1, 3], 1))
       
   443     [[1], [3]]
       
   444     >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
       
   445     [[3], [5]]
       
   446     """
       
   447     assert targetsize is None or 0 <= targetsize
       
   448     if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
       
   449         yield revs
       
   450         return
       
   451 
       
   452     startrevidx = 0
       
   453     startdata = revlog.start(revs[0])
       
   454     endrevidx = 0
       
   455     iterrevs = enumerate(revs)
       
   456     next(iterrevs) # skip first rev.
       
   457     for idx, r in iterrevs:
       
   458         span = revlog.end(r) - startdata
       
   459         if span <= targetsize:
       
   460             endrevidx = idx
       
   461         else:
       
   462             chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
       
   463             if chunk:
       
   464                 yield chunk
       
   465             startrevidx = idx
       
   466             startdata = revlog.start(r)
       
   467             endrevidx = idx
       
   468     yield _trimchunk(revlog, revs, startrevidx)
       
   469 
       
   470 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
       
   471                          mingapsize=0):
       
   472     """slice revs to reduce the amount of unrelated data to be read from disk.
       
   473 
       
    474     ``revs`` is sliced into groups that should be read in one go.
       
   475     Assume that revs are sorted.
       
   476 
       
   477     ``deltainfo`` is a _deltainfo instance of a revision that we would append
       
   478     to the top of the revlog.
       
   479 
       
   480     The initial chunk is sliced until the overall density (payload/chunks-span
       
   481     ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
       
   482     skipped.
       
   483 
       
   484     >>> revlog = _testrevlog([
       
   485     ...  5,  #00 (5)
       
   486     ...  10, #01 (5)
       
   487     ...  12, #02 (2)
       
   488     ...  12, #03 (empty)
       
   489     ...  27, #04 (15)
       
   490     ...  31, #05 (4)
       
   491     ...  31, #06 (empty)
       
   492     ...  42, #07 (11)
       
   493     ...  47, #08 (5)
       
   494     ...  47, #09 (empty)
       
   495     ...  48, #10 (1)
       
   496     ...  51, #11 (3)
       
   497     ...  74, #12 (23)
       
   498     ...  85, #13 (11)
       
   499     ...  86, #14 (1)
       
   500     ...  91, #15 (5)
       
   501     ... ])
       
   502 
       
   503     >>> list(_slicechunktodensity(revlog, list(range(16))))
       
   504     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
       
   505     >>> list(_slicechunktodensity(revlog, [0, 15]))
       
   506     [[0], [15]]
       
   507     >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
       
   508     [[0], [11], [15]]
       
   509     >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
       
   510     [[0], [11, 13, 15]]
       
   511     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
       
   512     [[1, 2], [5, 8, 10, 11], [14]]
       
   513     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
       
   514     ...                           mingapsize=20))
       
   515     [[1, 2, 3, 5, 8, 10, 11], [14]]
       
   516     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
       
   517     ...                           targetdensity=0.95))
       
   518     [[1, 2], [5], [8, 10, 11], [14]]
       
   519     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
       
   520     ...                           targetdensity=0.95, mingapsize=12))
       
   521     [[1, 2], [5, 8, 10, 11], [14]]
       
   522     """
       
   523     start = revlog.start
       
   524     length = revlog.length
       
   525 
       
   526     if len(revs) <= 1:
       
   527         yield revs
       
   528         return
       
   529 
       
   530     nextrev = len(revlog)
       
   531     nextoffset = revlog.end(nextrev - 1)
       
   532 
       
   533     if deltainfo is None:
       
   534         deltachainspan = _segmentspan(revlog, revs)
       
   535         chainpayload = sum(length(r) for r in revs)
       
   536     else:
       
   537         deltachainspan = deltainfo.distance
       
   538         chainpayload = deltainfo.compresseddeltalen
       
   539 
       
   540     if deltachainspan < mingapsize:
       
   541         yield revs
       
   542         return
       
   543 
       
   544     readdata = deltachainspan
       
   545 
       
   546     if deltachainspan:
       
   547         density = chainpayload / float(deltachainspan)
       
   548     else:
       
   549         density = 1.0
       
   550 
       
   551     if density >= targetdensity:
       
   552         yield revs
       
   553         return
       
   554 
       
   555     if deltainfo is not None and deltainfo.deltalen:
       
   556         revs = list(revs)
       
   557         revs.append(nextrev)
       
   558 
       
   559     # Store the gaps in a heap to have them sorted by decreasing size
       
   560     gapsheap = []
       
   561     heapq.heapify(gapsheap)
       
   562     prevend = None
       
   563     for i, rev in enumerate(revs):
       
   564         if rev < nextrev:
       
   565             revstart = start(rev)
       
   566             revlen = length(rev)
       
   567         else:
       
   568             revstart = nextoffset
       
   569             revlen = deltainfo.deltalen
       
   570 
       
   571         # Skip empty revisions to form larger holes
       
   572         if revlen == 0:
       
   573             continue
       
   574 
       
   575         if prevend is not None:
       
   576             gapsize = revstart - prevend
       
   577             # only consider holes that are large enough
       
   578             if gapsize > mingapsize:
       
   579                 heapq.heappush(gapsheap, (-gapsize, i))
       
   580 
       
   581         prevend = revstart + revlen
       
   582 
       
   583     # Collect the indices of the largest holes until the density is acceptable
       
   584     indicesheap = []
       
   585     heapq.heapify(indicesheap)
       
   586     while gapsheap and density < targetdensity:
       
   587         oppgapsize, gapidx = heapq.heappop(gapsheap)
       
   588 
       
   589         heapq.heappush(indicesheap, gapidx)
       
   590 
       
    591         # the gap sizes are stored as negatives so the heap sorts them in
       
    592         # decreasing order
       
   593         readdata -= (-oppgapsize)
       
   594         if readdata > 0:
       
   595             density = chainpayload / float(readdata)
       
   596         else:
       
   597             density = 1.0
       
   598 
       
   599     # Cut the revs at collected indices
       
   600     previdx = 0
       
   601     while indicesheap:
       
   602         idx = heapq.heappop(indicesheap)
       
   603 
       
   604         chunk = _trimchunk(revlog, revs, previdx, idx)
       
   605         if chunk:
       
   606             yield chunk
       
   607 
       
   608         previdx = idx
       
   609 
       
   610     chunk = _trimchunk(revlog, revs, previdx)
       
   611     if chunk:
       
   612         yield chunk
       
   613 
       
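# [editor illustration, not part of the changeset] Worked density example for
# _slicechunktodensity(revlog, [0, 15]): the chain payload is
# length(0) + length(15) == 10 bytes while the span is end(15) - start(0) ==
# 91 bytes, so the density is ~0.11 < 0.5; cutting the single 81-byte gap
# between the two revisions brings the density back to 1.0 and yields
# [[0], [15]].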
   614 @attr.s(slots=True, frozen=True)
       
   615 class _deltainfo(object):
       
   616     distance = attr.ib()
       
   617     deltalen = attr.ib()
       
   618     data = attr.ib()
       
   619     base = attr.ib()
       
   620     chainbase = attr.ib()
       
   621     chainlen = attr.ib()
       
   622     compresseddeltalen = attr.ib()
       
   623     snapshotdepth = attr.ib()
       
   624 
       
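# [editor note, not part of the changeset] Field summary inferred from
# _builddeltainfo and _isgooddeltainfo below: 'distance' is the byte span from
# the start of the chain base to the end of the would-be delta, 'deltalen' is
# the size of the compressed delta itself, 'compresseddeltalen' is the
# cumulative size of all deltas in the chain, and 'snapshotdepth' is only set
# when the delta would form a snapshot (0 for a full snapshot against nullrev).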
   625 class _deltacomputer(object):
       
   626     def __init__(self, revlog):
       
   627         self.revlog = revlog
       
   628 
       
   629     def _getcandidaterevs(self, p1, p2, cachedelta):
       
   630         """
       
    631         Provides revisions that are of interest to be diffed against,
       
    632         grouped by how easy they are to try.
       
   633         """
       
   634         revlog = self.revlog
       
   635         gdelta = revlog._generaldelta
       
   636         curr = len(revlog)
       
   637         prev = curr - 1
       
   638         p1r, p2r = revlog.rev(p1), revlog.rev(p2)
       
   639 
       
   640         # should we try to build a delta?
       
   641         if prev != nullrev and revlog._storedeltachains:
       
   642             tested = set()
       
   643             # This condition is true most of the time when processing
       
   644             # changegroup data into a generaldelta repo. The only time it
       
   645             # isn't true is if this is the first revision in a delta chain
       
   646             # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
       
   647             if cachedelta and gdelta and revlog._lazydeltabase:
       
   648                 # Assume what we received from the server is a good choice
       
    649                 # the delta build will reuse the cached delta
       
   650                 yield (cachedelta[0],)
       
   651                 tested.add(cachedelta[0])
       
   652 
       
   653             if gdelta:
       
    654                 # exclude the already-tested lazy delta base, if any
       
   655                 parents = [p for p in (p1r, p2r)
       
   656                            if p != nullrev and p not in tested]
       
   657 
       
   658                 if not revlog._deltabothparents and len(parents) == 2:
       
   659                     parents.sort()
       
   660                     # To minimize the chance of having to build a fulltext,
       
   661                     # pick first whichever parent is closest to us (max rev)
       
   662                     yield (parents[1],)
       
   663                     # then the other one (min rev) if the first did not fit
       
   664                     yield (parents[0],)
       
   665                     tested.update(parents)
       
   666                 elif len(parents) > 0:
       
   667                     # Test all parents (1 or 2), and keep the best candidate
       
   668                     yield parents
       
   669                     tested.update(parents)
       
   670 
       
   671             if prev not in tested:
       
    672                 # other approaches failed, try against prev to hopefully save us a
       
   673                 # fulltext.
       
   674                 yield (prev,)
       
   675                 tested.add(prev)
       
   676 
       
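# [editor note, not part of the changeset] Candidate groups above are yielded
# cheapest-first: the cached delta base received over the wire (when
# generaldelta and lazydeltabase apply), then the closest parent followed by
# the other parent (or both parents at once when _deltabothparents is set),
# and finally the previous revision as a last attempt to avoid a fulltext.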
   677     def buildtext(self, revinfo, fh):
       
   678         """Builds a fulltext version of a revision
       
   679 
       
   680         revinfo: _revisioninfo instance that contains all needed info
       
   681         fh:      file handle to either the .i or the .d revlog file,
       
   682                  depending on whether it is inlined or not
       
   683         """
       
   684         btext = revinfo.btext
       
   685         if btext[0] is not None:
       
   686             return btext[0]
       
   687 
       
   688         revlog = self.revlog
       
   689         cachedelta = revinfo.cachedelta
       
   690         flags = revinfo.flags
       
   691         node = revinfo.node
       
   692 
       
   693         baserev = cachedelta[0]
       
   694         delta = cachedelta[1]
       
   695         # special case deltas which replace entire base; no need to decode
       
   696         # base revision. this neatly avoids censored bases, which throw when
       
   697         # they're decoded.
       
   698         hlen = struct.calcsize(">lll")
       
   699         if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
       
   700                                                    len(delta) - hlen):
       
   701             btext[0] = delta[hlen:]
       
   702         else:
       
   703             # deltabase is rawtext before changed by flag processors, which is
       
   704             # equivalent to non-raw text
       
   705             basetext = revlog.revision(baserev, _df=fh, raw=False)
       
   706             btext[0] = mdiff.patch(basetext, delta)
       
   707 
       
   708         try:
       
   709             res = revlog._processflags(btext[0], flags, 'read', raw=True)
       
   710             btext[0], validatehash = res
       
   711             if validatehash:
       
   712                 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
       
   713             if flags & REVIDX_ISCENSORED:
       
   714                 raise RevlogError(_('node %s is not censored') % node)
       
   715         except CensoredNodeError:
       
   716             # must pass the censored index flag to add censored revisions
       
   717             if not flags & REVIDX_ISCENSORED:
       
   718                 raise
       
   719         return btext[0]
       
   720 
       
   721     def _builddeltadiff(self, base, revinfo, fh):
       
   722         revlog = self.revlog
       
   723         t = self.buildtext(revinfo, fh)
       
   724         if revlog.iscensored(base):
       
   725             # deltas based on a censored revision must replace the
       
   726             # full content in one patch, so delta works everywhere
       
   727             header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
       
   728             delta = header + t
       
   729         else:
       
   730             ptext = revlog.revision(base, _df=fh, raw=True)
       
   731             delta = mdiff.textdiff(ptext, t)
       
   732 
       
   733         return delta
       
   734 
       
   735     def _builddeltainfo(self, revinfo, base, fh):
       
   736         # can we use the cached delta?
       
   737         if revinfo.cachedelta and revinfo.cachedelta[0] == base:
       
   738             delta = revinfo.cachedelta[1]
       
   739         else:
       
   740             delta = self._builddeltadiff(base, revinfo, fh)
       
   741         revlog = self.revlog
       
   742         header, data = revlog.compress(delta)
       
   743         deltalen = len(header) + len(data)
       
   744         chainbase = revlog.chainbase(base)
       
   745         offset = revlog.end(len(revlog) - 1)
       
   746         dist = deltalen + offset - revlog.start(chainbase)
       
   747         if revlog._generaldelta:
       
   748             deltabase = base
       
   749         else:
       
   750             deltabase = chainbase
       
   751         chainlen, compresseddeltalen = revlog._chaininfo(base)
       
   752         chainlen += 1
       
   753         compresseddeltalen += deltalen
       
   754 
       
   755         revlog = self.revlog
       
   756         snapshotdepth = None
       
   757         if deltabase == nullrev:
       
   758             snapshotdepth = 0
       
   759         elif revlog._sparserevlog and revlog.issnapshot(deltabase):
       
   760             # A delta chain should always be one full snapshot,
       
   761             # zero or more semi-snapshots, and zero or more deltas
       
   762             p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
       
   763             if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
       
   764                 snapshotdepth = len(revlog._deltachain(deltabase)[0])
       
   765 
       
   766         return _deltainfo(dist, deltalen, (header, data), deltabase,
       
   767                           chainbase, chainlen, compresseddeltalen,
       
   768                           snapshotdepth)
       
   769 
       
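# [editor note, not part of the changeset] The 'dist' computed above assumes
# the new delta would be appended at the current end of the data file
# (offset == revlog.end(len(revlog) - 1)), so it measures the full byte range
# from the start of the chain base up to and including the would-be delta.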
   770     def finddeltainfo(self, revinfo, fh):
       
   771         """Find an acceptable delta against a candidate revision
       
   772 
       
   773         revinfo: information about the revision (instance of _revisioninfo)
       
   774         fh:      file handle to either the .i or the .d revlog file,
       
   775                  depending on whether it is inlined or not
       
   776 
       
   777         Returns the first acceptable candidate revision, as ordered by
       
   778         _getcandidaterevs
       
   779         """
       
   780         if not revinfo.textlen:
       
    781             return None # empty files do not need a delta
       
   782 
       
   783         cachedelta = revinfo.cachedelta
       
   784         p1 = revinfo.p1
       
   785         p2 = revinfo.p2
       
   786         revlog = self.revlog
       
   787 
       
   788         deltalength = self.revlog.length
       
   789         deltaparent = self.revlog.deltaparent
       
   790 
       
   791         deltainfo = None
       
   792         deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
       
   793         for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
       
    794             # filter out delta bases that will never produce a good delta
       
   795             candidaterevs = [r for r in candidaterevs
       
   796                              if self.revlog.length(r) <= deltas_limit]
       
   797             nominateddeltas = []
       
   798             for candidaterev in candidaterevs:
       
    799                 # skip over empty deltas (no need to include them in a chain)
       
   800                 while candidaterev != nullrev and not deltalength(candidaterev):
       
   801                     candidaterev = deltaparent(candidaterev)
       
   802                 # no need to try a delta against nullid, this will be handled
       
   803                 # by fulltext later.
       
   804                 if candidaterev == nullrev:
       
   805                     continue
       
   806                 # no delta for rawtext-changing revs (see "candelta" for why)
       
   807                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
       
   808                     continue
       
   809                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
       
   810                 if revlog._isgooddeltainfo(candidatedelta, revinfo):
       
   811                     nominateddeltas.append(candidatedelta)
       
   812             if nominateddeltas:
       
   813                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
       
   814                 break
       
   815 
       
   816         return deltainfo
       
   817 
   214 
   818 @attr.s(slots=True, frozen=True)
   215 @attr.s(slots=True, frozen=True)
   819 class _revisioninfo(object):
   216 class _revisioninfo(object):
   820     """Information about a revision that allows building its fulltext
   217     """Information about a revision that allows building its fulltext
   821     node:       expected hash of the revision
   218     node:       expected hash of the revision
  2093         ladd = l.append
  1490         ladd = l.append
  2094 
  1491 
  2095         if not self._withsparseread:
  1492         if not self._withsparseread:
  2096             slicedchunks = (revs,)
  1493             slicedchunks = (revs,)
  2097         else:
  1494         else:
  2098             slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
  1495             slicedchunks = deltautil.slicechunk(self, revs,
       
  1496                                                 targetsize=targetsize)
  2099 
  1497 
  2100         for revschunk in slicedchunks:
  1498         for revschunk in slicedchunks:
  2101             firstrev = revschunk[0]
  1499             firstrev = revschunk[0]
  2102             # Skip trailing revisions with empty diff
  1500             # Skip trailing revisions with empty diff
  2103             for lastrev in revschunk[::-1]:
  1501             for lastrev in revschunk[::-1]:
  2391         cachedelta - an optional precomputed delta
  1789         cachedelta - an optional precomputed delta
  2392         node - nodeid of revision; typically node is not specified, and it is
  1790         node - nodeid of revision; typically node is not specified, and it is
  2393             computed by default as hash(text, p1, p2), however subclasses might
  1791             computed by default as hash(text, p1, p2), however subclasses might
  2394             use different hashing method (and override checkhash() in such case)
  1792             use different hashing method (and override checkhash() in such case)
  2395         flags - the known flags to set on the revision
  1793         flags - the known flags to set on the revision
  2396         deltacomputer - an optional _deltacomputer instance shared between
  1794         deltacomputer - an optional deltacomputer instance shared between
  2397             multiple calls
  1795             multiple calls
  2398         """
  1796         """
  2399         if link == nullrev:
  1797         if link == nullrev:
  2400             raise RevlogError(_("attempted to add linkrev -1 to %s")
  1798             raise RevlogError(_("attempted to add linkrev -1 to %s")
  2401                               % self.indexfile)
  1799                               % self.indexfile)
  2514             except KeyError:
  1912             except KeyError:
  2515                 raise RevlogError(_('unknown compression type %r') % t)
  1913                 raise RevlogError(_('unknown compression type %r') % t)
  2516 
  1914 
  2517         return compressor.decompress(data)
  1915         return compressor.decompress(data)
  2518 
  1916 
  2519     def _isgooddeltainfo(self, deltainfo, revinfo):
       
  2520         """Returns True if the given delta is good. Good means that it is within
       
  2521         the disk span, disk size, and chain length bounds that we know to be
       
  2522         performant."""
       
  2523         if deltainfo is None:
       
  2524             return False
       
  2525 
       
  2526         # - 'deltainfo.distance' is the distance from the base revision --
       
  2527         #   bounding it limits the amount of I/O we need to do.
       
  2528         # - 'deltainfo.compresseddeltalen' is the sum of the total size of
       
  2529         #   deltas we need to apply -- bounding it limits the amount of CPU
       
  2530         #   we consume.
       
  2531 
       
  2532         if self._sparserevlog:
       
  2533             # As sparse-read will be used, we can consider that the distance,
       
  2534             # instead of being the span of the whole chunk,
       
  2535             # is the span of the largest read chunk
       
  2536             base = deltainfo.base
       
  2537 
       
  2538             if base != nullrev:
       
  2539                 deltachain = self._deltachain(base)[0]
       
  2540             else:
       
  2541                 deltachain = []
       
  2542 
       
  2543             # search for the first non-snapshot revision
       
  2544             for idx, r in enumerate(deltachain):
       
  2545                 if not self.issnapshot(r):
       
  2546                     break
       
  2547             deltachain = deltachain[idx:]
       
  2548             chunks = _slicechunk(self, deltachain, deltainfo)
       
  2549             all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
       
  2550             distance = max(all_span)
       
  2551         else:
       
  2552             distance = deltainfo.distance
       
  2553 
       
  2554         textlen = revinfo.textlen
       
  2555         defaultmax = textlen * 4
       
  2556         maxdist = self._maxdeltachainspan
       
  2557         if not maxdist:
       
   2558             maxdist = distance # ensure the conditional passes
       
  2559         maxdist = max(maxdist, defaultmax)
       
  2560         if self._sparserevlog and maxdist < self._srmingapsize:
       
   2561             # In multiple places, we are ignoring irrelevant data ranges below a
       
   2562             # certain size. We also apply this tradeoff here and relax the span
       
   2563             # constraint for small enough content.
       
  2564             maxdist = self._srmingapsize
       
  2565 
       
  2566         # Bad delta from read span:
       
  2567         #
       
  2568         #   If the span of data read is larger than the maximum allowed.
       
  2569         if maxdist < distance:
       
  2570             return False
       
  2571 
       
  2572         # Bad delta from new delta size:
       
  2573         #
       
  2574         #   If the delta size is larger than the target text, storing the
       
  2575         #   delta will be inefficient.
       
  2576         if textlen < deltainfo.deltalen:
       
  2577             return False
       
  2578 
       
  2579         # Bad delta from cumulated payload size:
       
  2580         #
       
   2581         #   If the sum of deltas gets larger than K * target text length.
       
  2582         if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
       
  2583             return False
       
  2584 
       
  2585         # Bad delta from chain length:
       
  2586         #
       
   2587         #   If the number of deltas in the chain gets too high.
       
   2588         if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
       
  2589             return False
       
  2590 
       
  2591         # bad delta from intermediate snapshot size limit
       
  2592         #
       
   2593         #   If an intermediate snapshot size is higher than the limit. The limit
       
   2594         #   exists to prevent endless chains of intermediate deltas from being
       
   2595         #   created.
       
  2596         if (deltainfo.snapshotdepth is not None and
       
  2597                 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
       
  2598             return False
       
  2599 
       
  2600         # bad delta if new intermediate snapshot is larger than the previous
       
  2601         # snapshot
       
  2602         if (deltainfo.snapshotdepth
       
  2603                 and self.length(deltainfo.base) < deltainfo.deltalen):
       
  2604             return False
       
  2605 
       
  2606         return True
       
  2607 
       
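# [editor note, not part of the changeset] Summary of the rejection rules
# above: a candidate delta is dropped when the (possibly sliced) read span
# exceeds max(_maxdeltachainspan, 4 * textlen) (the span check is effectively
# disabled when _maxdeltachainspan is unset), when the delta is larger than
# the fulltext, when the cumulated chain size exceeds LIMIT_DELTA2TEXT *
# textlen, when the chain grows past _maxchainlen, or when an intermediate
# snapshot would be larger than textlen >> snapshotdepth or larger than the
# snapshot it is based on.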
  2608     def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
  1917     def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
  2609                      cachedelta, ifh, dfh, alwayscache=False,
  1918                      cachedelta, ifh, dfh, alwayscache=False,
  2610                      deltacomputer=None):
  1919                      deltacomputer=None):
  2611         """internal function to add revisions to the log
  1920         """internal function to add revisions to the log
  2612 
  1921 
  2650                                         cachedelta[1])
  1959                                         cachedelta[1])
  2651         else:
  1960         else:
  2652             textlen = len(rawtext)
  1961             textlen = len(rawtext)
  2653 
  1962 
  2654         if deltacomputer is None:
  1963         if deltacomputer is None:
  2655             deltacomputer = _deltacomputer(self)
  1964             deltacomputer = deltautil.deltacomputer(self)
  2656 
  1965 
  2657         revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
  1966         revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
  2658 
  1967 
  2659         # no delta for flag processor revision (see "candelta" for why)
  1968         # no delta for flag processor revision (see "candelta" for why)
  2660         # not calling candelta since only one revision needs test, also to
  1969         # not calling candelta since only one revision needs test, also to
  2752         def flush():
  2061         def flush():
  2753             if dfh:
  2062             if dfh:
  2754                 dfh.flush()
  2063                 dfh.flush()
  2755             ifh.flush()
  2064             ifh.flush()
  2756         try:
  2065         try:
  2757             deltacomputer = _deltacomputer(self)
  2066             deltacomputer = deltautil.deltacomputer(self)
  2758             # loop through our set of deltas
  2067             # loop through our set of deltas
  2759             for data in deltas:
  2068             for data in deltas:
  2760                 node, p1, p2, linknode, deltabase, delta, flags = data
  2069                 node, p1, p2, linknode, deltabase, delta, flags = data
  2761                 link = linkmapper(linknode)
  2070                 link = linkmapper(linknode)
  2762                 flags = flags or REVIDX_DEFAULT_FLAGS
  2071                 flags = flags or REVIDX_DEFAULT_FLAGS
  3125             destrevlog._deltabothparents = deltabothparents or oldamd
  2434             destrevlog._deltabothparents = deltabothparents or oldamd
  3126 
  2435 
  3127             populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
  2436             populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
  3128                                                 self.DELTAREUSESAMEREVS)
  2437                                                 self.DELTAREUSESAMEREVS)
  3129 
  2438 
  3130             deltacomputer = _deltacomputer(destrevlog)
  2439             deltacomputer = deltautil.deltacomputer(destrevlog)
  3131             index = self.index
  2440             index = self.index
  3132             for rev in self:
  2441             for rev in self:
  3133                 entry = index[rev]
  2442                 entry = index[rev]
  3134 
  2443 
  3135                 # Some classes override linkrev to take filtered revs into
  2444                 # Some classes override linkrev to take filtered revs into