# HG changeset patch
# User Boris Feld
# Date 1531294501 25200
# Node ID e59e27e52297134e38bd35e0109630b9fed72b59
# Parent  feba6be0941b1a53db09426ddbee4d2ece09f8f8
#
# revlog: add function to slice chunk down to a given size
#
# It is possible to encounter situations where the slicing based on density
# did not achieve chunk smaller than the 4*textlength limit. To avoid extra
# memory consumption in those cases, we need to be able to break down chunk
# to a given size.
#
# Actual caller comes in the next changesets.
#
# (added to mercurial/revlog.py by hunk @@ -338,6 +338,83 @@)

def _slicechunktosize(revlog, revs, targetsize):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    Arguments:

    - ``revlog``: object providing ``start(rev)`` and ``end(rev)`` offsets,
    - ``revs``: sequence of revision numbers to slice,
    - ``targetsize``: maximum span (``end`` of the last rev minus ``start``
      of the first rev) wanted for each chunk; ``None`` disables slicing.

    This is a generator. ``revs`` is yielded untouched when ``targetsize`` is
    None or when its whole span already fits. Otherwise sub-sequences of
    ``revs`` are yielded in order. A single revision larger than
    ``targetsize`` is still yielded alone, and empty chunks are never yielded
    by the slicing path.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        # nothing to do: no limit requested, or the whole span already fits
        yield revs
        return

    # index (in revs) of the first rev of the chunk being built
    startrevidx = 0
    # on-disk offset where the chunk being built starts
    startdata = revlog.start(revs[0])
    # index (in revs) of the last rev known to fit in the current chunk
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev: it always opens the first chunk
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            # still within budget, extend the current chunk up to this rev
            endrevidx = idx
        else:
            # adding this rev would exceed the budget: emit what has been
            # accumulated so far and start a new chunk at this rev
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    # Emit the last chunk. _trimchunk can return an empty list (the loop
    # above already guards against it), so apply the same guard here rather
    # than handing an empty chunk to the caller.
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk

# NOTE: the patch context continues with the pre-existing definition
# ``_slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0)``
# ("slice revs to reduce the amount of unrelated data to be read from disk."),
# which is truncated in this view and therefore not reproduced here.