changeset 38643:967fee55e8d9

revlog: postprocess chunks to slice them down to a certain size. After the density slicing is done, we enforce a maximum chunk size to avoid memory consumption issues.
author Boris Feld <boris.feld@octobus.net>
date Tue, 10 Jul 2018 11:57:33 +0200
parents e59e27e52297
children 43d0619cec90
files mercurial/revlog.py
diffstat 1 files changed, 18 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revlog.py	Wed Jul 11 00:35:01 2018 -0700
+++ b/mercurial/revlog.py	Tue Jul 10 11:57:33 2018 +0200
@@ -293,7 +293,7 @@
         return 0
     return revlog.end(revs[-1]) - revlog.start(revs[0])
 
-def _slicechunk(revlog, revs):
+def _slicechunk(revlog, revs, targetsize=None):
     """slice revs to reduce the amount of unrelated data to be read from disk.
 
     ``revs`` is sliced into groups that should be read in one time.
@@ -303,6 +303,13 @@
     ratio) is above `revlog._srdensitythreshold`. No gap smaller than
     `revlog._srmingapsize` is skipped.
 
+    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
+    For consistency with other slicing choices, this limit won't go lower than
+    `revlog._srmingapsize`.
+
+    If individual revision chunks are larger than this limit, they will still
+    be yielded individually.
+
     >>> revlog = _testrevlog([
     ...  5,  #00 (5)
     ...  10, #01 (5)
@@ -332,11 +339,20 @@
     [[0], [11, 13, 15]]
     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
     [[1, 2], [5, 8, 10, 11], [14]]
+
+    Slicing with a maximum chunk size
+    >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
+    [[0], [11], [13], [15]]
+    >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
+    [[0], [11], [13, 15]]
     """
+    if targetsize is not None:
+        targetsize = max(targetsize, revlog._srmingapsize)
     for chunk in _slicechunktodensity(revlog, revs,
                                       revlog._srdensitythreshold,
                                       revlog._srmingapsize):
-        yield chunk
+        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
+            yield subchunk
 
 def _slicechunktosize(revlog, revs, targetsize):
     """slice revs to match the target size