sparse-read: ignore trailing empty revs in each read chunk
author Paul Morelle <paul.morelle@octobus.net>
Wed, 18 Oct 2017 15:28:19 +0200
changeset 34898 1bde8e8e5de0
parent 34897 2e350d2a0eca
child 34899 1e2dc50da17b
sparse-read: ignore trailing empty revs in each read chunk An empty entry in the revlog may happen for two reasons: - when the file is empty, and the revlog stores a snapshot; - when there is a merge and both parents were identical. `hg debugindex -m | awk '$3=="0"{print}' | wc -l` reports 1917 such entries in my clone of pypy, and 113 in my clone of mercurial. These empty revisions may be located at the end of a sparse chain, and in some special cases may lead to reading relatively large amounts of data for nothing.
mercurial/revlog.py
--- a/mercurial/revlog.py	Wed Oct 18 15:42:44 2017 -0700
+++ b/mercurial/revlog.py	Wed Oct 18 15:28:19 2017 +0200
@@ -162,6 +162,20 @@
     s.update(text)
     return s.digest()
 
+def _trimchunk(revlog, revs, startidx, endidx=None):
+    """return revs[startidx:endidx] without trailing revs whose length is 0
+    endidx defaults to len(revs); it is never lowered below max(startidx, 1)"""
+    length = revlog.length
+
+    if endidx is None:
+        endidx = len(revs)
+
+    # Trim empty revs at the end, but never the very first revision of a chain
+    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
+        endidx -= 1
+
+    return revs[startidx:endidx]
+
 def _slicechunk(revlog, revs):
     """slice revs to reduce the amount of unrelated data to be read from disk.
 
@@ -194,6 +208,10 @@
         revstart = start(rev)
         revlen = length(rev)
 
+        # Skip empty revisions to form larger holes
+        if revlen == 0:
+            continue
+
         if prevend is not None:
             gapsize = revstart - prevend
             # only consider holes that are large enough
@@ -222,9 +240,16 @@
     previdx = 0
     while indicesheap:
         idx = heapq.heappop(indicesheap)
-        yield revs[previdx:idx]
+
+        chunk = _trimchunk(revlog, revs, previdx, idx)
+        if chunk:
+            yield chunk
+
         previdx = idx
-    yield revs[previdx:]
+
+    chunk = _trimchunk(revlog, revs, previdx)
+    if chunk:
+        yield chunk
 
 # index v0:
 #  4 bytes: offset