changeset 23255:76effa770ff9

revlog: add config variable for limiting delta-chain length The current heuristic for deciding between storing delta and full texts is based on ratio of (sizeofdeltas)/(sizeoffulltext). In some cases (for example a manifest for a huge repo) this approach can result in extremely long delta chains (~30,000) which are very slow to read. (In the case of a manifest ~500ms are added to every hg command because of that). This commit introduces "revlog.maxchainlen" configuration variable that will limit delta chain length.
author Mateusz Kwapich <mitrandir@fb.com>
date Thu, 06 Nov 2014 14:20:05 -0800
parents d23834b871ac
children 1c11393d5dfb
files mercurial/localrepo.py mercurial/revlog.py tests/test-debugcommands.t
diffstat 3 files changed, 46 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/localrepo.py	Thu Nov 06 14:08:25 2014 -0800
+++ b/mercurial/localrepo.py	Thu Nov 06 14:20:05 2014 -0800
@@ -316,6 +316,9 @@
         chunkcachesize = self.ui.configint('format', 'chunkcachesize')
         if chunkcachesize is not None:
             self.sopener.options['chunkcachesize'] = chunkcachesize
+        maxchainlen = self.ui.configint('revlog', 'maxchainlen')
+        if maxchainlen is not None:
+            self.sopener.options['maxchainlen'] = maxchainlen
 
     def _writerequirements(self):
         reqfile = self.opener("requires", "w")
--- a/mercurial/revlog.py	Thu Nov 06 14:08:25 2014 -0800
+++ b/mercurial/revlog.py	Thu Nov 06 14:20:05 2014 -0800
@@ -204,6 +204,7 @@
         self._basecache = None
         self._chunkcache = (0, '')
         self._chunkcachesize = 65536
+        self._maxchainlen = None
         self.index = []
         self._pcache = {}
         self._nodecache = {nullid: nullrev}
@@ -219,6 +220,8 @@
                 v = 0
             if 'chunkcachesize' in opts:
                 self._chunkcachesize = opts['chunkcachesize']
+            if 'maxchainlen' in opts:
+                self._maxchainlen = opts['maxchainlen']
 
         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
@@ -1216,11 +1219,13 @@
                 base = rev
             else:
                 base = chainbase
-            return dist, l, data, base, chainbase
+            chainlen = self.chainlen(rev) + 1
+            return dist, l, data, base, chainbase, chainlen
 
         curr = len(self)
         prev = curr - 1
         base = chainbase = curr
+        chainlen = None
         offset = self.end(prev)
         flags = 0
         d = None
@@ -1240,7 +1245,7 @@
                     d = builddelta(prev)
             else:
                 d = builddelta(prev)
-            dist, l, data, base, chainbase = d
+            dist, l, data, base, chainbase, chainlen = d
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
@@ -1249,7 +1254,8 @@
                                         cachedelta[1])
         else:
             textlen = len(text)
-        if d is None or dist > textlen * 2:
+        if (d is None or dist > textlen * 2 or
+            self._maxchainlen and chainlen > self._maxchainlen):
             text = buildtext()
             data = self.compress(text)
             l = len(data[1]) + len(data[0])
--- a/tests/test-debugcommands.t	Thu Nov 06 14:08:25 2014 -0800
+++ b/tests/test-debugcommands.t	Thu Nov 06 14:20:05 2014 -0800
@@ -24,6 +24,40 @@
   full revision size (min/max/avg)     : 44 / 44 / 44
   delta size (min/max/avg)             : 0 / 0 / 0
 
+Test max chain len
+  $ cat >> $HGRCPATH << EOF
+  > [revlog]
+  > maxchainlen=4
+  > EOF
+
+  $ echo "This test checks if maxchainlen config value is respected also it can serve as basic test for debugrevlog -d <file>.\n" >> a
+  $ hg ci -m a
+  $ echo "b\n" >> a
+  $ hg ci -m a
+  $ echo "c\n" >> a
+  $ hg ci -m a
+  $ echo "d\n" >> a
+  $ hg ci -m a
+  $ echo "e\n" >> a
+  $ hg ci -m a
+  $ echo "f\n" >> a
+  $ hg ci -m a
+  $ echo 'g\n' >> a
+  $ hg ci -m a
+  $ echo 'h\n' >> a
+  $ hg ci -m a
+  $ hg debugrevlog -d a
+  # rev p1rev p2rev start   end deltastart base   p1   p2 rawsize totalsize compression heads chainlen
+      0    -1    -1     0   ???          0    0    0    0     ???      ????           ?     1        0 (glob)
+      1     0    -1   ???   ???          0    0    0    0     ???      ????           ?     1        1 (glob)
+      2     1    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        2 (glob)
+      3     2    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        3 (glob)
+      4     3    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        4 (glob)
+      5     4    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        0 (glob)
+      6     5    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        1 (glob)
+      7     6    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        2 (glob)
+      8     7    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        3 (glob)
+  $ cd ..
 
 Test internal debugstacktrace command