revlog: add config variable for limiting delta-chain length
The current heuristic for deciding between storing a delta and a full text
is based on the ratio (size of deltas)/(size of full text).
In some cases (for example a manifest for a huge repo) this approach
can result in extremely long delta chains (~30,000) which are very slow to
read. (In the case of a manifest ~500ms are added to every hg command because of that).
This commit introduces the "revlog.maxchainlen" configuration variable that will
limit the delta chain length.
--- a/mercurial/localrepo.py Thu Nov 06 14:08:25 2014 -0800
+++ b/mercurial/localrepo.py Thu Nov 06 14:20:05 2014 -0800
@@ -316,6 +316,9 @@
chunkcachesize = self.ui.configint('format', 'chunkcachesize')
if chunkcachesize is not None:
self.sopener.options['chunkcachesize'] = chunkcachesize
+ maxchainlen = self.ui.configint('revlog', 'maxchainlen')
+ if maxchainlen is not None:
+ self.sopener.options['maxchainlen'] = maxchainlen
def _writerequirements(self):
reqfile = self.opener("requires", "w")
--- a/mercurial/revlog.py Thu Nov 06 14:08:25 2014 -0800
+++ b/mercurial/revlog.py Thu Nov 06 14:20:05 2014 -0800
@@ -204,6 +204,7 @@
self._basecache = None
self._chunkcache = (0, '')
self._chunkcachesize = 65536
+ self._maxchainlen = None
self.index = []
self._pcache = {}
self._nodecache = {nullid: nullrev}
@@ -219,6 +220,8 @@
v = 0
if 'chunkcachesize' in opts:
self._chunkcachesize = opts['chunkcachesize']
+ if 'maxchainlen' in opts:
+ self._maxchainlen = opts['maxchainlen']
if self._chunkcachesize <= 0:
raise RevlogError(_('revlog chunk cache size %r is not greater '
@@ -1216,11 +1219,13 @@
base = rev
else:
base = chainbase
- return dist, l, data, base, chainbase
+ chainlen = self.chainlen(rev) + 1
+ return dist, l, data, base, chainbase, chainlen
curr = len(self)
prev = curr - 1
base = chainbase = curr
+ chainlen = None
offset = self.end(prev)
flags = 0
d = None
@@ -1240,7 +1245,7 @@
d = builddelta(prev)
else:
d = builddelta(prev)
- dist, l, data, base, chainbase = d
+ dist, l, data, base, chainbase, chainlen = d
# full versions are inserted when the needed deltas
# become comparable to the uncompressed text
@@ -1249,7 +1254,8 @@
cachedelta[1])
else:
textlen = len(text)
- if d is None or dist > textlen * 2:
+ if (d is None or dist > textlen * 2 or
+ self._maxchainlen and chainlen > self._maxchainlen):
text = buildtext()
data = self.compress(text)
l = len(data[1]) + len(data[0])
--- a/tests/test-debugcommands.t Thu Nov 06 14:08:25 2014 -0800
+++ b/tests/test-debugcommands.t Thu Nov 06 14:20:05 2014 -0800
@@ -24,6 +24,40 @@
full revision size (min/max/avg) : 44 / 44 / 44
delta size (min/max/avg) : 0 / 0 / 0
+Test max chain len
+ $ cat >> $HGRCPATH << EOF
+ > [revlog]
+ > maxchainlen=4
+ > EOF
+
+ $ echo "This test checks if maxchainlen config value is respected also it can serve as basic test for debugrevlog -d <file>.\n" >> a
+ $ hg ci -m a
+ $ echo "b\n" >> a
+ $ hg ci -m a
+ $ echo "c\n" >> a
+ $ hg ci -m a
+ $ echo "d\n" >> a
+ $ hg ci -m a
+ $ echo "e\n" >> a
+ $ hg ci -m a
+ $ echo "f\n" >> a
+ $ hg ci -m a
+ $ echo 'g\n' >> a
+ $ hg ci -m a
+ $ echo 'h\n' >> a
+ $ hg ci -m a
+ $ hg debugrevlog -d a
+ # rev p1rev p2rev start end deltastart base p1 p2 rawsize totalsize compression heads chainlen
+ 0 -1 -1 0 ??? 0 0 0 0 ??? ???? ? 1 0 (glob)
+ 1 0 -1 ??? ??? 0 0 0 0 ??? ???? ? 1 1 (glob)
+ 2 1 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 2 (glob)
+ 3 2 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 3 (glob)
+ 4 3 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 4 (glob)
+ 5 4 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 0 (glob)
+ 6 5 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 1 (glob)
+ 7 6 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 2 (glob)
+ 8 7 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 3 (glob)
+ $ cd ..
Test internal debugstacktrace command