sparse-revlog: new requirement enabled with format.sparse-revlog
author Paul Morelle <paul.morelle@octobus.net>
Mon, 04 Jun 2018 22:23:18 +0200
changeset 38717 aa21a9ad46ea
parent 38716 c67093e81a3e
child 38718 f8762ea73e0d
sparse-revlog: new requirement enabled with format.sparse-revlog The meaning of the new 'sparse-revlog' requirement is that the revlogs are allowed to contain wider delta chains with larger holes between the interesting chunks. These sparse delta chains should be read in several chunks to avoid a potential explosion of memory usage. Older versions do not know how to read a delta chain in several chunks. They would keep reading it in a single read, and would therefore be subject to the potential memory explosion. Hence this new requirement: only versions that support sparse-revlog reading should be allowed to read such a revlog. The implementation of this new algorithm, and the tools to enable or disable the requirement, will follow in the next changesets.
mercurial/configitems.py
mercurial/localrepo.py
mercurial/revlog.py
--- a/mercurial/configitems.py	Mon Jun 04 12:12:00 2018 +0200
+++ b/mercurial/configitems.py	Mon Jun 04 22:23:18 2018 +0200
@@ -652,6 +652,9 @@
 coreconfigitem('format', 'obsstore-version',
     default=None,
 )
+coreconfigitem('format', 'sparse-revlog',
+    default=False,
+)
 coreconfigitem('format', 'usefncache',
     default=True,
 )
--- a/mercurial/localrepo.py	Mon Jun 04 12:12:00 2018 +0200
+++ b/mercurial/localrepo.py	Mon Jun 04 22:23:18 2018 +0200
@@ -354,6 +354,15 @@
 # clients.
 REVLOGV2_REQUIREMENT = 'exp-revlogv2.0'
 
+# A repository with the sparserevlog feature will have delta chains that
+# can spread over a larger span. Sparse reading cuts these large spans into
+# pieces, so that each piece isn't too big.
+# Without the sparserevlog capability, reading from the repository could use
+# huge amounts of memory, because the whole span would be read at once,
+# including all the intermediate revisions that aren't pertinent for the chain.
+# This is why, once a repository has enabled sparse-revlog, it becomes required.
+SPARSEREVLOG_REQUIREMENT = 'sparserevlog'
+
 # Functions receiving (ui, features) that extensions can register to impact
 # the ability to load repositories with custom requirements. Only
 # functions defined in loaded extensions are called.
@@ -376,6 +385,7 @@
         'generaldelta',
         'treemanifest',
         REVLOGV2_REQUIREMENT,
+        SPARSEREVLOG_REQUIREMENT,
     }
     _basesupported = supportedformats | {
         'store',
@@ -678,6 +688,8 @@
         self.svfs.options['with-sparse-read'] = withsparseread
         self.svfs.options['sparse-read-density-threshold'] = srdensitythres
         self.svfs.options['sparse-read-min-gap-size'] = srmingapsize
+        sparserevlog = SPARSEREVLOG_REQUIREMENT in self.requirements
+        self.svfs.options['sparse-revlog'] = sparserevlog
 
         for r in self.requirements:
             if r.startswith('exp-compression-'):
@@ -2370,6 +2382,9 @@
         requirements.add('generaldelta')
     if ui.configbool('experimental', 'treemanifest'):
         requirements.add('treemanifest')
+    # experimental config: format.sparse-revlog
+    if ui.configbool('format', 'sparse-revlog'):
+        requirements.add(SPARSEREVLOG_REQUIREMENT)
 
     revlogv2 = ui.config('experimental', 'revlogv2')
     if revlogv2 == 'enable-unstable-format-and-corrupt-my-data':
--- a/mercurial/revlog.py	Mon Jun 04 12:12:00 2018 +0200
+++ b/mercurial/revlog.py	Mon Jun 04 22:23:18 2018 +0200
@@ -895,6 +895,7 @@
         self._compengine = 'zlib'
         self._maxdeltachainspan = -1
         self._withsparseread = False
+        self._sparserevlog = False
         self._srdensitythreshold = 0.50
         self._srmingapsize = 262144
 
@@ -923,7 +924,10 @@
                 self._maxdeltachainspan = opts['maxdeltachainspan']
             if mmaplargeindex and 'mmapindexthreshold' in opts:
                 mmapindexthreshold = opts['mmapindexthreshold']
-            self._withsparseread = bool(opts.get('with-sparse-read', False))
+            self._sparserevlog = bool(opts.get('sparse-revlog', False))
+            withsparseread = bool(opts.get('with-sparse-read', False))
+            # sparse-revlog forces sparse-read
+            self._withsparseread = self._sparserevlog or withsparseread
             if 'sparse-read-density-threshold' in opts:
                 self._srdensitythreshold = opts['sparse-read-density-threshold']
             if 'sparse-read-min-gap-size' in opts: