revlog: introduce an explicit tracking of what the revlog is about
authorPierre-Yves David <pierre-yves.david@octobus.net>
Tue, 06 Apr 2021 05:20:24 +0200
changeset 47072 4c041c71ec01
parent 47071 3e381eb557f3
child 47073 64cd1496bb70
revlog: introduce an explicit tracking of what the revlog is about Since the dawn of time, people have been forced to rely to lossy introspection of the index filename to determine what the purpose and role of the revlog they encounter is. This is hacky, error prone, inflexible, abstraction-leaky, <insert-your-own-complaints-here>. In f63299ee7e4d Raphaël introduced a new attribute to track this information: `revlog_kind`. However it is initialized in an odd place and various instances end up not having it set. In addition is only tracking some of the information we end up having to introspect in various pieces of code. So we add a new attribute that holds more data and is more strictly enforced. This work is done in collaboration with Raphaël. The `revlog_kind` one will be removed/adapted in the next changeset. We expect to be able to clean up various existing piece of code and to simplify coming work around the newer revlog format. Differential Revision: https://phab.mercurial-scm.org/D10352
contrib/dumprevlog
contrib/perf.py
contrib/undumprevlog
mercurial/bundlerepo.py
mercurial/changelog.py
mercurial/cmdutil.py
mercurial/filelog.py
mercurial/manifest.py
mercurial/revlog.py
mercurial/revlogutils/constants.py
mercurial/unionrepo.py
tests/test-revlog-raw.py
tests/test-revlog.t
--- a/contrib/dumprevlog	Tue May 04 08:54:28 2021 -0700
+++ b/contrib/dumprevlog	Tue Apr 06 05:20:24 2021 +0200
@@ -13,6 +13,10 @@
 )
 from mercurial.utils import procutil
 
+from mercurial.revlogutils import (
+    constants as revlog_constants,
+)
+
 for fp in (sys.stdin, sys.stdout, sys.stderr):
     procutil.setbinary(fp)
 
@@ -32,7 +36,11 @@
 
 
 for f in sys.argv[1:]:
-    r = revlog.revlog(binopen, encoding.strtolocal(f))
+    r = revlog.revlog(
+        binopen,
+        target=(revlog_constants.KIND_OTHER, b'dump-revlog'),
+        indexfile=encoding.strtolocal(f),
+    )
     print("file:", f)
     for i in r:
         n = r.node(i)
--- a/contrib/perf.py	Tue May 04 08:54:28 2021 -0700
+++ b/contrib/perf.py	Tue Apr 06 05:20:24 2021 +0200
@@ -66,6 +66,8 @@
 import tempfile
 import threading
 import time
+
+import mercurial.revlog
 from mercurial import (
     changegroup,
     cmdutil,
@@ -76,7 +78,6 @@
     hg,
     mdiff,
     merge,
-    revlog,
     util,
 )
 
@@ -119,6 +120,21 @@
 except ImportError:
     profiling = None
 
+try:
+    from mercurial.revlogutils import constants as revlog_constants
+
+    perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf')
+
+    def revlog(opener, *args, **kwargs):
+        return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs)
+
+
+except (ImportError, AttributeError):
+    perf_rl_kind = None
+
+    def revlog(opener, *args, **kwargs):
+        return mercurial.revlog.revlog(opener, *args, **kwargs)
+
 
 def identity(a):
     return a
@@ -1809,7 +1825,8 @@
 
     mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg
     n = scmutil.revsingle(repo, rev).node()
-    cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
+
+    cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
 
     def d():
         cl.rev(n)
@@ -2602,9 +2619,9 @@
     else:
         raise error.Abort(b'unsupported revlog version: %d' % version)
 
-    parse_index_v1 = getattr(revlog, 'parse_index_v1', None)
+    parse_index_v1 = getattr(mercurial.revlog, 'parse_index_v1', None)
     if parse_index_v1 is None:
-        parse_index_v1 = revlog.revlogio().parseindex
+        parse_index_v1 = mercurial.revlog.revlogio().parseindex
 
     rllen = len(rl)
 
@@ -2620,7 +2637,7 @@
     allnodesrev = list(reversed(allnodes))
 
     def constructor():
-        revlog.revlog(opener, indexfile)
+        revlog(opener, indexfile=indexfile)
 
     def read():
         with opener(indexfile) as fh:
@@ -3042,7 +3059,7 @@
         vfs = vfsmod.vfs(tmpdir)
         vfs.options = getattr(orig.opener, 'options', None)
 
-        dest = revlog.revlog(
+        dest = revlog(
             vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
         )
         if dest._inline:
--- a/contrib/undumprevlog	Tue May 04 08:54:28 2021 -0700
+++ b/contrib/undumprevlog	Tue Apr 06 05:20:24 2021 +0200
@@ -15,6 +15,10 @@
 )
 from mercurial.utils import procutil
 
+from mercurial.revlogutils import (
+    constants as revlog_constants,
+)
+
 for fp in (sys.stdin, sys.stdout, sys.stderr):
     procutil.setbinary(fp)
 
@@ -28,7 +32,11 @@
         break
     if l.startswith("file:"):
         f = encoding.strtolocal(l[6:-1])
-        r = revlog.revlog(opener, f)
+        r = revlog.revlog(
+            opener,
+            target=(revlog_constants.KIND_OTHER, b'undump-revlog'),
+            indexfile=f,
+        )
         procutil.stdout.write(b'%s\n' % f)
     elif l.startswith("node:"):
         n = bin(l[6:-1])
--- a/mercurial/bundlerepo.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/bundlerepo.py	Tue Apr 06 05:20:24 2021 +0200
@@ -46,9 +46,13 @@
     urlutil,
 )
 
+from .revlogutils import (
+    constants as revlog_constants,
+)
+
 
 class bundlerevlog(revlog.revlog):
-    def __init__(self, opener, indexfile, cgunpacker, linkmapper):
+    def __init__(self, opener, target, indexfile, cgunpacker, linkmapper):
         # How it works:
         # To retrieve a revision, we need to know the offset of the revision in
         # the bundle (an unbundle object). We store this offset in the index
@@ -57,7 +61,7 @@
         # To differentiate a rev in the bundle from a rev in the revlog, we
         # check revision against repotiprev.
         opener = vfsmod.readonlyvfs(opener)
-        revlog.revlog.__init__(self, opener, indexfile)
+        revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
         self.bundle = cgunpacker
         n = len(self)
         self.repotiprev = n - 1
@@ -171,7 +175,12 @@
         changelog.changelog.__init__(self, opener)
         linkmapper = lambda x: x
         bundlerevlog.__init__(
-            self, opener, self.indexfile, cgunpacker, linkmapper
+            self,
+            opener,
+            (revlog_constants.KIND_CHANGELOG, None),
+            self.indexfile,
+            cgunpacker,
+            linkmapper,
         )
 
 
@@ -187,7 +196,12 @@
     ):
         manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
         bundlerevlog.__init__(
-            self, opener, self.indexfile, cgunpacker, linkmapper
+            self,
+            opener,
+            (revlog_constants.KIND_MANIFESTLOG, dir),
+            self.indexfile,
+            cgunpacker,
+            linkmapper,
         )
         if dirlogstarts is None:
             dirlogstarts = {}
@@ -214,7 +228,12 @@
     def __init__(self, opener, path, cgunpacker, linkmapper):
         filelog.filelog.__init__(self, opener, path)
         self._revlog = bundlerevlog(
-            opener, self.indexfile, cgunpacker, linkmapper
+            opener,
+            # XXX should use the unencoded path
+            target=(revlog_constants.KIND_FILELOG, path),
+            indexfile=self.indexfile,
+            cgunpacker=cgunpacker,
+            linkmapper=linkmapper,
         )
 
 
--- a/mercurial/changelog.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/changelog.py	Tue Apr 06 05:20:24 2021 +0200
@@ -25,7 +25,10 @@
     dateutil,
     stringutil,
 )
-from .revlogutils import flagutil
+from .revlogutils import (
+    constants as revlog_constants,
+    flagutil,
+)
 
 _defaultextra = {b'branch': b'default'}
 
@@ -401,7 +404,8 @@
         revlog.revlog.__init__(
             self,
             opener,
-            indexfile,
+            target=(revlog_constants.KIND_CHANGELOG, None),
+            indexfile=indexfile,
             datafile=datafile,
             checkambig=True,
             mmaplargeindex=True,
--- a/mercurial/cmdutil.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/cmdutil.py	Tue Apr 06 05:20:24 2021 +0200
@@ -61,6 +61,10 @@
     stringutil,
 )
 
+from .revlogutils import (
+    constants as revlog_constants,
+)
+
 if pycompat.TYPE_CHECKING:
     from typing import (
         Any,
@@ -1428,8 +1432,12 @@
             raise error.CommandError(cmd, _(b'invalid arguments'))
         if not os.path.isfile(file_):
             raise error.InputError(_(b"revlog '%s' not found") % file_)
+
+        target = (revlog_constants.KIND_OTHER, b'free-form:%s' % file_)
         r = revlog.revlog(
-            vfsmod.vfs(encoding.getcwd(), audit=False), file_[:-2] + b".i"
+            vfsmod.vfs(encoding.getcwd(), audit=False),
+            target=target,
+            indexfile=file_[:-2] + b".i",
         )
     return r
 
--- a/mercurial/filelog.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/filelog.py	Tue Apr 06 05:20:24 2021 +0200
@@ -18,13 +18,20 @@
     util as interfaceutil,
 )
 from .utils import storageutil
+from .revlogutils import (
+    constants as revlog_constants,
+)
 
 
 @interfaceutil.implementer(repository.ifilestorage)
 class filelog(object):
     def __init__(self, opener, path):
         self._revlog = revlog.revlog(
-            opener, b'/'.join((b'data', path + b'.i')), censorable=True
+            opener,
+            # XXX should use the unencoded path
+            target=(revlog_constants.KIND_FILELOG, path),
+            indexfile=b'/'.join((b'data', path + b'.i')),
+            censorable=True,
         )
         # Full name of the user visible file, relative to the repository root.
         # Used by LFS.
--- a/mercurial/manifest.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/manifest.py	Tue Apr 06 05:20:24 2021 +0200
@@ -34,6 +34,9 @@
     repository,
     util as interfaceutil,
 )
+from .revlogutils import (
+    constants as revlog_constants,
+)
 
 parsers = policy.importmod('parsers')
 propertycache = util.propertycache
@@ -1610,7 +1613,8 @@
 
         self._revlog = revlog.revlog(
             opener,
-            indexfile,
+            target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
+            indexfile=indexfile,
             # only root indexfile is cached
             checkambig=not bool(tree),
             mmaplargeindex=True,
--- a/mercurial/revlog.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/revlog.py	Tue Apr 06 05:20:24 2021 +0200
@@ -34,6 +34,7 @@
 from .i18n import _
 from .pycompat import getattr
 from .revlogutils.constants import (
+    ALL_KINDS,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
@@ -287,7 +288,8 @@
     def __init__(
         self,
         opener,
-        indexfile,
+        target,
+        indexfile=None,
         datafile=None,
         checkambig=False,
         mmaplargeindex=False,
@@ -302,6 +304,12 @@
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
 
+        `target`: a (KIND, ID) tuple that identify the content stored in
+        this revlog. It help the rest of the code to understand what the revlog
+        is about without having to resort to heuristic and index filename
+        analysis. Note: that this must be reliably be set by normal code, but
+        that test, debug, or performance measurement code might not set this to
+        accurate value.
         """
         self.upperboundcomp = upperboundcomp
         self.indexfile = indexfile
@@ -313,6 +321,9 @@
             )
 
         self.opener = opener
+        assert target[0] in ALL_KINDS
+        assert len(target) == 2
+        self.target = target
         #  When True, indexfile is opened with checkambig=True at writing, to
         #  avoid file stat ambiguity.
         self._checkambig = checkambig
@@ -2869,7 +2880,13 @@
         newdatafile = self.datafile + b'.tmpcensored'
 
         # This is a bit dangerous. We could easily have a mismatch of state.
-        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
+        newrl = revlog(
+            self.opener,
+            target=self.target,
+            indexfile=newindexfile,
+            datafile=newdatafile,
+            censorable=True,
+        )
         newrl.version = self.version
         newrl._generaldelta = self._generaldelta
         newrl._parse_index = self._parse_index
--- a/mercurial/revlogutils/constants.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/revlogutils/constants.py	Tue Apr 06 05:20:24 2021 +0200
@@ -13,6 +13,20 @@
 
 from ..interfaces import repository
 
+### Internal utily constants
+
+KIND_CHANGELOG = 1001  # over 256 to not be comparable with a bytes
+KIND_MANIFESTLOG = 1002
+KIND_FILELOG = 1003
+KIND_OTHER = 1004
+
+ALL_KINDS = {
+    KIND_CHANGELOG,
+    KIND_MANIFESTLOG,
+    KIND_FILELOG,
+    KIND_OTHER,
+}
+
 ### main revlog header
 
 INDEX_HEADER = struct.Struct(b">I")
--- a/mercurial/unionrepo.py	Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/unionrepo.py	Tue Apr 06 05:20:24 2021 +0200
@@ -41,7 +41,11 @@
         # To differentiate a rev in the second revlog from a rev in the revlog,
         # we check revision against repotiprev.
         opener = vfsmod.readonlyvfs(opener)
-        revlog.revlog.__init__(self, opener, indexfile)
+        target = getattr(revlog2, 'target', None)
+        if target is None:
+            # a revlog wrapper, eg: the manifestlog that is not an actual revlog
+            target = revlog2._revlog.target
+        revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
         self.revlog2 = revlog2
 
         n = len(self)
--- a/tests/test-revlog-raw.py	Tue May 04 08:54:28 2021 -0700
+++ b/tests/test-revlog-raw.py	Tue Apr 06 05:20:24 2021 +0200
@@ -14,6 +14,7 @@
 )
 
 from mercurial.revlogutils import (
+    constants,
     deltas,
     flagutil,
 )
@@ -81,7 +82,9 @@
 def newrevlog(name=b'_testrevlog.i', recreate=False):
     if recreate:
         tvfs.tryunlink(name)
-    rlog = revlog.revlog(tvfs, name)
+    rlog = revlog.revlog(
+        tvfs, target=(constants.KIND_OTHER, b'test'), indexfile=name
+    )
     return rlog
 
 
--- a/tests/test-revlog.t	Tue May 04 08:54:28 2021 -0700
+++ b/tests/test-revlog.t	Tue Apr 06 05:20:24 2021 +0200
@@ -45,9 +45,10 @@
        0       2 99e0332bd498 000000000000 000000000000
        1       3 6674f57a23d8 99e0332bd498 000000000000
 
+  >>> from mercurial.revlogutils.constants import KIND_OTHER
   >>> from mercurial import revlog, vfs
   >>> tvfs = vfs.vfs(b'.')
   >>> tvfs.options = {b'revlogv1': True}
-  >>> rl = revlog.revlog(tvfs, b'a.i')
+  >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), indexfile=b'a.i')
   >>> rl.revision(1)
   mpatchError(*'patch cannot be decoded'*) (glob)