# HG changeset patch # User Pierre-Yves David # Date 1617679224 -7200 # Node ID 4c041c71ec01356c6e9fb329b5fdc2c34041bb39 # Parent 3e381eb557f3c6869301c86d711904c5098ed06f revlog: introduce an explicit tracking of what the revlog is about Since the dawn of time, people have been forced to rely to lossy introspection of the index filename to determine what the purpose and role of the revlog they encounter is. This is hacky, error prone, inflexible, abstraction-leaky, . In f63299ee7e4d Raphaël introduced a new attribute to track this information: `revlog_kind`. However it is initialized in an odd place and various instances end up not having it set. In addition is only tracking some of the information we end up having to introspect in various pieces of code. So we add a new attribute that holds more data and is more strictly enforced. This work is done in collaboration with Raphaël. The `revlog_kind` one will be removed/adapted in the next changeset. We expect to be able to clean up various existing piece of code and to simplify coming work around the newer revlog format. Differential Revision: https://phab.mercurial-scm.org/D10352 diff -r 3e381eb557f3 -r 4c041c71ec01 contrib/dumprevlog --- a/contrib/dumprevlog Tue May 04 08:54:28 2021 -0700 +++ b/contrib/dumprevlog Tue Apr 06 05:20:24 2021 +0200 @@ -13,6 +13,10 @@ ) from mercurial.utils import procutil +from mercurial.revlogutils import ( + constants as revlog_constants, +) + for fp in (sys.stdin, sys.stdout, sys.stderr): procutil.setbinary(fp) @@ -32,7 +36,11 @@ for f in sys.argv[1:]: - r = revlog.revlog(binopen, encoding.strtolocal(f)) + r = revlog.revlog( + binopen, + target=(revlog_constants.KIND_OTHER, b'dump-revlog'), + indexfile=encoding.strtolocal(f), + ) print("file:", f) for i in r: n = r.node(i) diff -r 3e381eb557f3 -r 4c041c71ec01 contrib/perf.py --- a/contrib/perf.py Tue May 04 08:54:28 2021 -0700 +++ b/contrib/perf.py Tue Apr 06 05:20:24 2021 +0200 @@ -66,6 +66,8 @@ import tempfile import threading import time + +import mercurial.revlog from mercurial import ( changegroup, cmdutil, @@ -76,7 +78,6 @@ hg, mdiff, merge, - revlog, util, ) @@ -119,6 +120,21 @@ except ImportError: profiling = None +try: + from mercurial.revlogutils import constants as revlog_constants + + perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf') + + def revlog(opener, *args, **kwargs): + return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs) + + +except (ImportError, AttributeError): + perf_rl_kind = None + + def revlog(opener, *args, **kwargs): + return mercurial.revlog.revlog(opener, *args, **kwargs) + def identity(a): return a @@ -1809,7 +1825,8 @@ mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg n = scmutil.revsingle(repo, rev).node() - cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i") + + cl = revlog(getsvfs(repo), indexfile=b"00changelog.i") def d(): cl.rev(n) @@ -2602,9 +2619,9 @@ else: raise error.Abort(b'unsupported revlog version: %d' % version) - parse_index_v1 = getattr(revlog, 'parse_index_v1', None) + parse_index_v1 = getattr(mercurial.revlog, 'parse_index_v1', None) if parse_index_v1 is None: - parse_index_v1 = revlog.revlogio().parseindex + parse_index_v1 = mercurial.revlog.revlogio().parseindex rllen = len(rl) @@ -2620,7 +2637,7 @@ allnodesrev = list(reversed(allnodes)) def constructor(): - revlog.revlog(opener, indexfile) + revlog(opener, indexfile=indexfile) def read(): with opener(indexfile) as fh: @@ -3042,7 +3059,7 @@ vfs = vfsmod.vfs(tmpdir) vfs.options = getattr(orig.opener, 'options', None) - dest = revlog.revlog( + dest = revlog( vfs, indexfile=indexname, datafile=dataname, **revlogkwargs ) if dest._inline: diff -r 3e381eb557f3 -r 4c041c71ec01 contrib/undumprevlog --- a/contrib/undumprevlog Tue May 04 08:54:28 2021 -0700 +++ b/contrib/undumprevlog Tue Apr 06 05:20:24 2021 +0200 @@ -15,6 +15,10 @@ ) from mercurial.utils import procutil +from mercurial.revlogutils import ( + constants as revlog_constants, +) + for fp in (sys.stdin, sys.stdout, sys.stderr): procutil.setbinary(fp) @@ -28,7 +32,11 @@ break if l.startswith("file:"): f = encoding.strtolocal(l[6:-1]) - r = revlog.revlog(opener, f) + r = revlog.revlog( + opener, + target=(revlog_constants.KIND_OTHER, b'undump-revlog'), + indexfile=f, + ) procutil.stdout.write(b'%s\n' % f) elif l.startswith("node:"): n = bin(l[6:-1]) diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/bundlerepo.py Tue Apr 06 05:20:24 2021 +0200 @@ -46,9 +46,13 @@ urlutil, ) +from .revlogutils import ( + constants as revlog_constants, +) + class bundlerevlog(revlog.revlog): - def __init__(self, opener, indexfile, cgunpacker, linkmapper): + def __init__(self, opener, target, indexfile, cgunpacker, linkmapper): # How it works: # To retrieve a revision, we need to know the offset of the revision in # the bundle (an unbundle object). We store this offset in the index @@ -57,7 +61,7 @@ # To differentiate a rev in the bundle from a rev in the revlog, we # check revision against repotiprev. opener = vfsmod.readonlyvfs(opener) - revlog.revlog.__init__(self, opener, indexfile) + revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile) self.bundle = cgunpacker n = len(self) self.repotiprev = n - 1 @@ -171,7 +175,12 @@ changelog.changelog.__init__(self, opener) linkmapper = lambda x: x bundlerevlog.__init__( - self, opener, self.indexfile, cgunpacker, linkmapper + self, + opener, + (revlog_constants.KIND_CHANGELOG, None), + self.indexfile, + cgunpacker, + linkmapper, ) @@ -187,7 +196,12 @@ ): manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir) bundlerevlog.__init__( - self, opener, self.indexfile, cgunpacker, linkmapper + self, + opener, + (revlog_constants.KIND_MANIFESTLOG, dir), + self.indexfile, + cgunpacker, + linkmapper, ) if dirlogstarts is None: dirlogstarts = {} @@ -214,7 +228,12 @@ def __init__(self, opener, path, cgunpacker, linkmapper): filelog.filelog.__init__(self, opener, path) self._revlog = bundlerevlog( - opener, self.indexfile, cgunpacker, linkmapper + opener, + # XXX should use the unencoded path + target=(revlog_constants.KIND_FILELOG, path), + indexfile=self.indexfile, + cgunpacker=cgunpacker, + linkmapper=linkmapper, ) diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/changelog.py --- a/mercurial/changelog.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/changelog.py Tue Apr 06 05:20:24 2021 +0200 @@ -25,7 +25,10 @@ dateutil, stringutil, ) -from .revlogutils import flagutil +from .revlogutils import ( + constants as revlog_constants, + flagutil, +) _defaultextra = {b'branch': b'default'} @@ -401,7 +404,8 @@ revlog.revlog.__init__( self, opener, - indexfile, + target=(revlog_constants.KIND_CHANGELOG, None), + indexfile=indexfile, datafile=datafile, checkambig=True, mmaplargeindex=True, diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/cmdutil.py --- a/mercurial/cmdutil.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/cmdutil.py Tue Apr 06 05:20:24 2021 +0200 @@ -61,6 +61,10 @@ stringutil, ) +from .revlogutils import ( + constants as revlog_constants, +) + if pycompat.TYPE_CHECKING: from typing import ( Any, @@ -1428,8 +1432,12 @@ raise error.CommandError(cmd, _(b'invalid arguments')) if not os.path.isfile(file_): raise error.InputError(_(b"revlog '%s' not found") % file_) + + target = (revlog_constants.KIND_OTHER, b'free-form:%s' % file_) r = revlog.revlog( - vfsmod.vfs(encoding.getcwd(), audit=False), file_[:-2] + b".i" + vfsmod.vfs(encoding.getcwd(), audit=False), + target=target, + indexfile=file_[:-2] + b".i", ) return r diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/filelog.py --- a/mercurial/filelog.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/filelog.py Tue Apr 06 05:20:24 2021 +0200 @@ -18,13 +18,20 @@ util as interfaceutil, ) from .utils import storageutil +from .revlogutils import ( + constants as revlog_constants, +) @interfaceutil.implementer(repository.ifilestorage) class filelog(object): def __init__(self, opener, path): self._revlog = revlog.revlog( - opener, b'/'.join((b'data', path + b'.i')), censorable=True + opener, + # XXX should use the unencoded path + target=(revlog_constants.KIND_FILELOG, path), + indexfile=b'/'.join((b'data', path + b'.i')), + censorable=True, ) # Full name of the user visible file, relative to the repository root. # Used by LFS. diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/manifest.py --- a/mercurial/manifest.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/manifest.py Tue Apr 06 05:20:24 2021 +0200 @@ -34,6 +34,9 @@ repository, util as interfaceutil, ) +from .revlogutils import ( + constants as revlog_constants, +) parsers = policy.importmod('parsers') propertycache = util.propertycache @@ -1610,7 +1613,8 @@ self._revlog = revlog.revlog( opener, - indexfile, + target=(revlog_constants.KIND_MANIFESTLOG, self.tree), + indexfile=indexfile, # only root indexfile is cached checkambig=not bool(tree), mmaplargeindex=True, diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/revlog.py --- a/mercurial/revlog.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/revlog.py Tue Apr 06 05:20:24 2021 +0200 @@ -34,6 +34,7 @@ from .i18n import _ from .pycompat import getattr from .revlogutils.constants import ( + ALL_KINDS, FLAG_GENERALDELTA, FLAG_INLINE_DATA, INDEX_HEADER, @@ -287,7 +288,8 @@ def __init__( self, opener, - indexfile, + target, + indexfile=None, datafile=None, checkambig=False, mmaplargeindex=False, @@ -302,6 +304,12 @@ opener is a function that abstracts the file opening operation and can be used to implement COW semantics or the like. + `target`: a (KIND, ID) tuple that identify the content stored in + this revlog. It help the rest of the code to understand what the revlog + is about without having to resort to heuristic and index filename + analysis. Note: that this must be reliably be set by normal code, but + that test, debug, or performance measurement code might not set this to + accurate value. """ self.upperboundcomp = upperboundcomp self.indexfile = indexfile @@ -313,6 +321,9 @@ ) self.opener = opener + assert target[0] in ALL_KINDS + assert len(target) == 2 + self.target = target # When True, indexfile is opened with checkambig=True at writing, to # avoid file stat ambiguity. self._checkambig = checkambig @@ -2869,7 +2880,13 @@ newdatafile = self.datafile + b'.tmpcensored' # This is a bit dangerous. We could easily have a mismatch of state. - newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True) + newrl = revlog( + self.opener, + target=self.target, + indexfile=newindexfile, + datafile=newdatafile, + censorable=True, + ) newrl.version = self.version newrl._generaldelta = self._generaldelta newrl._parse_index = self._parse_index diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/revlogutils/constants.py Tue Apr 06 05:20:24 2021 +0200 @@ -13,6 +13,20 @@ from ..interfaces import repository +### Internal utily constants + +KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes +KIND_MANIFESTLOG = 1002 +KIND_FILELOG = 1003 +KIND_OTHER = 1004 + +ALL_KINDS = { + KIND_CHANGELOG, + KIND_MANIFESTLOG, + KIND_FILELOG, + KIND_OTHER, +} + ### main revlog header INDEX_HEADER = struct.Struct(b">I") diff -r 3e381eb557f3 -r 4c041c71ec01 mercurial/unionrepo.py --- a/mercurial/unionrepo.py Tue May 04 08:54:28 2021 -0700 +++ b/mercurial/unionrepo.py Tue Apr 06 05:20:24 2021 +0200 @@ -41,7 +41,11 @@ # To differentiate a rev in the second revlog from a rev in the revlog, # we check revision against repotiprev. opener = vfsmod.readonlyvfs(opener) - revlog.revlog.__init__(self, opener, indexfile) + target = getattr(revlog2, 'target', None) + if target is None: + # a revlog wrapper, eg: the manifestlog that is not an actual revlog + target = revlog2._revlog.target + revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile) self.revlog2 = revlog2 n = len(self) diff -r 3e381eb557f3 -r 4c041c71ec01 tests/test-revlog-raw.py --- a/tests/test-revlog-raw.py Tue May 04 08:54:28 2021 -0700 +++ b/tests/test-revlog-raw.py Tue Apr 06 05:20:24 2021 +0200 @@ -14,6 +14,7 @@ ) from mercurial.revlogutils import ( + constants, deltas, flagutil, ) @@ -81,7 +82,9 @@ def newrevlog(name=b'_testrevlog.i', recreate=False): if recreate: tvfs.tryunlink(name) - rlog = revlog.revlog(tvfs, name) + rlog = revlog.revlog( + tvfs, target=(constants.KIND_OTHER, b'test'), indexfile=name + ) return rlog diff -r 3e381eb557f3 -r 4c041c71ec01 tests/test-revlog.t --- a/tests/test-revlog.t Tue May 04 08:54:28 2021 -0700 +++ b/tests/test-revlog.t Tue Apr 06 05:20:24 2021 +0200 @@ -45,9 +45,10 @@ 0 2 99e0332bd498 000000000000 000000000000 1 3 6674f57a23d8 99e0332bd498 000000000000 + >>> from mercurial.revlogutils.constants import KIND_OTHER >>> from mercurial import revlog, vfs >>> tvfs = vfs.vfs(b'.') >>> tvfs.options = {b'revlogv1': True} - >>> rl = revlog.revlog(tvfs, b'a.i') + >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), indexfile=b'a.i') >>> rl.revision(1) mpatchError(*'patch cannot be decoded'*) (glob)