revlog: introduce an explicit tracking of what the revlog is about
Since the dawn of time, people have been forced to rely on lossy introspection
of the index filename to determine the purpose and role of the revlog they
encounter. This is hacky, error-prone, inflexible, abstraction-leaky,
<insert-your-own-complaints-here>.
In
f63299ee7e4d Raphaël introduced a new attribute to track this information:
`revlog_kind`. However, it is initialized in an odd place and various instances
end up not having it set. In addition, it only tracks some of the information
we end up having to introspect in various pieces of code.
So we add a new attribute that holds more data and is more strictly enforced.
This work is done in collaboration with Raphaël.
The `revlog_kind` one will be removed/adapted in the next changeset. We expect
to be able to clean up various existing pieces of code and to simplify coming
work around the newer revlog format.
Differential Revision: https://phab.mercurial-scm.org/D10352
--- a/contrib/dumprevlog Tue May 04 08:54:28 2021 -0700
+++ b/contrib/dumprevlog Tue Apr 06 05:20:24 2021 +0200
@@ -13,6 +13,10 @@
)
from mercurial.utils import procutil
+from mercurial.revlogutils import (
+ constants as revlog_constants,
+)
+
for fp in (sys.stdin, sys.stdout, sys.stderr):
procutil.setbinary(fp)
@@ -32,7 +36,11 @@
for f in sys.argv[1:]:
- r = revlog.revlog(binopen, encoding.strtolocal(f))
+ r = revlog.revlog(
+ binopen,
+ target=(revlog_constants.KIND_OTHER, b'dump-revlog'),
+ indexfile=encoding.strtolocal(f),
+ )
print("file:", f)
for i in r:
n = r.node(i)
--- a/contrib/perf.py Tue May 04 08:54:28 2021 -0700
+++ b/contrib/perf.py Tue Apr 06 05:20:24 2021 +0200
@@ -66,6 +66,8 @@
import tempfile
import threading
import time
+
+import mercurial.revlog
from mercurial import (
changegroup,
cmdutil,
@@ -76,7 +78,6 @@
hg,
mdiff,
merge,
- revlog,
util,
)
@@ -119,6 +120,21 @@
except ImportError:
profiling = None
+try:
+ from mercurial.revlogutils import constants as revlog_constants
+
+ perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf')
+
+ def revlog(opener, *args, **kwargs):
+ return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs)
+
+
+except (ImportError, AttributeError):
+ perf_rl_kind = None
+
+ def revlog(opener, *args, **kwargs):
+ return mercurial.revlog.revlog(opener, *args, **kwargs)
+
def identity(a):
return a
@@ -1809,7 +1825,8 @@
mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
n = scmutil.revsingle(repo, rev).node()
- cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
+
+ cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
def d():
cl.rev(n)
@@ -2602,9 +2619,9 @@
else:
raise error.Abort(b'unsupported revlog version: %d' % version)
- parse_index_v1 = getattr(revlog, 'parse_index_v1', None)
+ parse_index_v1 = getattr(mercurial.revlog, 'parse_index_v1', None)
if parse_index_v1 is None:
- parse_index_v1 = revlog.revlogio().parseindex
+ parse_index_v1 = mercurial.revlog.revlogio().parseindex
rllen = len(rl)
@@ -2620,7 +2637,7 @@
allnodesrev = list(reversed(allnodes))
def constructor():
- revlog.revlog(opener, indexfile)
+ revlog(opener, indexfile=indexfile)
def read():
with opener(indexfile) as fh:
@@ -3042,7 +3059,7 @@
vfs = vfsmod.vfs(tmpdir)
vfs.options = getattr(orig.opener, 'options', None)
- dest = revlog.revlog(
+ dest = revlog(
vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
)
if dest._inline:
--- a/contrib/undumprevlog Tue May 04 08:54:28 2021 -0700
+++ b/contrib/undumprevlog Tue Apr 06 05:20:24 2021 +0200
@@ -15,6 +15,10 @@
)
from mercurial.utils import procutil
+from mercurial.revlogutils import (
+ constants as revlog_constants,
+)
+
for fp in (sys.stdin, sys.stdout, sys.stderr):
procutil.setbinary(fp)
@@ -28,7 +32,11 @@
break
if l.startswith("file:"):
f = encoding.strtolocal(l[6:-1])
- r = revlog.revlog(opener, f)
+ r = revlog.revlog(
+ opener,
+ target=(revlog_constants.KIND_OTHER, b'undump-revlog'),
+ indexfile=f,
+ )
procutil.stdout.write(b'%s\n' % f)
elif l.startswith("node:"):
n = bin(l[6:-1])
--- a/mercurial/bundlerepo.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/bundlerepo.py Tue Apr 06 05:20:24 2021 +0200
@@ -46,9 +46,13 @@
urlutil,
)
+from .revlogutils import (
+ constants as revlog_constants,
+)
+
class bundlerevlog(revlog.revlog):
- def __init__(self, opener, indexfile, cgunpacker, linkmapper):
+ def __init__(self, opener, target, indexfile, cgunpacker, linkmapper):
# How it works:
# To retrieve a revision, we need to know the offset of the revision in
# the bundle (an unbundle object). We store this offset in the index
@@ -57,7 +61,7 @@
# To differentiate a rev in the bundle from a rev in the revlog, we
# check revision against repotiprev.
opener = vfsmod.readonlyvfs(opener)
- revlog.revlog.__init__(self, opener, indexfile)
+ revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
self.bundle = cgunpacker
n = len(self)
self.repotiprev = n - 1
@@ -171,7 +175,12 @@
changelog.changelog.__init__(self, opener)
linkmapper = lambda x: x
bundlerevlog.__init__(
- self, opener, self.indexfile, cgunpacker, linkmapper
+ self,
+ opener,
+ (revlog_constants.KIND_CHANGELOG, None),
+ self.indexfile,
+ cgunpacker,
+ linkmapper,
)
@@ -187,7 +196,12 @@
):
manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
bundlerevlog.__init__(
- self, opener, self.indexfile, cgunpacker, linkmapper
+ self,
+ opener,
+ (revlog_constants.KIND_MANIFESTLOG, dir),
+ self.indexfile,
+ cgunpacker,
+ linkmapper,
)
if dirlogstarts is None:
dirlogstarts = {}
@@ -214,7 +228,12 @@
def __init__(self, opener, path, cgunpacker, linkmapper):
filelog.filelog.__init__(self, opener, path)
self._revlog = bundlerevlog(
- opener, self.indexfile, cgunpacker, linkmapper
+ opener,
+ # XXX should use the unencoded path
+ target=(revlog_constants.KIND_FILELOG, path),
+ indexfile=self.indexfile,
+ cgunpacker=cgunpacker,
+ linkmapper=linkmapper,
)
--- a/mercurial/changelog.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/changelog.py Tue Apr 06 05:20:24 2021 +0200
@@ -25,7 +25,10 @@
dateutil,
stringutil,
)
-from .revlogutils import flagutil
+from .revlogutils import (
+ constants as revlog_constants,
+ flagutil,
+)
_defaultextra = {b'branch': b'default'}
@@ -401,7 +404,8 @@
revlog.revlog.__init__(
self,
opener,
- indexfile,
+ target=(revlog_constants.KIND_CHANGELOG, None),
+ indexfile=indexfile,
datafile=datafile,
checkambig=True,
mmaplargeindex=True,
--- a/mercurial/cmdutil.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/cmdutil.py Tue Apr 06 05:20:24 2021 +0200
@@ -61,6 +61,10 @@
stringutil,
)
+from .revlogutils import (
+ constants as revlog_constants,
+)
+
if pycompat.TYPE_CHECKING:
from typing import (
Any,
@@ -1428,8 +1432,12 @@
raise error.CommandError(cmd, _(b'invalid arguments'))
if not os.path.isfile(file_):
raise error.InputError(_(b"revlog '%s' not found") % file_)
+
+ target = (revlog_constants.KIND_OTHER, b'free-form:%s' % file_)
r = revlog.revlog(
- vfsmod.vfs(encoding.getcwd(), audit=False), file_[:-2] + b".i"
+ vfsmod.vfs(encoding.getcwd(), audit=False),
+ target=target,
+ indexfile=file_[:-2] + b".i",
)
return r
--- a/mercurial/filelog.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/filelog.py Tue Apr 06 05:20:24 2021 +0200
@@ -18,13 +18,20 @@
util as interfaceutil,
)
from .utils import storageutil
+from .revlogutils import (
+ constants as revlog_constants,
+)
@interfaceutil.implementer(repository.ifilestorage)
class filelog(object):
def __init__(self, opener, path):
self._revlog = revlog.revlog(
- opener, b'/'.join((b'data', path + b'.i')), censorable=True
+ opener,
+ # XXX should use the unencoded path
+ target=(revlog_constants.KIND_FILELOG, path),
+ indexfile=b'/'.join((b'data', path + b'.i')),
+ censorable=True,
)
# Full name of the user visible file, relative to the repository root.
# Used by LFS.
--- a/mercurial/manifest.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/manifest.py Tue Apr 06 05:20:24 2021 +0200
@@ -34,6 +34,9 @@
repository,
util as interfaceutil,
)
+from .revlogutils import (
+ constants as revlog_constants,
+)
parsers = policy.importmod('parsers')
propertycache = util.propertycache
@@ -1610,7 +1613,8 @@
self._revlog = revlog.revlog(
opener,
- indexfile,
+ target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
+ indexfile=indexfile,
# only root indexfile is cached
checkambig=not bool(tree),
mmaplargeindex=True,
--- a/mercurial/revlog.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/revlog.py Tue Apr 06 05:20:24 2021 +0200
@@ -34,6 +34,7 @@
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
+ ALL_KINDS,
FLAG_GENERALDELTA,
FLAG_INLINE_DATA,
INDEX_HEADER,
@@ -287,7 +288,8 @@
def __init__(
self,
opener,
- indexfile,
+ target,
+ indexfile=None,
datafile=None,
checkambig=False,
mmaplargeindex=False,
@@ -302,6 +304,12 @@
opener is a function that abstracts the file opening operation
and can be used to implement COW semantics or the like.
+ `target`: a (KIND, ID) tuple that identifies the content stored in
+ this revlog. It helps the rest of the code understand what the revlog
+ is about without having to resort to heuristics and index filename
+ analysis. Note that this must be reliably set by normal code, but
+ that test, debug, or performance measurement code might not set this to
+ an accurate value.
"""
self.upperboundcomp = upperboundcomp
self.indexfile = indexfile
@@ -313,6 +321,9 @@
)
self.opener = opener
+ assert target[0] in ALL_KINDS
+ assert len(target) == 2
+ self.target = target
# When True, indexfile is opened with checkambig=True at writing, to
# avoid file stat ambiguity.
self._checkambig = checkambig
@@ -2869,7 +2880,13 @@
newdatafile = self.datafile + b'.tmpcensored'
# This is a bit dangerous. We could easily have a mismatch of state.
- newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
+ newrl = revlog(
+ self.opener,
+ target=self.target,
+ indexfile=newindexfile,
+ datafile=newdatafile,
+ censorable=True,
+ )
newrl.version = self.version
newrl._generaldelta = self._generaldelta
newrl._parse_index = self._parse_index
--- a/mercurial/revlogutils/constants.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/revlogutils/constants.py Tue Apr 06 05:20:24 2021 +0200
@@ -13,6 +13,20 @@
from ..interfaces import repository
+### Internal utility constants
+
+KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
+KIND_MANIFESTLOG = 1002
+KIND_FILELOG = 1003
+KIND_OTHER = 1004
+
+ALL_KINDS = {
+ KIND_CHANGELOG,
+ KIND_MANIFESTLOG,
+ KIND_FILELOG,
+ KIND_OTHER,
+}
+
### main revlog header
INDEX_HEADER = struct.Struct(b">I")
--- a/mercurial/unionrepo.py Tue May 04 08:54:28 2021 -0700
+++ b/mercurial/unionrepo.py Tue Apr 06 05:20:24 2021 +0200
@@ -41,7 +41,11 @@
# To differentiate a rev in the second revlog from a rev in the revlog,
# we check revision against repotiprev.
opener = vfsmod.readonlyvfs(opener)
- revlog.revlog.__init__(self, opener, indexfile)
+ target = getattr(revlog2, 'target', None)
+ if target is None:
+ # a revlog wrapper, eg: the manifestlog that is not an actual revlog
+ target = revlog2._revlog.target
+ revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile)
self.revlog2 = revlog2
n = len(self)
--- a/tests/test-revlog-raw.py Tue May 04 08:54:28 2021 -0700
+++ b/tests/test-revlog-raw.py Tue Apr 06 05:20:24 2021 +0200
@@ -14,6 +14,7 @@
)
from mercurial.revlogutils import (
+ constants,
deltas,
flagutil,
)
@@ -81,7 +82,9 @@
def newrevlog(name=b'_testrevlog.i', recreate=False):
if recreate:
tvfs.tryunlink(name)
- rlog = revlog.revlog(tvfs, name)
+ rlog = revlog.revlog(
+ tvfs, target=(constants.KIND_OTHER, b'test'), indexfile=name
+ )
return rlog
--- a/tests/test-revlog.t Tue May 04 08:54:28 2021 -0700
+++ b/tests/test-revlog.t Tue Apr 06 05:20:24 2021 +0200
@@ -45,9 +45,10 @@
0 2 99e0332bd498 000000000000 000000000000
1 3 6674f57a23d8 99e0332bd498 000000000000
+ >>> from mercurial.revlogutils.constants import KIND_OTHER
>>> from mercurial import revlog, vfs
>>> tvfs = vfs.vfs(b'.')
>>> tvfs.options = {b'revlogv1': True}
- >>> rl = revlog.revlog(tvfs, b'a.i')
+ >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), indexfile=b'a.i')
>>> rl.revision(1)
mpatchError(*'patch cannot be decoded'*) (glob)