verify: start to abstract file verification
Currently, the file storage interface has a handful of attributes
that are exclusively or near-exclusively used by repo verification
code. In order to support verification on non-revlog/alternate
storage backends, we'll need to abstract verification so it can
be performed in a storage-agnostic way.
This commit starts that process.
We establish a new verifyintegrity() method on revlogs and expose
it to the file storage interface. Most of verify.verifier.checklog()
has been ported to this new method.
We need a way to represent verification problems. So we invent an
interface to represent a verification problem, invent a revlog type
to implement that interface, and use it.
The arguments to verifyintegrity() will almost certainly change in
the future, once more functionality is ported from the verify code.
And the "revlogv1" version check is very hacky. (The code in verify
is actually buggy because it is comparing the full 32-bit header
integer instead of just the revlog version short. I'll likely fix
this in a subsequent commit.)
Differential Revision: https://phab.mercurial-scm.org/D4701
--- a/mercurial/filelog.py Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/filelog.py Wed Sep 19 11:17:28 2018 -0700
@@ -189,6 +189,9 @@
return True
+ def verifyintegrity(self, state):
+ return self._revlog.verifyintegrity(state)
+
# TODO these aren't part of the interface and aren't internal methods.
# Callers should be fixed to not use them.
--- a/mercurial/repository.py Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/repository.py Wed Sep 19 11:17:28 2018 -0700
@@ -318,6 +318,20 @@
_('cannot %s; remote repository does not support the %r '
'capability') % (purpose, name))
+class iverifyproblem(interfaceutil.Interface):
+ """Represents a problem with the integrity of the repository.
+
+ Instances of this interface are emitted to describe an integrity issue
+ with a repository (e.g. corrupt storage, missing data, etc).
+
+ Instances are essentially messages associated with severity.
+ """
+ warning = interfaceutil.Attribute(
+ """Message indicating a non-fatal problem.""")
+
+ error = interfaceutil.Attribute(
+ """Message indicating a fatal problem.""")
+
class irevisiondelta(interfaceutil.Interface):
"""Represents a delta between one revision and another.
@@ -749,6 +763,17 @@
TODO this is used by verify and it should not be part of the interface.
"""
+ def verifyintegrity(state):
+ """Verifies the integrity of file storage.
+
+ ``state`` is a dict holding state of the verifier process. It can be
+ used to communicate data between invocations of multiple storage
+ primitives.
+
+ The method yields objects conforming to the ``iverifyproblem``
+ interface.
+ """
+
class idirs(interfaceutil.Interface):
"""Interface representing a collection of directories from paths.
--- a/mercurial/revlog.py Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/revlog.py Wed Sep 19 11:17:28 2018 -0700
@@ -254,6 +254,12 @@
revision = attr.ib()
delta = attr.ib()
+@interfaceutil.implementer(repository.iverifyproblem)
+@attr.s(frozen=True)
+class revlogproblem(object):
+ warning = attr.ib(default=None)
+ error = attr.ib(default=None)
+
# index v0:
# 4 bytes: offset
# 4 bytes: compressed length
@@ -2581,3 +2587,23 @@
if dataread is not idxread:
dataread.close()
datawrite.close()
+
+ def verifyintegrity(self, state):
+ """Verifies the integrity of the revlog.
+
+ Yields ``revlogproblem`` instances describing problems that are
+ found.
+ """
+ dd, di = self.checksize()
+ if dd:
+ yield revlogproblem(error=_('data length off by %d bytes') % dd)
+ if di:
+ yield revlogproblem(error=_('index contains %d extra bytes') % di)
+
+ if self.version != REVLOGV0:
+ if not state['revlogv1']:
+ yield revlogproblem(warning=_("warning: `%s' uses revlog "
+ "format 1") % self.indexfile)
+ elif state['revlogv1']:
+ yield revlogproblem(warning=_("warning: `%s' uses revlog "
+ "format 0") % self.indexfile)
--- a/mercurial/verify.py Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/verify.py Wed Sep 19 11:17:28 2018 -0700
@@ -341,6 +341,10 @@
elif (size > 0 or not revlogv1) and f.startswith('data/'):
storefiles.add(_normpath(f))
+ state = {
+ 'revlogv1': self.revlogv1,
+ }
+
files = sorted(set(filenodes) | set(filelinkrevs))
revisions = 0
progress = ui.makeprogress(_('checking'), unit=_('files'),
@@ -373,7 +377,19 @@
ff)
self.fncachewarned = True
- self.checklog(fl, f, lr)
+ if not len(fl) and (self.havecl or self.havemf):
+ self.err(lr, _("empty or missing %s") % f)
+ else:
+ for problem in fl.verifyintegrity(state):
+ if problem.warning:
+ self.warn(problem.warning)
+ elif problem.error:
+ self.err(lr, problem.error, f)
+ else:
+ raise error.ProgrammingError(
+ 'problem instance does not set warning or error '
+ 'attribute: %s' % problem.msg)
+
seen = {}
rp = None
for i in fl:
--- a/tests/test-check-interfaces.py Mon Sep 24 08:58:57 2018 -0700
+++ b/tests/test-check-interfaces.py Wed Sep 19 11:17:28 2018 -0700
@@ -229,4 +229,8 @@
basenode=b'')
checkzobject(rdr)
+ ziverify.verifyClass(repository.iverifyproblem,
+ revlog.revlogproblem)
+ checkzobject(revlog.revlogproblem())
+
main()