verify: start to abstract file verification
authorGregory Szorc <gregory.szorc@gmail.com>
Wed, 19 Sep 2018 11:17:28 -0700
changeset 39847 97986c9c69d3
parent 39846 39f51064e9f5
child 39848 68282a7b29a7
verify: start to abstract file verification Currently, the file storage interface has a handful of attributes that are exclusively or near-exclusively used by repo verification code. In order to support verification on non-revlog/alternate storage backends, we'll need to abstract verification so it can be performed in a storage-agnostic way. This commit starts that process. We establish a new verifyintegrity() method on revlogs and expose it to the file storage interface. Most of verify.verifier.checklog() has been ported to this new method. We need a way to represent verification problems. So we invent an interface to represent a verification problem, invent a revlog type to implement that interface, and use it. The arguments to verifyintegrity() will almost certainly change in the future, once more functionality is ported from the verify code. And the "revlogv1" version check is very hacky. (The code in verify is actually buggy because it is comparing the full 32-bit header integer instead of just the revlog version short. I'll likely fix this in a subsequent commit.) Differential Revision: https://phab.mercurial-scm.org/D4701
mercurial/filelog.py
mercurial/repository.py
mercurial/revlog.py
mercurial/verify.py
tests/test-check-interfaces.py
--- a/mercurial/filelog.py	Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/filelog.py	Wed Sep 19 11:17:28 2018 -0700
@@ -189,6 +189,9 @@
 
         return True
 
+    def verifyintegrity(self, state):
+        return self._revlog.verifyintegrity(state)
+
     # TODO these aren't part of the interface and aren't internal methods.
     # Callers should be fixed to not use them.
 
--- a/mercurial/repository.py	Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/repository.py	Wed Sep 19 11:17:28 2018 -0700
@@ -318,6 +318,20 @@
             _('cannot %s; remote repository does not support the %r '
               'capability') % (purpose, name))
 
+class iverifyproblem(interfaceutil.Interface):
+    """Represents a problem with the integrity of the repository.
+
+    Instances of this interface are emitted to describe an integrity issue
+    with a repository (e.g. corrupt storage, missing data, etc).
+
+    Instances are essentially messages associated with severity.
+    """
+    warning = interfaceutil.Attribute(
+        """Message indicating a non-fatal problem.""")
+
+    error = interfaceutil.Attribute(
+        """Message indicating a fatal problem.""")
+
 class irevisiondelta(interfaceutil.Interface):
     """Represents a delta between one revision and another.
 
@@ -749,6 +763,17 @@
         TODO this is used by verify and it should not be part of the interface.
         """
 
+    def verifyintegrity(state):
+        """Verifies the integrity of file storage.
+
+        ``state`` is a dict holding state of the verifier process. It can be
+        used to communicate data between invocations of multiple storage
+        primitives.
+
+        The method yields objects conforming to the ``iverifyproblem``
+        interface.
+        """
+
 class idirs(interfaceutil.Interface):
     """Interface representing a collection of directories from paths.
 
--- a/mercurial/revlog.py	Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/revlog.py	Wed Sep 19 11:17:28 2018 -0700
@@ -254,6 +254,12 @@
     revision = attr.ib()
     delta = attr.ib()
 
+@interfaceutil.implementer(repository.iverifyproblem)
+@attr.s(frozen=True)
+class revlogproblem(object):
+    warning = attr.ib(default=None)
+    error = attr.ib(default=None)
+
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
@@ -2581,3 +2587,23 @@
         if dataread is not idxread:
             dataread.close()
             datawrite.close()
+
+    def verifyintegrity(self, state):
+        """Verifies the integrity of the revlog.
+
+        Yields ``revlogproblem`` instances describing problems that are
+        found.
+        """
+        dd, di = self.checksize()
+        if dd:
+            yield revlogproblem(error=_('data length off by %d bytes') % dd)
+        if di:
+            yield revlogproblem(error=_('index contains %d extra bytes') % di)
+
+        if self.version != REVLOGV0:
+            if not state['revlogv1']:
+                yield revlogproblem(warning=_("warning: `%s' uses revlog "
+                                             "format 1") % self.indexfile)
+        elif state['revlogv1']:
+            yield revlogproblem(warning=_("warning: `%s' uses revlog "
+                                          "format 0") % self.indexfile)
--- a/mercurial/verify.py	Mon Sep 24 08:58:57 2018 -0700
+++ b/mercurial/verify.py	Wed Sep 19 11:17:28 2018 -0700
@@ -341,6 +341,10 @@
             elif (size > 0 or not revlogv1) and f.startswith('data/'):
                 storefiles.add(_normpath(f))
 
+        state = {
+            'revlogv1': self.revlogv1,
+        }
+
         files = sorted(set(filenodes) | set(filelinkrevs))
         revisions = 0
         progress = ui.makeprogress(_('checking'), unit=_('files'),
@@ -373,7 +377,19 @@
                                   ff)
                         self.fncachewarned = True
 
-            self.checklog(fl, f, lr)
+            if not len(fl) and (self.havecl or self.havemf):
+                self.err(lr, _("empty or missing %s") % f)
+            else:
+                for problem in fl.verifyintegrity(state):
+                    if problem.warning:
+                        self.warn(problem.warning)
+                    elif problem.error:
+                        self.err(lr, problem.error, f)
+                    else:
+                        raise error.ProgrammingError(
+                            'problem instance does not set warning or error '
+                            'attribute: %s' % problem.msg)
+
             seen = {}
             rp = None
             for i in fl:
--- a/tests/test-check-interfaces.py	Mon Sep 24 08:58:57 2018 -0700
+++ b/tests/test-check-interfaces.py	Wed Sep 19 11:17:28 2018 -0700
@@ -229,4 +229,8 @@
         basenode=b'')
     checkzobject(rdr)
 
+    ziverify.verifyClass(repository.iverifyproblem,
+                         revlog.revlogproblem)
+    checkzobject(revlog.revlogproblem())
+
 main()