changeset 27443:937e73a6e4ff

verify: move verify logic into a class

In order to allow extensions to hook into the verification logic more easily, we need to refactor it into multiple functions. The first step is to move it to a class so the shared state can be more easily accessed.
author Durham Goode <durham@fb.com>
date Fri, 18 Dec 2015 16:42:39 -0800
parents f67c6d8cc606
children 6647401858ab
files mercurial/verify.py
diffstat 1 files changed, 279 insertions(+), 278 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/verify.py	Thu Dec 17 21:18:02 2015 -0500
+++ b/mercurial/verify.py	Fri Dec 18 16:42:39 2015 -0800
@@ -24,7 +24,7 @@
 def verify(repo):
     lock = repo.lock()
     try:
-        return _verify(repo)
+        return verifier().verify(repo)
     finally:
         lock.release()
 
@@ -46,307 +46,308 @@
     """
     return True
 
-def _verify(repo):
-    repo = repo.unfiltered()
-    mflinkrevs = {}
-    filelinkrevs = {}
-    filenodes = {}
-    revisions = 0
-    badrevs = set()
-    errors = [0]
-    warnings = [0]
-    ui = repo.ui
-    cl = repo.changelog
-    mf = repo.manifest
-    lrugetctx = util.lrucachefunc(repo.changectx)
-
-    if not repo.url().startswith('file:'):
-        raise error.Abort(_("cannot verify bundle or remote repos"))
-
-    def err(linkrev, msg, filename=None):
-        if linkrev is not None:
-            badrevs.add(linkrev)
-        else:
-            linkrev = '?'
-        msg = "%s: %s" % (linkrev, msg)
-        if filename:
-            msg = "%s@%s" % (filename, msg)
-        ui.warn(" " + msg + "\n")
-        errors[0] += 1
+class verifier(object):
+    def verify(self, repo):
+        repo = repo.unfiltered()
+        mflinkrevs = {}
+        filelinkrevs = {}
+        filenodes = {}
+        revisions = 0
+        badrevs = set()
+        errors = [0]
+        warnings = [0]
+        ui = repo.ui
+        cl = repo.changelog
+        mf = repo.manifest
+        lrugetctx = util.lrucachefunc(repo.changectx)
 
-    def exc(linkrev, msg, inst, filename=None):
-        if isinstance(inst, KeyboardInterrupt):
-            ui.warn(_("interrupted"))
-            raise
-        if not str(inst):
-            inst = repr(inst)
-        err(linkrev, "%s: %s" % (msg, inst), filename)
-
-    def warn(msg):
-        ui.warn(msg + "\n")
-        warnings[0] += 1
-
-    def checklog(obj, name, linkrev):
-        if not len(obj) and (havecl or havemf):
-            err(linkrev, _("empty or missing %s") % name)
-            return
+        if not repo.url().startswith('file:'):
+            raise error.Abort(_("cannot verify bundle or remote repos"))
 
-        d = obj.checksize()
-        if d[0]:
-            err(None, _("data length off by %d bytes") % d[0], name)
-        if d[1]:
-            err(None, _("index contains %d extra bytes") % d[1], name)
-
-        if obj.version != revlog.REVLOGV0:
-            if not revlogv1:
-                warn(_("warning: `%s' uses revlog format 1") % name)
-        elif revlogv1:
-            warn(_("warning: `%s' uses revlog format 0") % name)
-
-    def checkentry(obj, i, node, seen, linkrevs, f):
-        lr = obj.linkrev(obj.rev(node))
-        if lr < 0 or (havecl and lr not in linkrevs):
-            if lr < 0 or lr >= len(cl):
-                msg = _("rev %d points to nonexistent changeset %d")
+        def err(linkrev, msg, filename=None):
+            if linkrev is not None:
+                badrevs.add(linkrev)
             else:
-                msg = _("rev %d points to unexpected changeset %d")
-            err(None, msg % (i, lr), f)
-            if linkrevs:
-                if f and len(linkrevs) > 1:
-                    try:
-                        # attempt to filter down to real linkrevs
-                        linkrevs = [l for l in linkrevs
-                                    if lrugetctx(l)[f].filenode() == node]
-                    except Exception:
-                        pass
-                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
-            lr = None # can't be trusted
+                linkrev = '?'
+            msg = "%s: %s" % (linkrev, msg)
+            if filename:
+                msg = "%s@%s" % (filename, msg)
+            ui.warn(" " + msg + "\n")
+            errors[0] += 1
+
+        def exc(linkrev, msg, inst, filename=None):
+            if isinstance(inst, KeyboardInterrupt):
+                ui.warn(_("interrupted"))
+                raise
+            if not str(inst):
+                inst = repr(inst)
+            err(linkrev, "%s: %s" % (msg, inst), filename)
 
-        try:
-            p1, p2 = obj.parents(node)
-            if p1 not in seen and p1 != nullid:
-                err(lr, _("unknown parent 1 %s of %s") %
-                    (short(p1), short(node)), f)
-            if p2 not in seen and p2 != nullid:
-                err(lr, _("unknown parent 2 %s of %s") %
-                    (short(p2), short(node)), f)
-        except Exception as inst:
-            exc(lr, _("checking parents of %s") % short(node), inst, f)
+        def warn(msg):
+            ui.warn(msg + "\n")
+            warnings[0] += 1
+
+        def checklog(obj, name, linkrev):
+            if not len(obj) and (havecl or havemf):
+                err(linkrev, _("empty or missing %s") % name)
+                return
 
-        if node in seen:
-            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
-        seen[node] = i
-        return lr
-
-    if os.path.exists(repo.sjoin("journal")):
-        ui.warn(_("abandoned transaction found - run hg recover\n"))
+            d = obj.checksize()
+            if d[0]:
+                err(None, _("data length off by %d bytes") % d[0], name)
+            if d[1]:
+                err(None, _("index contains %d extra bytes") % d[1], name)
 
-    revlogv1 = cl.version != revlog.REVLOGV0
-    if ui.verbose or not revlogv1:
-        ui.status(_("repository uses revlog format %d\n") %
-                       (revlogv1 and 1 or 0))
-
-    havecl = len(cl) > 0
-    havemf = len(mf) > 0
+            if obj.version != revlog.REVLOGV0:
+                if not revlogv1:
+                    warn(_("warning: `%s' uses revlog format 1") % name)
+            elif revlogv1:
+                warn(_("warning: `%s' uses revlog format 0") % name)
 
-    ui.status(_("checking changesets\n"))
-    refersmf = False
-    seen = {}
-    checklog(cl, "changelog", 0)
-    total = len(repo)
-    for i in repo:
-        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
-        n = cl.node(i)
-        checkentry(cl, i, n, seen, [i], "changelog")
+        def checkentry(obj, i, node, seen, linkrevs, f):
+            lr = obj.linkrev(obj.rev(node))
+            if lr < 0 or (havecl and lr not in linkrevs):
+                if lr < 0 or lr >= len(cl):
+                    msg = _("rev %d points to nonexistent changeset %d")
+                else:
+                    msg = _("rev %d points to unexpected changeset %d")
+                err(None, msg % (i, lr), f)
+                if linkrevs:
+                    if f and len(linkrevs) > 1:
+                        try:
+                            # attempt to filter down to real linkrevs
+                            linkrevs = [l for l in linkrevs
+                                        if lrugetctx(l)[f].filenode() == node]
+                        except Exception:
+                            pass
+                    warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
+                lr = None # can't be trusted
+
+            try:
+                p1, p2 = obj.parents(node)
+                if p1 not in seen and p1 != nullid:
+                    err(lr, _("unknown parent 1 %s of %s") %
+                        (short(p1), short(node)), f)
+                if p2 not in seen and p2 != nullid:
+                    err(lr, _("unknown parent 2 %s of %s") %
+                        (short(p2), short(node)), f)
+            except Exception as inst:
+                exc(lr, _("checking parents of %s") % short(node), inst, f)
 
-        try:
-            changes = cl.read(n)
-            if changes[0] != nullid:
-                mflinkrevs.setdefault(changes[0], []).append(i)
-                refersmf = True
-            for f in changes[3]:
-                if _validpath(repo, f):
-                    filelinkrevs.setdefault(_normpath(f), []).append(i)
-        except Exception as inst:
-            refersmf = True
-            exc(i, _("unpacking changeset %s") % short(n), inst)
-    ui.progress(_('checking'), None)
+            if node in seen:
+                err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
+            seen[node] = i
+            return lr
+
+        if os.path.exists(repo.sjoin("journal")):
+            ui.warn(_("abandoned transaction found - run hg recover\n"))
+
+        revlogv1 = cl.version != revlog.REVLOGV0
+        if ui.verbose or not revlogv1:
+            ui.status(_("repository uses revlog format %d\n") %
+                           (revlogv1 and 1 or 0))
+
+        havecl = len(cl) > 0
+        havemf = len(mf) > 0
 
-    ui.status(_("checking manifests\n"))
-    seen = {}
-    if refersmf:
-        # Do not check manifest if there are only changelog entries with
-        # null manifests.
-        checklog(mf, "manifest", 0)
-    total = len(mf)
-    for i in mf:
-        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
-        n = mf.node(i)
-        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
-        if n in mflinkrevs:
-            del mflinkrevs[n]
-        else:
-            err(lr, _("%s not in changesets") % short(n), "manifest")
+        ui.status(_("checking changesets\n"))
+        refersmf = False
+        seen = {}
+        checklog(cl, "changelog", 0)
+        total = len(repo)
+        for i in repo:
+            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
+            n = cl.node(i)
+            checkentry(cl, i, n, seen, [i], "changelog")
 
-        try:
-            for f, fn in mf.readdelta(n).iteritems():
-                if not f:
-                    err(lr, _("file without name in manifest"))
-                elif f != "/dev/null": # ignore this in very old repos
+            try:
+                changes = cl.read(n)
+                if changes[0] != nullid:
+                    mflinkrevs.setdefault(changes[0], []).append(i)
+                    refersmf = True
+                for f in changes[3]:
                     if _validpath(repo, f):
-                        filenodes.setdefault(
-                            _normpath(f), {}).setdefault(fn, lr)
-        except Exception as inst:
-            exc(lr, _("reading manifest delta %s") % short(n), inst)
-    ui.progress(_('checking'), None)
-
-    ui.status(_("crosschecking files in changesets and manifests\n"))
+                        filelinkrevs.setdefault(_normpath(f), []).append(i)
+            except Exception as inst:
+                refersmf = True
+                exc(i, _("unpacking changeset %s") % short(n), inst)
+        ui.progress(_('checking'), None)
 
-    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
-    count = 0
-    if havemf:
-        for c, m in sorted([(c, m) for m in mflinkrevs
-                            for c in mflinkrevs[m]]):
-            count += 1
-            if m == nullid:
-                continue
-            ui.progress(_('crosschecking'), count, total=total)
-            err(c, _("changeset refers to unknown manifest %s") % short(m))
-        mflinkrevs = None # del is bad here due to scope issues
+        ui.status(_("checking manifests\n"))
+        seen = {}
+        if refersmf:
+            # Do not check manifest if there are only changelog entries with
+            # null manifests.
+            checklog(mf, "manifest", 0)
+        total = len(mf)
+        for i in mf:
+            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
+            n = mf.node(i)
+            lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
+            if n in mflinkrevs:
+                del mflinkrevs[n]
+            else:
+                err(lr, _("%s not in changesets") % short(n), "manifest")
 
-        for f in sorted(filelinkrevs):
-            count += 1
-            ui.progress(_('crosschecking'), count, total=total)
-            if f not in filenodes:
-                lr = filelinkrevs[f][0]
-                err(lr, _("in changeset but not in manifest"), f)
+            try:
+                for f, fn in mf.readdelta(n).iteritems():
+                    if not f:
+                        err(lr, _("file without name in manifest"))
+                    elif f != "/dev/null": # ignore this in very old repos
+                        if _validpath(repo, f):
+                            filenodes.setdefault(
+                                _normpath(f), {}).setdefault(fn, lr)
+            except Exception as inst:
+                exc(lr, _("reading manifest delta %s") % short(n), inst)
+        ui.progress(_('checking'), None)
+
+        ui.status(_("crosschecking files in changesets and manifests\n"))
 
-    if havecl:
-        for f in sorted(filenodes):
-            count += 1
-            ui.progress(_('crosschecking'), count, total=total)
-            if f not in filelinkrevs:
-                try:
-                    fl = repo.file(f)
-                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
-                except Exception:
-                    lr = None
-                err(lr, _("in manifest but not in changeset"), f)
+        total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
+        count = 0
+        if havemf:
+            for c, m in sorted([(c, m) for m in mflinkrevs
+                                for c in mflinkrevs[m]]):
+                count += 1
+                if m == nullid:
+                    continue
+                ui.progress(_('crosschecking'), count, total=total)
+                err(c, _("changeset refers to unknown manifest %s") % short(m))
+            mflinkrevs = None # del is bad here due to scope issues
 
-    ui.progress(_('crosschecking'), None)
-
-    ui.status(_("checking files\n"))
-
-    storefiles = set()
-    for f, f2, size in repo.store.datafiles():
-        if not f:
-            err(None, _("cannot decode filename '%s'") % f2)
-        elif size > 0 or not revlogv1:
-            storefiles.add(_normpath(f))
+            for f in sorted(filelinkrevs):
+                count += 1
+                ui.progress(_('crosschecking'), count, total=total)
+                if f not in filenodes:
+                    lr = filelinkrevs[f][0]
+                    err(lr, _("in changeset but not in manifest"), f)
 
-    fncachewarned = False
-    files = sorted(set(filenodes) | set(filelinkrevs))
-    total = len(files)
-    for i, f in enumerate(files):
-        ui.progress(_('checking'), i, item=f, total=total)
-        try:
-            linkrevs = filelinkrevs[f]
-        except KeyError:
-            # in manifest but not in changelog
-            linkrevs = []
+        if havecl:
+            for f in sorted(filenodes):
+                count += 1
+                ui.progress(_('crosschecking'), count, total=total)
+                if f not in filelinkrevs:
+                    try:
+                        fl = repo.file(f)
+                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
+                    except Exception:
+                        lr = None
+                    err(lr, _("in manifest but not in changeset"), f)
+
+        ui.progress(_('crosschecking'), None)
 
-        if linkrevs:
-            lr = linkrevs[0]
-        else:
-            lr = None
+        ui.status(_("checking files\n"))
 
-        try:
-            fl = repo.file(f)
-        except error.RevlogError as e:
-            err(lr, _("broken revlog! (%s)") % e, f)
-            continue
+        storefiles = set()
+        for f, f2, size in repo.store.datafiles():
+            if not f:
+                err(None, _("cannot decode filename '%s'") % f2)
+            elif size > 0 or not revlogv1:
+                storefiles.add(_normpath(f))
 
-        for ff in fl.files():
+        fncachewarned = False
+        files = sorted(set(filenodes) | set(filelinkrevs))
+        total = len(files)
+        for i, f in enumerate(files):
+            ui.progress(_('checking'), i, item=f, total=total)
             try:
-                storefiles.remove(ff)
+                linkrevs = filelinkrevs[f]
             except KeyError:
-                warn(_(" warning: revlog '%s' not in fncache!") % ff)
-                fncachewarned = True
-
-        checklog(fl, f, lr)
-        seen = {}
-        rp = None
-        for i in fl:
-            revisions += 1
-            n = fl.node(i)
-            lr = checkentry(fl, i, n, seen, linkrevs, f)
-            if f in filenodes:
-                if havemf and n not in filenodes[f]:
-                    err(lr, _("%s not in manifests") % (short(n)), f)
-                else:
-                    del filenodes[f][n]
+                # in manifest but not in changelog
+                linkrevs = []
 
-            # verify contents
-            try:
-                l = len(fl.read(n))
-                rp = fl.renamed(n)
-                if l != fl.size(i):
-                    if len(fl.revision(n)) != fl.size(i):
-                        err(lr, _("unpacked size is %s, %s expected") %
-                            (l, fl.size(i)), f)
-            except error.CensoredNodeError:
-                # experimental config: censor.policy
-                if ui.config("censor", "policy", "abort") == "abort":
-                    err(lr, _("censored file data"), f)
-            except Exception as inst:
-                exc(lr, _("unpacking %s") % short(n), inst, f)
+            if linkrevs:
+                lr = linkrevs[0]
+            else:
+                lr = None
 
-            # check renames
             try:
-                if rp:
-                    if lr is not None and ui.verbose:
-                        ctx = lrugetctx(lr)
-                        found = False
-                        for pctx in ctx.parents():
-                            if rp[0] in pctx:
-                                found = True
-                                break
-                        if not found:
-                            warn(_("warning: copy source of '%s' not"
-                                   " in parents of %s") % (f, ctx))
-                    fl2 = repo.file(rp[0])
-                    if not len(fl2):
-                        err(lr, _("empty or missing copy source revlog %s:%s")
-                            % (rp[0], short(rp[1])), f)
-                    elif rp[1] == nullid:
-                        ui.note(_("warning: %s@%s: copy source"
-                                  " revision is nullid %s:%s\n")
-                            % (f, lr, rp[0], short(rp[1])))
+                fl = repo.file(f)
+            except error.RevlogError as e:
+                err(lr, _("broken revlog! (%s)") % e, f)
+                continue
+
+            for ff in fl.files():
+                try:
+                    storefiles.remove(ff)
+                except KeyError:
+                    warn(_(" warning: revlog '%s' not in fncache!") % ff)
+                    fncachewarned = True
+
+            checklog(fl, f, lr)
+            seen = {}
+            rp = None
+            for i in fl:
+                revisions += 1
+                n = fl.node(i)
+                lr = checkentry(fl, i, n, seen, linkrevs, f)
+                if f in filenodes:
+                    if havemf and n not in filenodes[f]:
+                        err(lr, _("%s not in manifests") % (short(n)), f)
                     else:
-                        fl2.rev(rp[1])
-            except Exception as inst:
-                exc(lr, _("checking rename of %s") % short(n), inst, f)
+                        del filenodes[f][n]
+
+                # verify contents
+                try:
+                    l = len(fl.read(n))
+                    rp = fl.renamed(n)
+                    if l != fl.size(i):
+                        if len(fl.revision(n)) != fl.size(i):
+                            err(lr, _("unpacked size is %s, %s expected") %
+                                (l, fl.size(i)), f)
+                except error.CensoredNodeError:
+                    # experimental config: censor.policy
+                    if ui.config("censor", "policy", "abort") == "abort":
+                        err(lr, _("censored file data"), f)
+                except Exception as inst:
+                    exc(lr, _("unpacking %s") % short(n), inst, f)
 
-        # cross-check
-        if f in filenodes:
-            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
-            for lr, node in sorted(fns):
-                err(lr, _("%s in manifests not found") % short(node), f)
-    ui.progress(_('checking'), None)
-
-    for f in storefiles:
-        warn(_("warning: orphan revlog '%s'") % f)
+                # check renames
+                try:
+                    if rp:
+                        if lr is not None and ui.verbose:
+                            ctx = lrugetctx(lr)
+                            found = False
+                            for pctx in ctx.parents():
+                                if rp[0] in pctx:
+                                    found = True
+                                    break
+                            if not found:
+                                warn(_("warning: copy source of '%s' not"
+                                       " in parents of %s") % (f, ctx))
+                        fl2 = repo.file(rp[0])
+                        if not len(fl2):
+                            err(lr, _("empty or missing copy source revlog "
+                                      "%s:%s") % (rp[0], short(rp[1])), f)
+                        elif rp[1] == nullid:
+                            ui.note(_("warning: %s@%s: copy source"
+                                      " revision is nullid %s:%s\n")
+                                % (f, lr, rp[0], short(rp[1])))
+                        else:
+                            fl2.rev(rp[1])
+                except Exception as inst:
+                    exc(lr, _("checking rename of %s") % short(n), inst, f)
 
-    ui.status(_("%d files, %d changesets, %d total revisions\n") %
-                   (len(files), len(cl), revisions))
-    if warnings[0]:
-        ui.warn(_("%d warnings encountered!\n") % warnings[0])
-    if fncachewarned:
-        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
-                  'corrupt fncache\n'))
-    if errors[0]:
-        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
-        if badrevs:
-            ui.warn(_("(first damaged changeset appears to be %d)\n")
-                    % min(badrevs))
-        return 1
+            # cross-check
+            if f in filenodes:
+                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
+                for lr, node in sorted(fns):
+                    err(lr, _("%s in manifests not found") % short(node), f)
+        ui.progress(_('checking'), None)
+
+        for f in storefiles:
+            warn(_("warning: orphan revlog '%s'") % f)
+
+        ui.status(_("%d files, %d changesets, %d total revisions\n") %
+                       (len(files), len(cl), revisions))
+        if warnings[0]:
+            ui.warn(_("%d warnings encountered!\n") % warnings[0])
+        if fncachewarned:
+            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
+                      'corrupt fncache\n'))
+        if errors[0]:
+            ui.warn(_("%d integrity errors encountered!\n") % errors[0])
+            if badrevs:
+                ui.warn(_("(first damaged changeset appears to be %d)\n")
+                        % min(badrevs))
+            return 1