# HG changeset patch
# User Durham Goode
# Date 1450485759 28800
# Node ID 937e73a6e4ffc15ce6c6e9ec635e64f93fa59931
# Parent  f67c6d8cc606840ebc628f478927bbb804a07a07
verify: move verify logic into a class

In order to allow extensions to hook into the verification logic more easily,
we need to refactor it into multiple functions. The first step is to move it to
a class so the shared state can be more easily accessed.

diff -r f67c6d8cc606 -r 937e73a6e4ff mercurial/verify.py
--- a/mercurial/verify.py	Thu Dec 17 21:18:02 2015 -0500
+++ b/mercurial/verify.py	Fri Dec 18 16:42:39 2015 -0800
@@ -24,7 +24,7 @@
 def verify(repo):
     lock = repo.lock()
     try:
-        return _verify(repo)
+        return verifier().verify(repo)
     finally:
         lock.release()
 
@@ -46,307 +46,308 @@
     """
     return True
 
-def _verify(repo):
-    repo = repo.unfiltered()
-    mflinkrevs = {}
-    filelinkrevs = {}
-    filenodes = {}
-    revisions = 0
-    badrevs = set()
-    errors = [0]
-    warnings = [0]
-    ui = repo.ui
-    cl = repo.changelog
-    mf = repo.manifest
-    lrugetctx = util.lrucachefunc(repo.changectx)
-
-    if not repo.url().startswith('file:'):
-        raise error.Abort(_("cannot verify bundle or remote repos"))
-
-    def err(linkrev, msg, filename=None):
-        if linkrev is not None:
-            badrevs.add(linkrev)
-        else:
-            linkrev = '?'
-        msg = "%s: %s" % (linkrev, msg)
-        if filename:
-            msg = "%s@%s" % (filename, msg)
-        ui.warn(" " + msg + "\n")
-        errors[0] += 1
+class verifier(object):
+    def verify(self, repo):
+        repo = repo.unfiltered()
+        mflinkrevs = {}
+        filelinkrevs = {}
+        filenodes = {}
+        revisions = 0
+        badrevs = set()
+        errors = [0]
+        warnings = [0]
+        ui = repo.ui
+        cl = repo.changelog
+        mf = repo.manifest
+        lrugetctx = util.lrucachefunc(repo.changectx)
 
-    def exc(linkrev, msg, inst, filename=None):
-        if isinstance(inst, KeyboardInterrupt):
-            ui.warn(_("interrupted"))
-            raise
-        if not str(inst):
-            inst = repr(inst)
-        err(linkrev, "%s: %s" % (msg, inst), filename)
-
-    def warn(msg):
-        ui.warn(msg + "\n")
-        warnings[0] += 1
-
-    def checklog(obj, name, linkrev):
-        if not len(obj) and (havecl or havemf):
-            err(linkrev, _("empty or missing %s") % name)
-            return
+        if not repo.url().startswith('file:'):
+            raise error.Abort(_("cannot verify bundle or remote repos"))
 
-        d = obj.checksize()
-        if d[0]:
-            err(None, _("data length off by %d bytes") % d[0], name)
-        if d[1]:
-            err(None, _("index contains %d extra bytes") % d[1], name)
-
-        if obj.version != revlog.REVLOGV0:
-            if not revlogv1:
-                warn(_("warning: `%s' uses revlog format 1") % name)
-        elif revlogv1:
-            warn(_("warning: `%s' uses revlog format 0") % name)
-
-    def checkentry(obj, i, node, seen, linkrevs, f):
-        lr = obj.linkrev(obj.rev(node))
-        if lr < 0 or (havecl and lr not in linkrevs):
-            if lr < 0 or lr >= len(cl):
-                msg = _("rev %d points to nonexistent changeset %d")
+        def err(linkrev, msg, filename=None):
+            if linkrev is not None:
+                badrevs.add(linkrev)
             else:
-                msg = _("rev %d points to unexpected changeset %d")
-            err(None, msg % (i, lr), f)
-            if linkrevs:
-                if f and len(linkrevs) > 1:
-                    try:
-                        # attempt to filter down to real linkrevs
-                        linkrevs = [l for l in linkrevs
-                                    if lrugetctx(l)[f].filenode() == node]
-                    except Exception:
-                        pass
-                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
-            lr = None # can't be trusted
+                linkrev = '?'
+            msg = "%s: %s" % (linkrev, msg)
+            if filename:
+                msg = "%s@%s" % (filename, msg)
+            ui.warn(" " + msg + "\n")
+            errors[0] += 1
+
+        def exc(linkrev, msg, inst, filename=None):
+            if isinstance(inst, KeyboardInterrupt):
+                ui.warn(_("interrupted"))
+                raise
+            if not str(inst):
+                inst = repr(inst)
+            err(linkrev, "%s: %s" % (msg, inst), filename)
 
-        try:
-            p1, p2 = obj.parents(node)
-            if p1 not in seen and p1 != nullid:
-                err(lr, _("unknown parent 1 %s of %s") %
-                    (short(p1), short(node)), f)
-            if p2 not in seen and p2 != nullid:
-                err(lr, _("unknown parent 2 %s of %s") %
-                    (short(p2), short(node)), f)
-        except Exception as inst:
-            exc(lr, _("checking parents of %s") % short(node), inst, f)
+        def warn(msg):
+            ui.warn(msg + "\n")
+            warnings[0] += 1
+
+        def checklog(obj, name, linkrev):
+            if not len(obj) and (havecl or havemf):
+                err(linkrev, _("empty or missing %s") % name)
+                return
 
-        if node in seen:
-            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
-        seen[node] = i
-        return lr
-
-    if os.path.exists(repo.sjoin("journal")):
-        ui.warn(_("abandoned transaction found - run hg recover\n"))
+            d = obj.checksize()
+            if d[0]:
+                err(None, _("data length off by %d bytes") % d[0], name)
+            if d[1]:
+                err(None, _("index contains %d extra bytes") % d[1], name)
 
-    revlogv1 = cl.version != revlog.REVLOGV0
-    if ui.verbose or not revlogv1:
-        ui.status(_("repository uses revlog format %d\n") %
-                  (revlogv1 and 1 or 0))
-
-    havecl = len(cl) > 0
-    havemf = len(mf) > 0
+            if obj.version != revlog.REVLOGV0:
+                if not revlogv1:
+                    warn(_("warning: `%s' uses revlog format 1") % name)
+            elif revlogv1:
+                warn(_("warning: `%s' uses revlog format 0") % name)
 
-    ui.status(_("checking changesets\n"))
-    refersmf = False
-    seen = {}
-    checklog(cl, "changelog", 0)
-    total = len(repo)
-    for i in repo:
-        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
-        n = cl.node(i)
-        checkentry(cl, i, n, seen, [i], "changelog")
+        def checkentry(obj, i, node, seen, linkrevs, f):
+            lr = obj.linkrev(obj.rev(node))
+            if lr < 0 or (havecl and lr not in linkrevs):
+                if lr < 0 or lr >= len(cl):
+                    msg = _("rev %d points to nonexistent changeset %d")
+                else:
+                    msg = _("rev %d points to unexpected changeset %d")
+                err(None, msg % (i, lr), f)
+                if linkrevs:
+                    if f and len(linkrevs) > 1:
+                        try:
+                            # attempt to filter down to real linkrevs
+                            linkrevs = [l for l in linkrevs
+                                        if lrugetctx(l)[f].filenode() == node]
+                        except Exception:
+                            pass
+                    warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
+                lr = None # can't be trusted
+
+            try:
+                p1, p2 = obj.parents(node)
+                if p1 not in seen and p1 != nullid:
+                    err(lr, _("unknown parent 1 %s of %s") %
+                        (short(p1), short(node)), f)
+                if p2 not in seen and p2 != nullid:
+                    err(lr, _("unknown parent 2 %s of %s") %
+                        (short(p2), short(node)), f)
+            except Exception as inst:
+                exc(lr, _("checking parents of %s") % short(node), inst, f)
 
-        try:
-            changes = cl.read(n)
-            if changes[0] != nullid:
-                mflinkrevs.setdefault(changes[0], []).append(i)
-                refersmf = True
-            for f in changes[3]:
-                if _validpath(repo, f):
-                    filelinkrevs.setdefault(_normpath(f), []).append(i)
-        except Exception as inst:
-            refersmf = True
-            exc(i, _("unpacking changeset %s") % short(n), inst)
-    ui.progress(_('checking'), None)
+            if node in seen:
+                err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
+            seen[node] = i
+            return lr
+
+        if os.path.exists(repo.sjoin("journal")):
+            ui.warn(_("abandoned transaction found - run hg recover\n"))
+
+        revlogv1 = cl.version != revlog.REVLOGV0
+        if ui.verbose or not revlogv1:
+            ui.status(_("repository uses revlog format %d\n") %
+                      (revlogv1 and 1 or 0))
+
+        havecl = len(cl) > 0
+        havemf = len(mf) > 0
 
-    ui.status(_("checking manifests\n"))
-    seen = {}
-    if refersmf:
-        # Do not check manifest if there are only changelog entries with
-        # null manifests.
-        checklog(mf, "manifest", 0)
-    total = len(mf)
-    for i in mf:
-        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
-        n = mf.node(i)
-        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
-        if n in mflinkrevs:
-            del mflinkrevs[n]
-        else:
-            err(lr, _("%s not in changesets") % short(n), "manifest")
+        ui.status(_("checking changesets\n"))
+        refersmf = False
+        seen = {}
+        checklog(cl, "changelog", 0)
+        total = len(repo)
+        for i in repo:
+            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
+            n = cl.node(i)
+            checkentry(cl, i, n, seen, [i], "changelog")
 
-        try:
-            for f, fn in mf.readdelta(n).iteritems():
-                if not f:
-                    err(lr, _("file without name in manifest"))
-                elif f != "/dev/null": # ignore this in very old repos
+            try:
+                changes = cl.read(n)
+                if changes[0] != nullid:
+                    mflinkrevs.setdefault(changes[0], []).append(i)
+                    refersmf = True
+                for f in changes[3]:
                     if _validpath(repo, f):
-                        filenodes.setdefault(
-                            _normpath(f), {}).setdefault(fn, lr)
-        except Exception as inst:
-            exc(lr, _("reading manifest delta %s") % short(n), inst)
-    ui.progress(_('checking'), None)
-
-    ui.status(_("crosschecking files in changesets and manifests\n"))
+                        filelinkrevs.setdefault(_normpath(f), []).append(i)
+            except Exception as inst:
+                refersmf = True
+                exc(i, _("unpacking changeset %s") % short(n), inst)
+        ui.progress(_('checking'), None)
 
-    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
-    count = 0
-    if havemf:
-        for c, m in sorted([(c, m) for m in mflinkrevs
-                            for c in mflinkrevs[m]]):
-            count += 1
-            if m == nullid:
-                continue
-            ui.progress(_('crosschecking'), count, total=total)
-            err(c, _("changeset refers to unknown manifest %s") % short(m))
-        mflinkrevs = None # del is bad here due to scope issues
+        ui.status(_("checking manifests\n"))
+        seen = {}
+        if refersmf:
+            # Do not check manifest if there are only changelog entries with
+            # null manifests.
+            checklog(mf, "manifest", 0)
+        total = len(mf)
+        for i in mf:
+            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
+            n = mf.node(i)
+            lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
+            if n in mflinkrevs:
+                del mflinkrevs[n]
+            else:
+                err(lr, _("%s not in changesets") % short(n), "manifest")
 
-    for f in sorted(filelinkrevs):
-        count += 1
-        ui.progress(_('crosschecking'), count, total=total)
-        if f not in filenodes:
-            lr = filelinkrevs[f][0]
-            err(lr, _("in changeset but not in manifest"), f)
+            try:
+                for f, fn in mf.readdelta(n).iteritems():
+                    if not f:
+                        err(lr, _("file without name in manifest"))
+                    elif f != "/dev/null": # ignore this in very old repos
+                        if _validpath(repo, f):
+                            filenodes.setdefault(
+                                _normpath(f), {}).setdefault(fn, lr)
+            except Exception as inst:
+                exc(lr, _("reading manifest delta %s") % short(n), inst)
+        ui.progress(_('checking'), None)
+
+        ui.status(_("crosschecking files in changesets and manifests\n"))
 
-    if havecl:
-        for f in sorted(filenodes):
-            count += 1
-            ui.progress(_('crosschecking'), count, total=total)
-            if f not in filelinkrevs:
-                try:
-                    fl = repo.file(f)
-                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
-                except Exception:
-                    lr = None
-                err(lr, _("in manifest but not in changeset"), f)
+        total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
+        count = 0
+        if havemf:
+            for c, m in sorted([(c, m) for m in mflinkrevs
+                                for c in mflinkrevs[m]]):
+                count += 1
+                if m == nullid:
+                    continue
+                ui.progress(_('crosschecking'), count, total=total)
+                err(c, _("changeset refers to unknown manifest %s") % short(m))
+            mflinkrevs = None # del is bad here due to scope issues
 
-    ui.progress(_('crosschecking'), None)
-
-    ui.status(_("checking files\n"))
-
-    storefiles = set()
-    for f, f2, size in repo.store.datafiles():
-        if not f:
-            err(None, _("cannot decode filename '%s'") % f2)
-        elif size > 0 or not revlogv1:
-            storefiles.add(_normpath(f))
+            for f in sorted(filelinkrevs):
+                count += 1
+                ui.progress(_('crosschecking'), count, total=total)
+                if f not in filenodes:
+                    lr = filelinkrevs[f][0]
+                    err(lr, _("in changeset but not in manifest"), f)
 
-    fncachewarned = False
-    files = sorted(set(filenodes) | set(filelinkrevs))
-    total = len(files)
-    for i, f in enumerate(files):
-        ui.progress(_('checking'), i, item=f, total=total)
-        try:
-            linkrevs = filelinkrevs[f]
-        except KeyError:
-            # in manifest but not in changelog
-            linkrevs = []
+        if havecl:
+            for f in sorted(filenodes):
+                count += 1
+                ui.progress(_('crosschecking'), count, total=total)
+                if f not in filelinkrevs:
+                    try:
+                        fl = repo.file(f)
+                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
+                    except Exception:
+                        lr = None
+                    err(lr, _("in manifest but not in changeset"), f)
+
+        ui.progress(_('crosschecking'), None)
 
-        if linkrevs:
-            lr = linkrevs[0]
-        else:
-            lr = None
+        ui.status(_("checking files\n"))
 
-        try:
-            fl = repo.file(f)
-        except error.RevlogError as e:
-            err(lr, _("broken revlog! (%s)") % e, f)
-            continue
+        storefiles = set()
+        for f, f2, size in repo.store.datafiles():
+            if not f:
+                err(None, _("cannot decode filename '%s'") % f2)
+            elif size > 0 or not revlogv1:
+                storefiles.add(_normpath(f))
 
-        for ff in fl.files():
+        fncachewarned = False
+        files = sorted(set(filenodes) | set(filelinkrevs))
+        total = len(files)
+        for i, f in enumerate(files):
+            ui.progress(_('checking'), i, item=f, total=total)
             try:
-                storefiles.remove(ff)
+                linkrevs = filelinkrevs[f]
             except KeyError:
-                warn(_(" warning: revlog '%s' not in fncache!") % ff)
-                fncachewarned = True
-
-        checklog(fl, f, lr)
-        seen = {}
-        rp = None
-        for i in fl:
-            revisions += 1
-            n = fl.node(i)
-            lr = checkentry(fl, i, n, seen, linkrevs, f)
-            if f in filenodes:
-                if havemf and n not in filenodes[f]:
-                    err(lr, _("%s not in manifests") % (short(n)), f)
-                else:
-                    del filenodes[f][n]
+                # in manifest but not in changelog
+                linkrevs = []
 
-            # verify contents
-            try:
-                l = len(fl.read(n))
-                rp = fl.renamed(n)
-                if l != fl.size(i):
-                    if len(fl.revision(n)) != fl.size(i):
-                        err(lr, _("unpacked size is %s, %s expected") %
-                            (l, fl.size(i)), f)
-            except error.CensoredNodeError:
-                # experimental config: censor.policy
-                if ui.config("censor", "policy", "abort") == "abort":
-                    err(lr, _("censored file data"), f)
-            except Exception as inst:
-                exc(lr, _("unpacking %s") % short(n), inst, f)
+            if linkrevs:
+                lr = linkrevs[0]
+            else:
+                lr = None
 
-            # check renames
-            try:
-                if rp:
-                    if lr is not None and ui.verbose:
-                        ctx = lrugetctx(lr)
-                        found = False
-                        for pctx in ctx.parents():
-                            if rp[0] in pctx:
-                                found = True
-                                break
-                        if not found:
-                            warn(_("warning: copy source of '%s' not"
-                                   " in parents of %s") % (f, ctx))
-                    fl2 = repo.file(rp[0])
-                    if not len(fl2):
-                        err(lr, _("empty or missing copy source revlog %s:%s")
-                            % (rp[0], short(rp[1])), f)
-                    elif rp[1] == nullid:
-                        ui.note(_("warning: %s@%s: copy source"
-                                  " revision is nullid %s:%s\n")
-                                % (f, lr, rp[0], short(rp[1])))
+            try:
+                fl = repo.file(f)
+            except error.RevlogError as e:
+                err(lr, _("broken revlog! (%s)") % e, f)
+                continue
+
+            for ff in fl.files():
+                try:
+                    storefiles.remove(ff)
+                except KeyError:
+                    warn(_(" warning: revlog '%s' not in fncache!") % ff)
+                    fncachewarned = True
+
+            checklog(fl, f, lr)
+            seen = {}
+            rp = None
+            for i in fl:
+                revisions += 1
+                n = fl.node(i)
+                lr = checkentry(fl, i, n, seen, linkrevs, f)
+                if f in filenodes:
+                    if havemf and n not in filenodes[f]:
+                        err(lr, _("%s not in manifests") % (short(n)), f)
                     else:
-                        fl2.rev(rp[1])
-            except Exception as inst:
-                exc(lr, _("checking rename of %s") % short(n), inst, f)
+                        del filenodes[f][n]
+
+                # verify contents
+                try:
+                    l = len(fl.read(n))
+                    rp = fl.renamed(n)
+                    if l != fl.size(i):
+                        if len(fl.revision(n)) != fl.size(i):
+                            err(lr, _("unpacked size is %s, %s expected") %
+                                (l, fl.size(i)), f)
+                except error.CensoredNodeError:
+                    # experimental config: censor.policy
+                    if ui.config("censor", "policy", "abort") == "abort":
+                        err(lr, _("censored file data"), f)
+                except Exception as inst:
+                    exc(lr, _("unpacking %s") % short(n), inst, f)
 
-        # cross-check
-        if f in filenodes:
-            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
-            for lr, node in sorted(fns):
-                err(lr, _("%s in manifests not found") % short(node), f)
-    ui.progress(_('checking'), None)
-
-    for f in storefiles:
-        warn(_("warning: orphan revlog '%s'") % f)
+                # check renames
+                try:
+                    if rp:
+                        if lr is not None and ui.verbose:
+                            ctx = lrugetctx(lr)
+                            found = False
+                            for pctx in ctx.parents():
+                                if rp[0] in pctx:
+                                    found = True
+                                    break
+                            if not found:
+                                warn(_("warning: copy source of '%s' not"
+                                       " in parents of %s") % (f, ctx))
+                        fl2 = repo.file(rp[0])
+                        if not len(fl2):
+                            err(lr, _("empty or missing copy source revlog "
+                                      "%s:%s") % (rp[0], short(rp[1])), f)
+                        elif rp[1] == nullid:
+                            ui.note(_("warning: %s@%s: copy source"
+                                      " revision is nullid %s:%s\n")
+                                    % (f, lr, rp[0], short(rp[1])))
+                        else:
+                            fl2.rev(rp[1])
+                except Exception as inst:
+                    exc(lr, _("checking rename of %s") % short(n), inst, f)
 
-    ui.status(_("%d files, %d changesets, %d total revisions\n") %
-              (len(files), len(cl), revisions))
-    if warnings[0]:
-        ui.warn(_("%d warnings encountered!\n") % warnings[0])
-    if fncachewarned:
-        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
-                  'corrupt fncache\n'))
-    if errors[0]:
-        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
-        if badrevs:
-            ui.warn(_("(first damaged changeset appears to be %d)\n")
-                    % min(badrevs))
-        return 1
+            # cross-check
+            if f in filenodes:
+                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
+                for lr, node in sorted(fns):
+                    err(lr, _("%s in manifests not found") % short(node), f)
+        ui.progress(_('checking'), None)
+
+        for f in storefiles:
+            warn(_("warning: orphan revlog '%s'") % f)
+
+        ui.status(_("%d files, %d changesets, %d total revisions\n") %
+                  (len(files), len(cl), revisions))
+        if warnings[0]:
+            ui.warn(_("%d warnings encountered!\n") % warnings[0])
+        if fncachewarned:
+            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
+                      'corrupt fncache\n'))
+        if errors[0]:
+            ui.warn(_("%d integrity errors encountered!\n") % errors[0])
+            if badrevs:
+                ui.warn(_("(first damaged changeset appears to be %d)\n")
+                        % min(badrevs))
+            return 1