# HG changeset patch # User Matt Mackall # Date 1214508950 18000 # Node ID e79a8f36c2a51f01199c57273fd1a94f938ec0ef # Parent 7424a75f919a30604402779623d8523e0b23655b verify: lots of refactoring - simplify finding first bad rev - no need to count changesets - add exc function to simplify exception handling - combine checksize and checkversion to checklog - unify missing revlog detection in checklog - add checkentry to consolidate - linkrev lookup - detailed check of linkrev - detailed check of parents - duplicate checking - use checkentry for changelog, manifest, and files - simplify havecl and havemf - track all changesets referring to a manifest - move unnamed file check somewhere more useful - reorder crosschecks - fix filenodes crosscheck and add exception handling - check unpacked size field diff -r 7424a75f919a -r e79a8f36c2a5 mercurial/verify.py --- a/mercurial/verify.py Thu Jun 26 14:35:50 2008 -0500 +++ b/mercurial/verify.py Thu Jun 26 14:35:50 2008 -0500 @@ -17,151 +17,137 @@ del lock def _verify(repo): + mflinkrevs = {} filelinkrevs = {} filenodes = {} - changesets = revisions = files = 0 - firstbad = [None] + revisions = 0 + badrevs = {} errors = [0] warnings = [0] - neededmanifests = {} ui = repo.ui cl = repo.changelog mf = repo.manifest def err(linkrev, msg, filename=None): if linkrev != None: - if firstbad[0] != None: - firstbad[0] = min(firstbad[0], linkrev) - else: - firstbad[0] = linkrev + badrevs[linkrev] = True else: - linkrev = "?" + linkrev = '?' 
msg = "%s: %s" % (linkrev, msg) if filename: msg = "%s@%s" % (filename, msg) ui.warn(" " + msg + "\n") errors[0] += 1 + def exc(linkrev, msg, inst, filename=None): + if isinstance(inst, KeyboardInterrupt): + ui.warn(_("interrupted")) + raise + err(linkrev, "%s: %s" % (msg, inst), filename) + def warn(msg): ui.warn(msg + "\n") warnings[0] += 1 - def checksize(obj, name): + def checklog(obj, name): + if not len(obj) and (havecl or havemf): + err(0, _("empty or missing %s") % name) + return + d = obj.checksize() if d[0]: err(None, _("data length off by %d bytes") % d[0], name) if d[1]: err(None, _("index contains %d extra bytes") % d[1], name) - def checkversion(obj, name): if obj.version != revlog.REVLOGV0: if not revlogv1: warn(_("warning: `%s' uses revlog format 1") % name) elif revlogv1: warn(_("warning: `%s' uses revlog format 0") % name) + def checkentry(obj, i, node, seen, linkrevs, f): + lr = obj.linkrev(node) + if lr < 0 or (havecl and lr not in linkrevs): + t = "unexpected" + if lr < 0 or lr >= len(cl): + t = "nonexistent" + err(None, _("rev %d point to %s changeset %d") % (i, t, lr), f) + if linkrevs: + warn(_(" (expected %s)") % " ".join(map(str,linkrevs))) + lr = None # can't be trusted + + try: + p1, p2 = obj.parents(node) + if p1 not in seen and p1 != nullid: + err(lr, _("unknown parent 1 %s of %s") % + (short(p1), short(n)), f) + if p2 not in seen and p2 != nullid: + err(lr, _("unknown parent 2 %s of %s") % + (short(p2), short(p1)), f) + except Exception, inst: + exc(lr, _("checking parents of %s") % short(node), inst, f) + + if node in seen: + err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f) + seen[n] = i + return lr + revlogv1 = cl.version != revlog.REVLOGV0 if ui.verbose or not revlogv1: ui.status(_("repository uses revlog format %d\n") % (revlogv1 and 1 or 0)) - havecl = havemf = 1 - seen = {} + havecl = len(cl) > 0 + havemf = len(mf) > 0 + ui.status(_("checking changesets\n")) - if not len(cl) and len(mf): - havecl = 0 - err(0, _("empty 
or missing 00changelog.i")) - else: - checksize(cl, "changelog") - + seen = {} + checklog(cl, "changelog") for i in repo: - changesets += 1 n = cl.node(i) - l = cl.linkrev(n) - if l != i: - err(i, _("incorrect link (%d) for changeset") %(l)) - if n in seen: - err(i, _("duplicates changeset at revision %d") % seen[n]) - seen[n] = i + checkentry(cl, i, n, seen, [i], "changelog") - for p in cl.parents(n): - if p not in cl.nodemap: - err(i, _("changeset has unknown parent %s") % short(p)) try: changes = cl.read(n) - except KeyboardInterrupt: - ui.warn(_("interrupted")) - raise + mflinkrevs.setdefault(changes[0], []).append(i) + for f in changes[3]: + filelinkrevs.setdefault(f, []).append(i) except Exception, inst: - err(i, _("unpacking changeset: %s") % inst) - continue - - if changes[0] not in neededmanifests: - neededmanifests[changes[0]] = i + exc(i, _("unpacking changeset %s") % short(n), inst) - for f in changes[3]: - filelinkrevs.setdefault(f, []).append(i) - + ui.status(_("checking manifests\n")) seen = {} - ui.status(_("checking manifests\n")) - if len(cl) and not len(mf): - havemf = 0 - err(0, _("empty or missing 00manifest.i")) - else: - checkversion(mf, "manifest") - checksize(mf, "manifest") - + checklog(mf, "manifest") for i in mf: n = mf.node(i) - l = mf.linkrev(n) - - if l < 0 or (havecl and l >= len(cl)): - err(None, _("bad link (%d) at manifest revision %d") % (l, i)) - - if n in neededmanifests: - del neededmanifests[n] - - if n in seen: - err(l, _("duplicates manifest from %d") % seen[n]) - - seen[n] = l - - for p in mf.parents(n): - if p not in mf.nodemap: - err(l, _("manifest has unknown parent %s") % short(p)) + lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest") + if n in mflinkrevs: + del mflinkrevs[n] try: for f, fn in mf.readdelta(n).iteritems(): - fns = filenodes.setdefault(f, {}) - if fn not in fns: - fns[fn] = n - except KeyboardInterrupt: - ui.warn(_("interrupted")) - raise + if not f: + err(lr, _("file without name in 
manifest")) + elif f != "/dev/null": + fns = filenodes.setdefault(f, {}) + if fn not in fns: + fns[fn] = n except Exception, inst: - err(l, _("reading manifest delta: %s") % inst) - continue + exc(lr, _("reading manifest delta %s") % short(n), inst) ui.status(_("crosschecking files in changesets and manifests\n")) - if havemf > 0: - nm = [(c, m) for m, c in neededmanifests.items()] + if havemf: + nm = [] + for m in mflinkrevs: + for c in mflinkrevs[m]: + nm.append((c, m)) nm.sort() for c, m in nm: err(c, _("changeset refers to unknown manifest %s") % short(m)) - del neededmanifests, nm + del mflinkrevs, nm - if havecl: - fl = filenodes.keys() - fl.sort() - for f in fl: - if f not in filelinkrevs: - lrs = [mf.linkrev(n) for n in filenodes[f]] - lrs.sort() - err(lrs[0], _("in manifest but not in changeset"), f) - del fl - - if havemf: fl = filelinkrevs.keys() fl.sort() for f in fl: @@ -170,115 +156,75 @@ err(lr, _("in changeset but not in manifest"), f) del fl + if havecl: + fl = filenodes.keys() + fl.sort() + for f in fl: + if f not in filelinkrevs: + try: + lr = min([repo.file(f).linkrev(n) for n in filenodes[f]]) + except: + lr = None + err(lr, _("in manifest but not in changeset"), f) + del fl + ui.status(_("checking files\n")) - ff = dict.fromkeys(filenodes.keys() + filelinkrevs.keys()).keys() - ff.sort() - for f in ff: - if f == "/dev/null": - continue - files += 1 - if not f: - lr = filelinkrevs[f][0] - err(lr, _("file without name in manifest")) - continue + files = dict.fromkeys(filenodes.keys() + filelinkrevs.keys()).keys() + files.sort() + for f in files: fl = repo.file(f) - checkversion(fl, f) - checksize(fl, f) - - if not len(fl): - err(filelinkrevs[f][0], _("empty or missing revlog"), f) - continue - + checklog(fl, f) seen = {} - nodes = {nullid: 1} for i in fl: revisions += 1 n = fl.node(i) - flr = fl.linkrev(n) - - if flr < 0 or (havecl and flr not in filelinkrevs.get(f, [])): - if flr < 0 or flr >= len(repo): - err(None, _("rev %d point to 
nonexistent changeset %d") - % (i, flr), f) - else: - err(None, _("rev %d points to unexpected changeset %d") - % (i, flr), f) - if f in filelinkrevs: - warn(_(" (expected %s)") % filelinkrevs[f][0]) - flr = None # can't be trusted - else: - if havecl: - filelinkrevs[f].remove(flr) - - if n in seen: - err(flr, _("duplicate revision %d") % i, f) + lr = checkentry(fl, i, n, seen, filelinkrevs.get(f, []), f) if f in filenodes: if havemf and n not in filenodes[f]: - err(flr, _("%s not in manifests") % (short(n)), f) + err(lr, _("%s not in manifests") % (short(n)), f) else: del filenodes[f][n] # verify contents try: t = fl.read(n) - except KeyboardInterrupt: - ui.warn(_("interrupted")) - raise + rp = fl.renamed(n) + if len(t) != fl.size(i): + if not fl._readmeta(n): # ancient copy? + err(lr, _("unpacked size is %s, %s expected") % + (len(t), fl.size(i)), f) except Exception, inst: - err(flr, _("unpacking %s: %s") % (short(n), inst), f) - - # verify parents - try: - (p1, p2) = fl.parents(n) - if p1 not in nodes: - err(flr, _("unknown parent 1 %s of %s") % - (short(p1), short(n)), f) - if p2 not in nodes: - err(flr, _("unknown parent 2 %s of %s") % - (short(p2), short(p1)), f) - except KeyboardInterrupt: - ui.warn(_("interrupted")) - raise - except Exception, inst: - err(flr, _("checking parents of %s: %s") % (short(n), inst), f) - nodes[n] = 1 + exc(lr, _("unpacking %s") % short(n), inst, f) # check renames try: - rp = fl.renamed(n) if rp: fl2 = repo.file(rp[0]) if not len(fl2): - err(flr, _("empty or missing copy source revlog %s:%s") + err(lr, _("empty or missing copy source revlog %s:%s") % (rp[0], short(rp[1])), f) elif rp[1] == nullid: - err(flr, _("copy source revision is nullid %s:%s") + err(lr, _("copy source revision is nullid %s:%s") % (rp[0], short(rp[1])), f) else: rev = fl2.rev(rp[1]) - except KeyboardInterrupt: - ui.warn(_("interrupted")) - raise except Exception, inst: - err(flr, _("checking rename of %s: %s") % - (short(n), inst), f) + exc(lr, _("checking 
rename of %s") % short(n), inst, f) # cross-check if f in filenodes: - fns = [(mf.linkrev(filenodes[f][n]), n) - for n in filenodes[f]] + fns = [(mf.linkrev(l), n) for n,l in filenodes[f].items()] fns.sort() for lr, node in fns: err(lr, _("%s in manifests not found") % short(node), f) ui.status(_("%d files, %d changesets, %d total revisions\n") % - (files, changesets, revisions)) - + (len(files), len(cl), revisions)) if warnings[0]: ui.warn(_("%d warnings encountered!\n") % warnings[0]) if errors[0]: ui.warn(_("%d integrity errors encountered!\n") % errors[0]) - if firstbad[0]: + if badrevs: ui.warn(_("(first damaged changeset appears to be %d)\n") - % firstbad[0]) + % min(badrevs)) return 1