view mercurial/verify.py @ 20182:04036798ebed

branches: avoid unnecessary changectx.branch() calls This requires reading from the changelog, which can be costly over NFS. Note that this does not totally remove reading from the changelog; we still do that when calling changectx.closesbranch(). That call will be removed in a later patch. Running hg branches on the PyPy repo (with 996) over a busy NFS server, before this change: $ time hg --profile branches > /dev/null CallCount Recursive Total(s) Inline(s) module:lineno(function) 2042 0 2.2827 2.2827 <open> 2036 0 0.9840 0.9840 <method 'close' of 'file' objects> 2036 0 0.0464 0.0464 <method 'read' of 'file' objects> 5233 0 0.1985 0.0453 mercurial.repoview:161(changelog) 10462 0 0.0791 0.0314 mercurial.changelog:133(tip) 5233 0 0.0388 0.0176 mercurial.localrepo:26(__get__) 10462 0 0.0250 0.0126 <len> 5233 0 0.0059 0.0039 mercurial.repoview:112(filterrevs) 10462 0 0.0029 0.0029 <hash> 2034 0 0.0444 0.0444 <method 'seek' of 'file' objects> 5340 0 0.0390 0.0390 mercurial.revlog:296(rev) 2582 0 0.0371 0.0371 <zlib.decompress> 3155 0 0.1963 0.0366 mercurial.context:202(__init__) 3155 0 0.1238 0.0306 mercurial.repoview:161(changelog) 3155 0 0.0261 0.0080 mercurial.changelog:183(rev) 9465 0 0.0061 0.0061 <isinstance> 1096 0 0.0023 0.0023 <binascii.unhexlify> 4251 0 0.0014 0.0014 <len> 2059 0 3.7341 0.0332 mercurial.changelog:270(read) 2059 0 3.6304 0.0307 mercurial.revlog:907(revision) 2057 0 0.0262 0.0137 mercurial.changelog:28(decodeextra) 4118 0 0.0094 0.0094 <method 'split' of 'str' objects> 4118 0 0.0270 0.0048 mercurial.encoding:61(tolocal) 2059 0 0.0040 0.0040 <method 'index' of 'str' objects> 10462 0 0.0791 0.0314 mercurial.changelog:133(tip) 10462 0 0.0289 0.0207 mercurial.changelog:190(node) 10462 0 0.0188 0.0091 <len> 52433 20932 0.0478 0.0310 <len> 20932 0 0.0221 0.0168 mercurial.revlog:262(__len__) 2059 0 3.6304 0.0307 mercurial.revlog:907(revision) real 0m4.361s user 0m0.986s sys 0m0.237s After this change: $ time hg --profile branches > /dev/null CallCount Recursive Total(s) Inline(s) module:lineno(function) 1069 0 1.1098 1.1098 <open> 1063 0 0.4865 0.4865 <method 'close' of 'file' objects> 4122 0 0.1811 0.0404 mercurial.repoview:161(changelog) 8240 0 0.0712 0.0272 mercurial.changelog:133(tip) 4122 0 0.0378 0.0177 mercurial.localrepo:26(__get__) 8240 0 0.0221 0.0115 <len> 4122 0 0.0057 0.0033 mercurial.repoview:112(filterrevs) 8240 0 0.0025 0.0025 <hash> 3029 0 0.1979 0.0371 mercurial.context:202(__init__) 3029 0 0.1278 0.0310 mercurial.repoview:161(changelog) 3029 0 0.0230 0.0081 mercurial.changelog:183(rev) 9087 0 0.0061 0.0061 <isinstance> 1096 0 0.0026 0.0026 <binascii.unhexlify> 4125 0 0.0014 0.0014 <len> 4229 0 0.0337 0.0337 mercurial.revlog:296(rev) 1061 0 0.0296 0.0296 <method 'seek' of 'file' objects> 1063 0 0.0292 0.0292 <method 'read' of 'file' objects> 8240 0 0.0712 0.0272 mercurial.changelog:133(tip) 8240 0 0.0271 0.0196 mercurial.changelog:190(node) 8240 0 0.0169 0.0083 <len> 40476 16488 0.0422 0.0271 <len> 16488 0 0.0193 0.0152 mercurial.revlog:262(__len__) 1342 0 0.0241 0.0241 <zlib.decompress> 9445 0 0.0336 0.0224 mercurial.changelog:190(node) 9445 0 0.0112 0.0112 mercurial.revlog:317(node) 1074 0 1.9102 0.0224 mercurial.changelog:270(read) 1074 0 1.8397 0.0202 mercurial.revlog:907(revision) 1073 0 0.0187 0.0099 mercurial.changelog:28(decodeextra) 2148 0 0.0061 0.0061 <method 'split' of 'str' objects> 2148 0 0.0184 0.0034 mercurial.encoding:61(tolocal) real 0m2.402s user 0m0.735s sys 0m0.177s
author Brodie Rao <brodie@sf.io>
date Fri, 15 Nov 2013 23:18:08 -0500
parents e1e181a64de7
children 78f547cdc362
line wrap: on
line source

# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from node import nullid, short
from i18n import _
import os
import revlog, util, error

def verify(repo):
    lock = repo.lock()
    try:
        return _verify(repo)
    finally:
        lock.release()

def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while '//' in f:
        f = f.replace('//', '/')
    return f

def _verify(repo):
    repo = repo.unfiltered()
    mflinkrevs = {}
    filelinkrevs = {}
    filenodes = {}
    revisions = 0
    badrevs = set()
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.cancopy():
        raise util.Abort(_("cannot verify bundle or remote repos"))

    def err(linkrev, msg, filename=None):
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

    def exc(linkrev, msg, inst, filename=None):
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            raise
        if not str(inst):
            inst = repr(inst)
        err(linkrev, "%s: %s" % (msg, inst), filename)

    def warn(msg):
        ui.warn(msg + "\n")
        warnings[0] += 1

    def checklog(obj, name, linkrev):
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(obj, i, node, seen, linkrevs, f):
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception, inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    if os.path.exists(repo.sjoin("journal")):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                       (revlogv1 and 1 or 0))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    ui.status(_("checking changesets\n"))
    refersmf = False
    seen = {}
    checklog(cl, "changelog", 0)
    total = len(repo)
    for i in repo:
        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

        try:
            changes = cl.read(n)
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(i)
                refersmf = True
            for f in changes[3]:
                filelinkrevs.setdefault(_normpath(f), []).append(i)
        except Exception, inst:
            refersmf = True
            exc(i, _("unpacking changeset %s") % short(n), inst)
    ui.progress(_('checking'), None)

    ui.status(_("checking manifests\n"))
    seen = {}
    if refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        checklog(mf, "manifest", 0)
    total = len(mf)
    for i in mf:
        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null":
                    filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
        except Exception, inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)
    ui.progress(_('checking'), None)

    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
    count = 0
    if havemf:
        for c, m in sorted([(c, m) for m in mflinkrevs
                            for c in mflinkrevs[m]]):
            count += 1
            if m == nullid:
                continue
            ui.progress(_('crosschecking'), count, total=total)
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.progress(_('crosschecking'), None)

    ui.status(_("checking files\n"))

    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0 or not revlogv1:
            storefiles.add(_normpath(f))

    files = sorted(set(filenodes) | set(filelinkrevs))
    total = len(files)
    for i, f in enumerate(files):
        ui.progress(_('checking'), i, item=f, total=total)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.RevlogError, e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                err(lr, _("missing revlog!"), ff)

        checklog(fl, f, lr)
        seen = {}
        rp = None
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            # verify contents
            try:
                l = len(fl.read(n))
                rp = fl.renamed(n)
                if l != fl.size(i):
                    if len(fl.revision(n)) != fl.size(i):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (l, fl.size(i)), f)
            except Exception, inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        found = False
                        for pctx in ctx.parents():
                            if rp[0] in pctx:
                                found = True
                                break
                        if not found:
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                            % (f, lr, rp[0], short(rp[1])))
                    else:
                        fl2.rev(rp[1])
            except Exception, inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check
        if f in filenodes:
            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                err(lr, _("%s in manifests not found") % short(node), f)
    ui.progress(_('checking'), None)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    ui.status(_("%d files, %d changesets, %d total revisions\n") %
                   (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1